[pypy-commit] lang-smalltalk storage: Added binary storage-log output & parsing to shrink the logfile.

anton_gulenko noreply at buildbot.pypy.org
Mon Jul 7 13:16:25 CEST 2014


Author: Anton Gulenko <anton.gulenko at googlemail.com>
Branch: storage
Changeset: r858:1f520c5d82db
Date: 2014-07-01 16:23 +0200
http://bitbucket.org/pypy/lang-smalltalk/changeset/1f520c5d82db/

Log:	Added binary storage-log output & parsing to shrink the logfile.

diff --git a/spyvm/storage_logger.py b/spyvm/storage_logger.py
--- a/spyvm/storage_logger.py
+++ b/spyvm/storage_logger.py
@@ -1,16 +1,20 @@
 
-# Put flag in an object to make it modifyable after compile time.
-class LoggerActive(object):
+import sys
+
+# Put flags in an object to make them modifiable after compile time.
+class LoggerOptions(object):
     def __init__(self):
         self.active = False
+        self.binary = False
 
-_active = LoggerActive()
+_options = LoggerOptions()
 
-def activate():
-    _active.active = True
+def activate(binary = False):
+    _options.active = True
+    _options.binary = binary
 
 def log(w_obj, operation, old_storage_object=None, log_classname=True):
-    if not _active.active:
+    if not _options.active:
         return
     
     # Gather information to be logged
@@ -25,7 +29,61 @@
     else:
         classname = None
     
-    # Construct and print the logstring
+    if _options.binary:
+        binary_output(operation, old_storage, new_storage, classname, size)
+    else:
+        output(operation, old_storage, new_storage, classname, size)
+
+def output(operation, old_storage, new_storage, classname, size):
+    # Construct and print a simple logstring
     old_storage_string = "%s -> " % old_storage if old_storage else ""
     classname_string = " of %s" % classname if classname else ""
     print "%s (%s%s)%s size %d" % (operation, old_storage_string, new_storage, classname_string, size)
+    
+operation_map = {
+    "Filledin": 1,
+    "Initialized": 2,
+    "Switched": 3,
+}
+
+storage_map = {
+    "AllNilStorageShadow": 1,
+    "SmallIntegerOrNilStorageShadow": 2,
+    "FloatOrNilStorageShadow": 3,
+    "ListStorageShadow": 4,
+    "WeakListStorageShadow": 5,
+    "ClassShadow": 6,
+    "MethodDictionaryShadow": 7,
+    "BlockContextShadow": 8,
+    "MethodContextShadow": 9,
+    "CachedObjectShadow": 10,
+    "ObserveeShadow": 11,
+    None: 12,
+}
+
+def binary_output(operation, old_storage, new_storage, classname, size):
+    # Output a byte-coded log entry
+    bytes = bytearray()
+    
+    # First 3 bytes: operation, old_storage, new_storage
+    assert operation in operation_map, "Cannot handle operation %s" % operation
+    bytes.append(operation_map[operation])
+    assert old_storage in storage_map, "Cannot handle old-storage type %s" % old_storage
+    bytes.append(storage_map[old_storage])
+    assert new_storage in storage_map, "Cannot handle new-storage type %s" % new_storage
+    bytes.append(storage_map[new_storage])
+    
+    # Next: 2 bytes encoding object size (little endian: low byte first)
+    assert size < 2**16, "Object of type %s too large (size %d)" % (classname, size)
+    mask = (1<<8)-1
+    bytes.append(size & mask)
+    mask = mask<<8
+    bytes.append((size & mask) >> 8)
+    
+    # Next: classname string plus terminating null-character
+    if classname:
+        for c in classname:
+            bytes.append(c)
+    bytes.append(0)
+    
+    sys.stdout.write(bytes)
diff --git a/spyvm/tool/storagelog_parser.py b/spyvm/tool/storagelog_parser.py
--- a/spyvm/tool/storagelog_parser.py
+++ b/spyvm/tool/storagelog_parser.py
@@ -1,24 +1,77 @@
 
 import re, sys, operator
+import spyvm.storage_logger
 
 OPERATIONS = ["Filledin", "Initialized", "Switched"]
 
+# Reverse the two maps used to encode the byte encoded log-output
+storage_map = {v:k for k, v in spyvm.storage_logger.storage_map.items()}
+operation_map = {v:k for k, v in spyvm.storage_logger.operation_map.items()}
+
 # ====================================================================
 # ======== Basic functions
 # ====================================================================
 
+def filesize(file):
+    import os
+    return os.path.getsize(file.name)
+
 def parse(filename, flags):
     entries = []
     with open(filename, 'r', 1) as file:
-        while True:
-            line = file.readline()
-            if len(line) == 0:
-                break
-            entry = parse_line(line, flags)
-            if entry:
-                entries.append(entry)
+        if flags.binary:
+            while True:
+                try:
+                    entry = parse_binary(file)
+                    if entry == None:
+                        if flags.verbose:
+                            tell = file.tell()
+                            format = (tell, len(entries), filesize(file) - tell)
+                            print "Stopped parsing after %d bytes (%d entries). Ignoring leftover %d bytes." % format
+                        break
+                    else:
+                        entries.append(entry)
+                except:
+                    print "Exception while parsing file, after %d bytes (%d entries)" % (file.tell(), len(entries))
+                    raise
+        else:
+            while True:
+                line = file.readline()
+                if len(line) == 0:
+                    break
+                entry = parse_line(line, flags)
+                if entry:
+                    entries.append(entry)
     return entries
 
+def parse_binary(file):
+    # First 3 bytes: operation, old storage, new storage
+    header = file.read(3)
+    operation_byte = ord(header[0])
+    old_storage_byte = ord(header[1])
+    new_storage_byte = ord(header[2])
+    # This is the only way to check if we are reading a correct log entry
+    if operation_byte not in operation_map or old_storage_byte not in storage_map or new_storage_byte not in storage_map:
+        return None
+    operation = operation_map[operation_byte]
+    old_storage = storage_map[old_storage_byte]
+    new_storage = storage_map[new_storage_byte]
+    
+    # Next 2 bytes: object size (little endian: low byte first)
+    size_bytes = file.read(2)
+    size = int(ord(size_bytes[0]) + (ord(size_bytes[1])<<8))
+    
+    # Last: classname, nul-terminated
+    classname = ""
+    while True:
+        byte = file.read(1)
+        if byte == chr(0):
+            break
+        classname += byte
+    if len(classname) == 0:
+        classname = None
+    return LogEntry(operation, old_storage, new_storage, classname, size)
+
 line_pattern = re.compile("^(?P<operation>\w+) \(((?P<old>\w+) -> )?(?P<new>\w+)\)( of (?P<classname>.+))? size (?P<size>[0-9]+)$")
 
 def parse_line(line, flags):
@@ -32,25 +85,24 @@
     new_storage = result.group('new')
     classname = result.group('classname')
     size = result.group('size')
-    if old_storage is None:
-        if operation == "Filledin":
-            old_storage = " Image Loading Storage" # Space to be sorted to the beginning
-        elif operation == "Initialized":
-            old_storage = " Object Creation Storage"
-        else:
-            assert False, "old_storage has to be available in a Switched operation"
-    entry = LogEntry(operation, old_storage, new_storage, classname, size)
-    #entry.is_special = 
-    return entry
+    return LogEntry(operation, old_storage, new_storage, classname, size)
 
 class LogEntry(object):
     
     def __init__(self, operation, old_storage, new_storage, classname, size):
         self.operation = str(operation)
-        self.old_storage = str(old_storage)
         self.new_storage = str(new_storage)
         self.classname = str(classname)
         self.size = float(size)
+        
+        if old_storage is None:
+            if operation == "Filledin":
+                old_storage = " Image Loading Storage" # Space to be sorted to the beginning
+            elif operation == "Initialized":
+                old_storage = " Object Creation Storage"
+            else:
+                assert False, "old_storage has to be available in a Switched operation"
+        self.old_storage = str(old_storage)
     
     def full_key(self):
         return (self.operation, self.old_storage, self.new_storage)
@@ -471,7 +523,7 @@
 # ======== Main
 # ====================================================================
 
-def command_print_entries(entries):
+def command_print_entries(entries, flags):
     for e in entries:
         print e
 
@@ -506,6 +558,7 @@
         ('allstorage', '-a'),
         ('detailed', '-d'),
         ('classes', '-c'),
+        ('binary', '-b'),
     ])
     
     command_prefix = "command_"
diff --git a/targetimageloadingsmalltalk.py b/targetimageloadingsmalltalk.py
--- a/targetimageloadingsmalltalk.py
+++ b/targetimageloadingsmalltalk.py
@@ -131,6 +131,7 @@
           -ni|--no-interrupts
           -d|--max-stack-depth [number, default %d, <= 0 disables stack protection]
           -l|--storage-log
+          -lb|--storage-log-binary (output should be redirected to file)
           [image path, default: Squeak.image]
     """ % (argv[0], constants.MAX_LOOP_DEPTH)
 
@@ -197,6 +198,8 @@
             idx += 1
         elif arg in ["-l", "--storage-log"]:
             storage_logger.activate()
+        elif arg in ["-lb", "--storage-log-binary"]:
+            storage_logger.activate(binary=True)
         elif path is None:
             path = argv[idx]
         else:


More information about the pypy-commit mailing list