[Python-checkins] cpython (merge 3.5 -> default): Issue #26881: The modulefinder module now supports extended opcode arguments.

serhiy.storchaka python-checkins at python.org
Sun May 8 17:02:31 EDT 2016


https://hg.python.org/cpython/rev/c27e3773d0f9
changeset:   101277:c27e3773d0f9
parent:      101275:e4835b1ed7b1
parent:      101276:3579cdaf56d9
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sun May 08 23:44:54 2016 +0300
summary:
  Issue #26881: The modulefinder module now supports extended opcode arguments.

files:
  Lib/dis.py                    |  59 ++++++++++------------
  Lib/modulefinder.py           |  43 ++++++----------
  Lib/test/test_modulefinder.py |  13 +++++
  Misc/NEWS                     |   4 +
  4 files changed, 62 insertions(+), 57 deletions(-)


diff --git a/Lib/dis.py b/Lib/dis.py
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -284,31 +284,17 @@
 
     """
     labels = findlabels(code)
-    extended_arg = 0
     starts_line = None
     free = None
-    # enumerate() is not an option, since we sometimes process
-    # multiple elements on a single pass through the loop
-    n = len(code)
-    i = 0
-    while i < n:
-        op = code[i]
-        offset = i
+    for offset, op, arg in _unpack_opargs(code):
         if linestarts is not None:
-            starts_line = linestarts.get(i, None)
+            starts_line = linestarts.get(offset, None)
             if starts_line is not None:
                 starts_line += line_offset
-        is_jump_target = i in labels
-        i = i+1
-        arg = None
+        is_jump_target = offset in labels
         argval = None
         argrepr = ''
-        if op >= HAVE_ARGUMENT:
-            arg = code[i] + code[i+1]*256 + extended_arg
-            extended_arg = 0
-            i = i+2
-            if op == EXTENDED_ARG:
-                extended_arg = arg*65536
+        if arg is not None:
             #  Set argval to the dereferenced value of the argument when
             #  availabe, and argrepr to the string representation of argval.
             #    _disassemble_bytes needs the string repr of the
@@ -319,7 +305,7 @@
             elif op in hasname:
                 argval, argrepr = _get_name_info(arg, names)
             elif op in hasjrel:
-                argval = i + arg
+                argval = offset + 3 + arg
                 argrepr = "to " + repr(argval)
             elif op in haslocal:
                 argval, argrepr = _get_name_info(arg, varnames)
@@ -329,7 +315,7 @@
             elif op in hasfree:
                 argval, argrepr = _get_name_info(arg, cells)
             elif op in hasnargs:
-                argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1])
+                argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)
         yield Instruction(opname[op], op,
                           arg, argval, argrepr,
                           offset, starts_line, is_jump_target)
@@ -365,6 +351,25 @@
 
 disco = disassemble                     # XXX For backwards compatibility
 
+def _unpack_opargs(code):
+    # enumerate() is not an option, since we sometimes process
+    # multiple elements on a single pass through the loop
+    extended_arg = 0
+    n = len(code)
+    i = 0
+    while i < n:
+        op = code[i]
+        offset = i
+        i = i+1
+        arg = None
+        if op >= HAVE_ARGUMENT:
+            arg = code[i] + code[i+1]*256 + extended_arg
+            extended_arg = 0
+            i = i+2
+            if op == EXTENDED_ARG:
+                extended_arg = arg*65536
+        yield (offset, op, arg)
+
 def findlabels(code):
     """Detect all offsets in a byte code which are jump targets.
 
@@ -372,19 +377,11 @@
 
     """
     labels = []
-    # enumerate() is not an option, since we sometimes process
-    # multiple elements on a single pass through the loop
-    n = len(code)
-    i = 0
-    while i < n:
-        op = code[i]
-        i = i+1
-        if op >= HAVE_ARGUMENT:
-            arg = code[i] + code[i+1]*256
-            i = i+2
+    for offset, op, arg in _unpack_opargs(code):
+        if arg is not None:
             label = -1
             if op in hasjrel:
-                label = i+arg
+                label = offset + 3 + arg
             elif op in hasjabs:
                 label = arg
             if label >= 0:
diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py
--- a/Lib/modulefinder.py
+++ b/Lib/modulefinder.py
@@ -13,13 +13,12 @@
     warnings.simplefilter('ignore', DeprecationWarning)
     import imp
 
-# XXX Clean up once str8's cstor matches bytes.
-LOAD_CONST = bytes([dis.opmap['LOAD_CONST']])
-IMPORT_NAME = bytes([dis.opmap['IMPORT_NAME']])
-STORE_NAME = bytes([dis.opmap['STORE_NAME']])
-STORE_GLOBAL = bytes([dis.opmap['STORE_GLOBAL']])
+LOAD_CONST = dis.opmap['LOAD_CONST']
+IMPORT_NAME = dis.opmap['IMPORT_NAME']
+STORE_NAME = dis.opmap['STORE_NAME']
+STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
 STORE_OPS = STORE_NAME, STORE_GLOBAL
-HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
+EXTENDED_ARG = dis.EXTENDED_ARG
 
 # Modulefinder does a good job at simulating Python's, but it can not
 # handle __path__ modifications packages make at runtime.  Therefore there
@@ -337,38 +336,30 @@
                         fullname = name + "." + sub
                         self._add_badmodule(fullname, caller)
 
-    def scan_opcodes_25(self, co,
-                     unpack = struct.unpack):
+    def scan_opcodes(self, co):
         # Scan the code, and yield 'interesting' opcode combinations
-        # Python 2.5 version (has absolute and relative imports)
         code = co.co_code
         names = co.co_names
         consts = co.co_consts
-        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
-        while code:
-            c = bytes([code[0]])
-            if c in STORE_OPS:
-                oparg, = unpack('<H', code[1:3])
+        opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
+                  if op != EXTENDED_ARG]
+        for i, (op, oparg) in enumerate(opargs):
+            if op in STORE_OPS:
                 yield "store", (names[oparg],)
-                code = code[3:]
                 continue
-            if code[:9:3] == LOAD_LOAD_AND_IMPORT:
-                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
-                level = consts[oparg_1]
+            if (op == IMPORT_NAME and i >= 2
+                    and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
+                level = consts[opargs[i-2][1]]
+                fromlist = consts[opargs[i-1][1]]
                 if level == 0: # absolute import
-                    yield "absolute_import", (consts[oparg_2], names[oparg_3])
+                    yield "absolute_import", (fromlist, names[oparg])
                 else: # relative import
-                    yield "relative_import", (level, consts[oparg_2], names[oparg_3])
-                code = code[9:]
+                    yield "relative_import", (level, fromlist, names[oparg])
                 continue
-            if c >= HAVE_ARGUMENT:
-                code = code[3:]
-            else:
-                code = code[1:]
 
     def scan_code(self, co, m):
         code = co.co_code
-        scanner = self.scan_opcodes_25
+        scanner = self.scan_opcodes
         for what, args in scanner(co):
             if what == "store":
                 name, = args
diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py
--- a/Lib/test/test_modulefinder.py
+++ b/Lib/test/test_modulefinder.py
@@ -319,6 +319,19 @@
         expected = "co_filename %r changed to %r" % (old_path, new_path)
         self.assertIn(expected, output)
 
+    def test_extended_opargs(self):
+        extended_opargs_test = [
+            "a",
+            ["a", "b"],
+            [], [],
+            """\
+a.py
+                                %r
+                                import b
+b.py
+""" % list(range(2**16))]  # 2**16 constants
+        self._do_test(extended_opargs_test)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -268,6 +268,8 @@
 Library
 -------
 
+- Issue #26881: The modulefinder module now supports extended opcode arguments.
+
 - Issue #23815: Fixed crashes related to directly created instances of types in
   _tkinter and curses.panel modules.
 
@@ -277,6 +279,8 @@
 - Issue #26873: xmlrpc now raises ResponseError on unsupported type tags
   instead of silently return incorrect result.
 
+- Issue #26881: modulefinder now works with bytecode with extended args.
+
 - Issue #26915:  The __contains__ methods in the collections ABCs now check
   for identity before checking equality.  This better matches the behavior
   of the concrete classes, allows sensible handling of NaNs, and makes it

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list