[pypy-svn] r45188 - in pypy/dist/pypy: config translator/c translator/c/test

arigo at codespeak.net arigo at codespeak.net
Wed Jul 18 17:16:48 CEST 2007


Author: arigo
Date: Wed Jul 18 17:16:47 2007
New Revision: 45188

Added:
   pypy/dist/pypy/translator/c/sandbox.py   (contents, props changed)
   pypy/dist/pypy/translator/c/sandboxmsg.py   (contents, props changed)
   pypy/dist/pypy/translator/c/test/test_sandbox.py   (contents, props changed)
Modified:
   pypy/dist/pypy/config/translationoption.py
   pypy/dist/pypy/translator/c/database.py
   pypy/dist/pypy/translator/c/genc.py
   pypy/dist/pypy/translator/c/node.py
Log:
Beginning of sandboxing support:

"""Generation of sandboxing stand-alone executable from RPython code.
In place of real calls to any external function, this code builds
trampolines that marshal their input arguments, dump them to STDOUT,
and wait for an answer on STDIN.  Enable with 'translate.py --sandbox'.
"""

So far, only external functions with int or zero-terminated char*
arguments are supported.  The others give a translation-time
NotImplementedError: at no point should a --sandbox translation give an
unsafe executable.



Modified: pypy/dist/pypy/config/translationoption.py
==============================================================================
--- pypy/dist/pypy/config/translationoption.py	(original)
+++ pypy/dist/pypy/config/translationoption.py	Wed Jul 18 17:16:47 2007
@@ -57,6 +57,8 @@
                default=0),
     BoolOption("countmallocs", "Count mallocs and frees", default=False,
                cmdline=None),
+    BoolOption("sandbox", "Produce a fully-sandboxed executable",
+               default=False, cmdline="--sandbox"),
 
     # misc
     StrOption("cc", "Specify compiler to use for compiling generated C", cmdline="--cc"),

Modified: pypy/dist/pypy/translator/c/database.py
==============================================================================
--- pypy/dist/pypy/translator/c/database.py	(original)
+++ pypy/dist/pypy/translator/c/database.py	Wed Jul 18 17:16:47 2007
@@ -25,9 +25,11 @@
     def __init__(self, translator=None, standalone=False,
                  gcpolicyclass=None,
                  stacklesstransformer=None,
-                 thread_enabled=False):
+                 thread_enabled=False,
+                 sandbox=False):
         self.translator = translator
         self.standalone = standalone
+        self.sandbox    = sandbox
         self.stacklesstransformer = stacklesstransformer
         if gcpolicyclass is None:
             gcpolicyclass = gc.RefcountingGcPolicy
@@ -354,3 +356,7 @@
         for node in self.structdefnodes.values():
             produce(node)
         return result
+
+    def need_sandboxing(self, fnobj):
+        return self.sandbox and (
+            not getattr(fnobj, '_safe_not_sandboxed', False))

Modified: pypy/dist/pypy/translator/c/genc.py
==============================================================================
--- pypy/dist/pypy/translator/c/genc.py	(original)
+++ pypy/dist/pypy/translator/c/genc.py	Wed Jul 18 17:16:47 2007
@@ -58,7 +58,8 @@
         db = LowLevelDatabase(translator, standalone=self.standalone,
                               gcpolicyclass=gcpolicyclass,
                               stacklesstransformer=stacklesstransformer,
-                              thread_enabled=self.config.translation.thread)
+                              thread_enabled=self.config.translation.thread,
+                              sandbox=self.config.translation.sandbox)
         # pass extra options into pyobjmaker
         if pyobj_options:
             for key, value in pyobj_options.items():

Modified: pypy/dist/pypy/translator/c/node.py
==============================================================================
--- pypy/dist/pypy/translator/c/node.py	(original)
+++ pypy/dist/pypy/translator/c/node.py	Wed Jul 18 17:16:47 2007
@@ -605,7 +605,7 @@
         self.db = db
         self.T = T
         self.obj = obj
-        if hasattr(obj, 'includes'):
+        if hasattr(obj, 'includes') and not db.need_sandboxing(obj):
             self.includes = obj.includes
             self.name = forcename or self.basename()
         else:
@@ -695,13 +695,16 @@
         funcgen.implementation_end()
 
 def select_function_code_generators(fnobj, db, functionname):
+    sandbox = db.need_sandboxing(fnobj)
     if hasattr(fnobj, '_external_name'):
+        assert not sandbox
         db.externalfuncs[fnobj._external_name] = fnobj
         return []
     elif fnobj._callable in extfunc.EXTERNALS:
         # 'fnobj' is one of the ll_xyz() functions with the suggested_primitive
         # flag in pypy.rpython.module.*.  The corresponding C wrappers are
         # written by hand in src/ll_*.h, and declared in extfunc.EXTERNALS.
+        assert not sandbox
         db.externalfuncs[fnobj._callable] = fnobj
         return []
     elif getattr(fnobj._callable, 'suggested_primitive', False):
@@ -712,10 +715,16 @@
         return [FunctionCodeGenerator(fnobj.graph, db, exception_policy,
                                       functionname)]
     elif getattr(fnobj, 'external', None) == 'C':
-        # deprecated case
         if hasattr(fnobj, 'includes'):
-            return []   # assume no wrapper needed
+            # apply the sandbox transformation
+            if sandbox:
+                from pypy.translator.c.sandbox import getSandboxFuncCodeGen
+                return [getSandboxFuncCodeGen(fnobj, db)]
+            else:
+                return []   # assume no wrapper needed
         else:
+            # deprecated case
+            assert not sandbox
             return [CExternalFunctionCodeGenerator(fnobj, db)]
     else:
         raise ValueError, "don't know how to generate code for %r" % (fnobj,)

Added: pypy/dist/pypy/translator/c/sandbox.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/c/sandbox.py	Wed Jul 18 17:16:47 2007
@@ -0,0 +1,139 @@
+"""Generation of sandboxing stand-alone executable from RPython code.
+In place of real calls to any external function, this code builds
+trampolines that marshal their input arguments, dump them to STDOUT,
+and wait for an answer on STDIN.  Enable with 'translate.py --sandbox'.
+"""
+from pypy.translator.c.sandboxmsg import MessageBuilder, LLMessage
+
+# ____________________________________________________________
+#
+# Sandboxing code generator for external functions
+#
+
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.annotation import model as annmodel
+from pypy.rlib.unroll import unrolling_iterable
+from pypy.translator.c import funcgen
+from pypy.tool.sourcetools import func_with_new_name
+from pypy.rpython.annlowlevel import MixLevelHelperAnnotator
+
+def getSandboxFuncCodeGen(fnobj, db):
+    graph = get_external_function_sandbox_graph(fnobj, db)
+    return funcgen.FunctionCodeGenerator(graph, db)
+
+# versions of os.read() and os.write() that are not mangled
+# by the sandboxing mechanism
+ll_read_not_sandboxed = rffi.llexternal('read',
+                                        [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
+                                        rffi.SIZE_T)
+ll_read_not_sandboxed._obj._safe_not_sandboxed = True
+
+ll_write_not_sandboxed = rffi.llexternal('write',
+                                         [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
+                                         rffi.SIZE_T)
+ll_write_not_sandboxed._obj._safe_not_sandboxed = True
+
+def writeall_not_sandboxed(fd, buf, length):
+    while length > 0:
+        size = rffi.cast(rffi.SIZE_T, length)
+        count = rffi.cast(lltype.Signed, ll_write_not_sandboxed(fd, buf, size))
+        if count < 0:
+            raise IOError
+        length -= count
+        buf = lltype.direct_ptradd(lltype.direct_arrayitems(buf), count)
+        buf = rffi.cast(rffi.CCHARP, buf)
+
+def readall_not_sandboxed(fd, length):
+    buf = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
+    p = buf
+    got = 0
+    while got < length:
+        size1 = rffi.cast(rffi.SIZE_T, length - got)
+        count = rffi.cast(lltype.Signed, ll_read_not_sandboxed(fd, p, size1))
+        if count < 0:
+            raise IOError
+        got += count
+        p = lltype.direct_ptradd(lltype.direct_arrayitems(p), count)
+        p = rffi.cast(rffi.CCHARP, p)
+    return buf
+
+def buf2num(buf, index=0):
+    c0 = ord(buf[index  ])
+    c1 = ord(buf[index+1])
+    c2 = ord(buf[index+2])
+    c3 = ord(buf[index+3])
+    if c0 >= 0x80:
+        c0 -= 0x100
+    return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3
+
+
+def get_external_function_sandbox_graph(fnobj, db):
+    """Build the graph of a helper trampoline function to be used
+    in place of real calls to the external function 'fnobj'.  The
+    trampoline marshals its input arguments, dumps them to STDOUT,
+    and waits for an answer on STDIN.
+    """
+    # XXX for now, only supports function with int and string arguments
+    # and returning an int.
+    FUNCTYPE = lltype.typeOf(fnobj)
+    unroll_args = []
+    for i, ARG in enumerate(FUNCTYPE.ARGS):
+        if ARG == rffi.INT:       # 'int' argument
+            methodname = "packnum"
+        elif ARG == rffi.CCHARP:  # 'char*' argument, assumed zero-terminated
+            methodname = "packccharp"
+        else:
+            raise NotImplementedError("external function %r argument type %s" %
+                                      (fnobj, ARG))
+        unroll_args.append((i, methodname))
+    if FUNCTYPE.RESULT != rffi.INT:
+        raise NotImplementedError("exernal function %r return type %s" % (
+            fnobj, FUNCTYPE.RESULT))
+    unroll_args = unrolling_iterable(unroll_args)
+    fnname = fnobj._name
+
+    def execute(*args):
+        STDIN = 0
+        STDOUT = 1
+        assert len(args) == len(FUNCTYPE.ARGS)
+        # marshal the input arguments
+        msg = MessageBuilder()
+        msg.packstring(fnname)
+        for index, methodname in unroll_args:
+            getattr(msg, methodname)(args[index])
+        buf = msg.as_rffi_buf()
+        try:
+            writeall_not_sandboxed(STDOUT, buf, msg.getlength())
+        finally:
+            lltype.free(buf, flavor='raw')
+
+        # wait for the answer
+        buf = readall_not_sandboxed(STDIN, 4)
+        try:
+            length = buf2num(buf)
+        finally:
+            lltype.free(buf, flavor='raw')
+
+        length -= 4     # the original length includes the header
+        if length < 0:
+            raise IOError
+        buf = readall_not_sandboxed(STDIN, length)
+        try:
+            # decode the answer
+            msg = LLMessage(buf, 0, length)
+            errcode = msg.nextnum()
+            if errcode != 0:
+                raise IOError
+            result = msg.nextnum()
+        finally:
+            lltype.free(buf, flavor='raw')
+
+        return result
+    execute = func_with_new_name(execute, 'sandboxed_' + fnname)
+
+    ann = MixLevelHelperAnnotator(db.translator.rtyper)
+    args_s = [annmodel.lltype_to_annotation(ARG) for ARG in FUNCTYPE.ARGS]
+    s_result = annmodel.lltype_to_annotation(FUNCTYPE.RESULT)
+    graph = ann.getgraph(execute, args_s, s_result)
+    ann.finish()
+    return graph

Added: pypy/dist/pypy/translator/c/sandboxmsg.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/c/sandboxmsg.py	Wed Jul 18 17:16:47 2007
@@ -0,0 +1,150 @@
+import sys, os
+import struct
+import select
+
+from pypy.annotation import policy, model as annmodel
+
+# ____________________________________________________________
+#
+# Marshalling of external function calls' arguments
+#
+
+class MessageBuilder(object):
+    def __init__(self):
+        self.value = ['\xFF', '\xFF', '\xFF', '\xFF']
+
+    def packstring(self, s):
+        self.packnum(len(s), "s")
+        self.value += s
+        return self
+    packstring._annenforceargs_ = policy.Sig(None, str)
+
+    def packccharp(self, p):
+        length = 0
+        while p[length] != '\x00':
+            length += 1
+        self.packnum(length, "s")
+        for i in range(length):
+            self.value.append(p[i])
+        return self
+
+    def packnum(self, n, prefix="i"):
+        self.value.append(prefix)
+        self.value.append(chr((n >> 24) & 0xFF))
+        self.value.append(chr((n >> 16) & 0xFF))
+        self.value.append(chr((n >>  8) & 0xFF))
+        self.value.append(chr((n      ) & 0xFF))
+        return self
+    packnum._annenforceargs_ = policy.Sig(None, int, annmodel.SomeChar())
+
+    def _fixlength(self):
+        n = len(self.value)
+        self.value[0] = chr((n >> 24) & 0xFF)
+        self.value[1] = chr((n >> 16) & 0xFF)
+        self.value[2] = chr((n >>  8) & 0xFF)
+        self.value[3] = chr((n      ) & 0xFF)
+
+    def getvalue(self):
+        self._fixlength()
+        return ''.join(self.value)
+
+    def as_rffi_buf(self):
+        from pypy.rpython.lltypesystem import lltype, rffi
+        self._fixlength()
+        value = self.value
+        length = len(value)
+        array = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
+        for i in range(length):
+            array[i] = value[i]
+        return array
+
+    def getlength(self):
+        return len(self.value)
+
+
+class LLMessage(object):
+    def __init__(self, value, start, stop):
+        self.value = value
+        self.pos = start
+        self.stop = stop
+
+    def _char(self):
+        i = self.pos
+        if i >= self.stop:
+            raise ValueError
+        self.pos = i + 1
+        return self.value[i]
+
+    def nextstring(self):
+        length = self.nextnum("s")
+        i = self.pos
+        self.pos = i + length
+        if self.pos > self.stop:
+            raise ValueError
+        # general version assuming that self.value is only indexable,
+        # not sliceable.  See also the Message subclass.
+        return ''.join([self.value[index] for index in range(i, self.pos)])
+
+    def nextnum(self, prefix="i"):
+        t = self._char()
+        if t != prefix:
+            raise ValueError
+        c0 = ord(self._char())
+        c1 = ord(self._char())
+        c2 = ord(self._char())
+        c3 = ord(self._char())
+        if c0 >= 0x80:
+            c0 -= 0x100
+        return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3
+
+    def end(self):
+        return self.pos >= self.stop
+
+
+class Message(LLMessage):
+    "NOT_RPYTHON"
+    # 'value' is a regular string in this case,
+    # allowing a more reasonable implementation of nextstring()
+    def __init__(self, buf):
+        LLMessage.__init__(self, buf, start=0, stop=len(buf))
+
+    def nextstring(self):
+        length = self.nextnum("s")
+        i = self.pos
+        self.pos = i + length
+        if self.pos > self.stop:
+            raise ValueError
+        return self.value[i:self.pos]
+
+def timeout_read(f, size, timeout=None):
+    if size < 0:
+        raise ValueError("negative size")
+    if timeout is None:
+        return f.read(size)
+    else:
+        # XXX not Win32-compliant!
+        assert not sys.platform.startswith('win'), "XXX fix me"
+        # It also assumes that 'f' does no buffering!
+        fd = f.fileno()
+        result = ""
+        while len(result) < size:
+            iwtd, owtd, ewtd = select.select([fd], [], [], timeout)
+            if not iwtd:
+                raise Timeout("got %d bytes after %s seconds, expected %d" % (
+                    len(result), timeout, size))
+            buf = os.read(fd, size - len(result))
+            if not buf:
+                raise EOFError
+            result += buf
+        return result
+
+class Timeout(Exception):
+    pass
+
+def read_message(f, timeout=None):
+    """NOT_RPYTHON - Warning! 'timeout' only works if 'f' is opened
+    with no buffering at all!
+    """
+    msglength, = struct.unpack("!i", timeout_read(f, 4, timeout))
+    buf = timeout_read(f, msglength - 4, timeout)
+    return Message(buf)

Added: pypy/dist/pypy/translator/c/test/test_sandbox.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/c/test/test_sandbox.py	Wed Jul 18 17:16:47 2007
@@ -0,0 +1,67 @@
+import sys, os
+import struct
+
+from pypy.rpython.lltypesystem import rffi
+from pypy.translator.c.sandboxmsg import Message, MessageBuilder, read_message
+from pypy.translator.interactive import Translation
+
+
+def test_sandbox_message():
+    def num(n):
+        return struct.pack("!i", n)
+    msg = MessageBuilder()
+    msg.packstring("open")
+    msg.packccharp(rffi.str2charp("/tmp/foobar"))
+    msg.packnum(123)
+    res = msg.getvalue()
+    assert res == (num(len(res)) +
+                   "s" + num(4) + "open" +
+                   "s" + num(11) + "/tmp/foobar" +
+                   "i" + num(123))
+
+    msg = Message(res[4:])
+    m1 = msg.nextstring()
+    assert m1 == "open"
+    m2 = msg.nextstring()
+    assert m2 == "/tmp/foobar"
+    m3 = msg.nextnum()
+    assert m3 == 123
+
+def test_sandbox():
+    def entry_point(argv):
+        fd = os.open("/tmp/foobar", os.O_RDONLY, 0777)
+        assert fd == 77
+        fd2 = os.dup(fd)
+        assert fd2 == 78
+        return 0
+
+    t = Translation(entry_point, backend='c', standalone=True, sandbox=True)
+    exe = t.compile()
+    g, f = os.popen2(exe, "t", 0)
+
+    msg = read_message(f, timeout=10.0)
+    m1 = msg.nextstring()
+    assert m1 == "open"
+    m2 = msg.nextstring()
+    assert m2 == "/tmp/foobar"
+    m3 = msg.nextnum()
+    assert m3 == os.O_RDONLY
+    m4 = msg.nextnum()
+    assert m4 == 0777
+    assert msg.end()
+
+    g.write(MessageBuilder().packnum(0).packnum(77).getvalue())
+
+    msg = read_message(f, timeout=10.0)
+    m1 = msg.nextstring()
+    assert m1 == "dup"
+    m2 = msg.nextnum()
+    assert m2 == 77
+    assert msg.end()
+
+    g.write(MessageBuilder().packnum(0).packnum(78).getvalue())
+
+    g.close()
+    tail = f.read()
+    f.close()
+    assert tail == ""



More information about the Pypy-commit mailing list