[pypy-svn] r45188 - in pypy/dist/pypy: config translator/c translator/c/test
arigo at codespeak.net
arigo at codespeak.net
Wed Jul 18 17:16:48 CEST 2007
Author: arigo
Date: Wed Jul 18 17:16:47 2007
New Revision: 45188
Added:
pypy/dist/pypy/translator/c/sandbox.py (contents, props changed)
pypy/dist/pypy/translator/c/sandboxmsg.py (contents, props changed)
pypy/dist/pypy/translator/c/test/test_sandbox.py (contents, props changed)
Modified:
pypy/dist/pypy/config/translationoption.py
pypy/dist/pypy/translator/c/database.py
pypy/dist/pypy/translator/c/genc.py
pypy/dist/pypy/translator/c/node.py
Log:
Beginning of sandboxing support:
"""Generation of sandboxing stand-alone executable from RPython code.
In place of real calls to any external function, this code builds
trampolines that marshal their input arguments, dump them to STDOUT,
and wait for an answer on STDIN. Enable with 'translate.py --sandbox'.
"""
So far, only external functions with int or zero-terminated char*
arguments are supported. The others give a translation-time
NotImplementedError: at no point should a --sandbox translation give an
unsafe executable.
Modified: pypy/dist/pypy/config/translationoption.py
==============================================================================
--- pypy/dist/pypy/config/translationoption.py (original)
+++ pypy/dist/pypy/config/translationoption.py Wed Jul 18 17:16:47 2007
@@ -57,6 +57,8 @@
default=0),
BoolOption("countmallocs", "Count mallocs and frees", default=False,
cmdline=None),
+ BoolOption("sandbox", "Produce a fully-sandboxed executable",
+ default=False, cmdline="--sandbox"),
# misc
StrOption("cc", "Specify compiler to use for compiling generated C", cmdline="--cc"),
Modified: pypy/dist/pypy/translator/c/database.py
==============================================================================
--- pypy/dist/pypy/translator/c/database.py (original)
+++ pypy/dist/pypy/translator/c/database.py Wed Jul 18 17:16:47 2007
@@ -25,9 +25,11 @@
def __init__(self, translator=None, standalone=False,
gcpolicyclass=None,
stacklesstransformer=None,
- thread_enabled=False):
+ thread_enabled=False,
+ sandbox=False):
self.translator = translator
self.standalone = standalone
+ self.sandbox = sandbox
self.stacklesstransformer = stacklesstransformer
if gcpolicyclass is None:
gcpolicyclass = gc.RefcountingGcPolicy
@@ -354,3 +356,7 @@
for node in self.structdefnodes.values():
produce(node)
return result
+
+ def need_sandboxing(self, fnobj):
+ return self.sandbox and (
+ not getattr(fnobj, '_safe_not_sandboxed', False))
Modified: pypy/dist/pypy/translator/c/genc.py
==============================================================================
--- pypy/dist/pypy/translator/c/genc.py (original)
+++ pypy/dist/pypy/translator/c/genc.py Wed Jul 18 17:16:47 2007
@@ -58,7 +58,8 @@
db = LowLevelDatabase(translator, standalone=self.standalone,
gcpolicyclass=gcpolicyclass,
stacklesstransformer=stacklesstransformer,
- thread_enabled=self.config.translation.thread)
+ thread_enabled=self.config.translation.thread,
+ sandbox=self.config.translation.sandbox)
# pass extra options into pyobjmaker
if pyobj_options:
for key, value in pyobj_options.items():
Modified: pypy/dist/pypy/translator/c/node.py
==============================================================================
--- pypy/dist/pypy/translator/c/node.py (original)
+++ pypy/dist/pypy/translator/c/node.py Wed Jul 18 17:16:47 2007
@@ -605,7 +605,7 @@
self.db = db
self.T = T
self.obj = obj
- if hasattr(obj, 'includes'):
+ if hasattr(obj, 'includes') and not db.need_sandboxing(obj):
self.includes = obj.includes
self.name = forcename or self.basename()
else:
@@ -695,13 +695,16 @@
funcgen.implementation_end()
def select_function_code_generators(fnobj, db, functionname):
+ sandbox = db.need_sandboxing(fnobj)
if hasattr(fnobj, '_external_name'):
+ assert not sandbox
db.externalfuncs[fnobj._external_name] = fnobj
return []
elif fnobj._callable in extfunc.EXTERNALS:
# 'fnobj' is one of the ll_xyz() functions with the suggested_primitive
# flag in pypy.rpython.module.*. The corresponding C wrappers are
# written by hand in src/ll_*.h, and declared in extfunc.EXTERNALS.
+ assert not sandbox
db.externalfuncs[fnobj._callable] = fnobj
return []
elif getattr(fnobj._callable, 'suggested_primitive', False):
@@ -712,10 +715,16 @@
return [FunctionCodeGenerator(fnobj.graph, db, exception_policy,
functionname)]
elif getattr(fnobj, 'external', None) == 'C':
- # deprecated case
if hasattr(fnobj, 'includes'):
- return [] # assume no wrapper needed
+ # apply the sandbox transformation
+ if sandbox:
+ from pypy.translator.c.sandbox import getSandboxFuncCodeGen
+ return [getSandboxFuncCodeGen(fnobj, db)]
+ else:
+ return [] # assume no wrapper needed
else:
+ # deprecated case
+ assert not sandbox
return [CExternalFunctionCodeGenerator(fnobj, db)]
else:
raise ValueError, "don't know how to generate code for %r" % (fnobj,)
Added: pypy/dist/pypy/translator/c/sandbox.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/c/sandbox.py Wed Jul 18 17:16:47 2007
@@ -0,0 +1,139 @@
+"""Generation of sandboxing stand-alone executable from RPython code.
+In place of real calls to any external function, this code builds
+trampolines that marshal their input arguments, dump them to STDOUT,
+and wait for an answer on STDIN. Enable with 'translate.py --sandbox'.
+"""
+from pypy.translator.c.sandboxmsg import MessageBuilder, LLMessage
+
+# ____________________________________________________________
+#
+# Sandboxing code generator for external functions
+#
+
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.annotation import model as annmodel
+from pypy.rlib.unroll import unrolling_iterable
+from pypy.translator.c import funcgen
+from pypy.tool.sourcetools import func_with_new_name
+from pypy.rpython.annlowlevel import MixLevelHelperAnnotator
+
+def getSandboxFuncCodeGen(fnobj, db):
+ graph = get_external_function_sandbox_graph(fnobj, db)
+ return funcgen.FunctionCodeGenerator(graph, db)
+
+# a version of os.read() and os.write() that are not mangled
+# by the sandboxing mechanism
+ll_read_not_sandboxed = rffi.llexternal('read',
+ [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
+ rffi.SIZE_T)
+ll_read_not_sandboxed._obj._safe_not_sandboxed = True
+
+ll_write_not_sandboxed = rffi.llexternal('write',
+ [rffi.INT, rffi.CCHARP, rffi.SIZE_T],
+ rffi.SIZE_T)
+ll_write_not_sandboxed._obj._safe_not_sandboxed = True
+
+def writeall_not_sandboxed(fd, buf, length):
+ while length > 0:
+ size = rffi.cast(rffi.SIZE_T, length)
+ count = rffi.cast(lltype.Signed, ll_write_not_sandboxed(fd, buf, size))
+ if count < 0:
+ raise IOError
+ length -= count
+ buf = lltype.direct_ptradd(lltype.direct_arrayitems(buf), count)
+ buf = rffi.cast(rffi.CCHARP, buf)
+
+def readall_not_sandboxed(fd, length):
+ buf = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
+ p = buf
+ got = 0
+ while got < length:
+ size1 = rffi.cast(rffi.SIZE_T, length - got)
+ count = rffi.cast(lltype.Signed, ll_read_not_sandboxed(fd, p, size1))
+ if count < 0:
+ raise IOError
+ got += count
+ p = lltype.direct_ptradd(lltype.direct_arrayitems(p), count)
+ p = rffi.cast(rffi.CCHARP, p)
+ return buf
+
+def buf2num(buf, index=0):
+ c0 = ord(buf[index ])
+ c1 = ord(buf[index+1])
+ c2 = ord(buf[index+2])
+ c3 = ord(buf[index+3])
+ if c0 >= 0x80:
+ c0 -= 0x100
+ return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3
+
+
+def get_external_function_sandbox_graph(fnobj, db):
+ """Build the graph of a helper trampoline function to be used
+ in place of real calls to the external function 'fnobj'. The
+ trampoline marshals its input arguments, dumps them to STDOUT,
+ and waits for an answer on STDIN.
+ """
+ # XXX for now, only supports function with int and string arguments
+ # and returning an int.
+ FUNCTYPE = lltype.typeOf(fnobj)
+ unroll_args = []
+ for i, ARG in enumerate(FUNCTYPE.ARGS):
+ if ARG == rffi.INT: # 'int' argument
+ methodname = "packnum"
+ elif ARG == rffi.CCHARP: # 'char*' argument, assumed zero-terminated
+ methodname = "packccharp"
+ else:
+ raise NotImplementedError("external function %r argument type %s" %
+ (fnobj, ARG))
+ unroll_args.append((i, methodname))
+ if FUNCTYPE.RESULT != rffi.INT:
+ raise NotImplementedError("exernal function %r return type %s" % (
+ fnobj, FUNCTYPE.RESULT))
+ unroll_args = unrolling_iterable(unroll_args)
+ fnname = fnobj._name
+
+ def execute(*args):
+ STDIN = 0
+ STDOUT = 1
+ assert len(args) == len(FUNCTYPE.ARGS)
+ # marshal the input arguments
+ msg = MessageBuilder()
+ msg.packstring(fnname)
+ for index, methodname in unroll_args:
+ getattr(msg, methodname)(args[index])
+ buf = msg.as_rffi_buf()
+ try:
+ writeall_not_sandboxed(STDOUT, buf, msg.getlength())
+ finally:
+ lltype.free(buf, flavor='raw')
+
+ # wait for the answer
+ buf = readall_not_sandboxed(STDIN, 4)
+ try:
+ length = buf2num(buf)
+ finally:
+ lltype.free(buf, flavor='raw')
+
+ length -= 4 # the original length includes the header
+ if length < 0:
+ raise IOError
+ buf = readall_not_sandboxed(STDIN, length)
+ try:
+ # decode the answer
+ msg = LLMessage(buf, 0, length)
+ errcode = msg.nextnum()
+ if errcode != 0:
+ raise IOError
+ result = msg.nextnum()
+ finally:
+ lltype.free(buf, flavor='raw')
+
+ return result
+ execute = func_with_new_name(execute, 'sandboxed_' + fnname)
+
+ ann = MixLevelHelperAnnotator(db.translator.rtyper)
+ args_s = [annmodel.lltype_to_annotation(ARG) for ARG in FUNCTYPE.ARGS]
+ s_result = annmodel.lltype_to_annotation(FUNCTYPE.RESULT)
+ graph = ann.getgraph(execute, args_s, s_result)
+ ann.finish()
+ return graph
Added: pypy/dist/pypy/translator/c/sandboxmsg.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/c/sandboxmsg.py Wed Jul 18 17:16:47 2007
@@ -0,0 +1,150 @@
+import sys, os
+import struct
+import select
+
+from pypy.annotation import policy, model as annmodel
+
+# ____________________________________________________________
+#
+# Marshalling of external function calls' arguments
+#
+
+class MessageBuilder(object):
+ def __init__(self):
+ self.value = ['\xFF', '\xFF', '\xFF', '\xFF']
+
+ def packstring(self, s):
+ self.packnum(len(s), "s")
+ self.value += s
+ return self
+ packstring._annenforceargs_ = policy.Sig(None, str)
+
+ def packccharp(self, p):
+ length = 0
+ while p[length] != '\x00':
+ length += 1
+ self.packnum(length, "s")
+ for i in range(length):
+ self.value.append(p[i])
+ return self
+
+ def packnum(self, n, prefix="i"):
+ self.value.append(prefix)
+ self.value.append(chr((n >> 24) & 0xFF))
+ self.value.append(chr((n >> 16) & 0xFF))
+ self.value.append(chr((n >> 8) & 0xFF))
+ self.value.append(chr((n ) & 0xFF))
+ return self
+ packnum._annenforceargs_ = policy.Sig(None, int, annmodel.SomeChar())
+
+ def _fixlength(self):
+ n = len(self.value)
+ self.value[0] = chr((n >> 24) & 0xFF)
+ self.value[1] = chr((n >> 16) & 0xFF)
+ self.value[2] = chr((n >> 8) & 0xFF)
+ self.value[3] = chr((n ) & 0xFF)
+
+ def getvalue(self):
+ self._fixlength()
+ return ''.join(self.value)
+
+ def as_rffi_buf(self):
+ from pypy.rpython.lltypesystem import lltype, rffi
+ self._fixlength()
+ value = self.value
+ length = len(value)
+ array = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw')
+ for i in range(length):
+ array[i] = value[i]
+ return array
+
+ def getlength(self):
+ return len(self.value)
+
+
+class LLMessage(object):
+ def __init__(self, value, start, stop):
+ self.value = value
+ self.pos = start
+ self.stop = stop
+
+ def _char(self):
+ i = self.pos
+ if i >= self.stop:
+ raise ValueError
+ self.pos = i + 1
+ return self.value[i]
+
+ def nextstring(self):
+ length = self.nextnum("s")
+ i = self.pos
+ self.pos = i + length
+ if self.pos > self.stop:
+ raise ValueError
+ # general version assuming that self.value is only indexable,
+ # not sliceable. See also the Message subclass.
+ return ''.join([self.value[index] for index in range(i, self.pos)])
+
+ def nextnum(self, prefix="i"):
+ t = self._char()
+ if t != prefix:
+ raise ValueError
+ c0 = ord(self._char())
+ c1 = ord(self._char())
+ c2 = ord(self._char())
+ c3 = ord(self._char())
+ if c0 >= 0x80:
+ c0 -= 0x100
+ return (c0 << 24) | (c1 << 16) | (c2 << 8) | c3
+
+ def end(self):
+ return self.pos >= self.stop
+
+
+class Message(LLMessage):
+ "NOT_RPYTHON"
+ # 'value' is a regular string in this case,
+ # allowing a more reasonable implementation of nextstring()
+ def __init__(self, buf):
+ LLMessage.__init__(self, buf, start=0, stop=len(buf))
+
+ def nextstring(self):
+ length = self.nextnum("s")
+ i = self.pos
+ self.pos = i + length
+ if self.pos > self.stop:
+ raise ValueError
+ return self.value[i:self.pos]
+
+def timeout_read(f, size, timeout=None):
+ if size < 0:
+ raise ValueError("negative size")
+ if timeout is None:
+ return f.read(size)
+ else:
+ # XXX not Win32-compliant!
+ assert not sys.platform.startswith('win'), "XXX fix me"
+ # It also assumes that 'f' does no buffering!
+ fd = f.fileno()
+ result = ""
+ while len(result) < size:
+ iwtd, owtd, ewtd = select.select([fd], [], [], timeout)
+ if not iwtd:
+ raise Timeout("got %d bytes after %s seconds, expected %d" % (
+ len(result), timeout, size))
+ buf = os.read(fd, size - len(result))
+ if not buf:
+ raise EOFError
+ result += buf
+ return result
+
+class Timeout(Exception):
+ pass
+
+def read_message(f, timeout=None):
+ """NOT_RPYTHON - Warning! 'timeout' only works if 'f' is opened
+ with no buffering at all!
+ """
+ msglength, = struct.unpack("!i", timeout_read(f, 4, timeout))
+ buf = timeout_read(f, msglength - 4, timeout)
+ return Message(buf)
Added: pypy/dist/pypy/translator/c/test/test_sandbox.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/c/test/test_sandbox.py Wed Jul 18 17:16:47 2007
@@ -0,0 +1,67 @@
+import sys, os
+import struct
+
+from pypy.rpython.lltypesystem import rffi
+from pypy.translator.c.sandboxmsg import Message, MessageBuilder, read_message
+from pypy.translator.interactive import Translation
+
+
+def test_sandbox_message():
+ def num(n):
+ return struct.pack("!i", n)
+ msg = MessageBuilder()
+ msg.packstring("open")
+ msg.packccharp(rffi.str2charp("/tmp/foobar"))
+ msg.packnum(123)
+ res = msg.getvalue()
+ assert res == (num(len(res)) +
+ "s" + num(4) + "open" +
+ "s" + num(11) + "/tmp/foobar" +
+ "i" + num(123))
+
+ msg = Message(res[4:])
+ m1 = msg.nextstring()
+ assert m1 == "open"
+ m2 = msg.nextstring()
+ assert m2 == "/tmp/foobar"
+ m3 = msg.nextnum()
+ assert m3 == 123
+
+def test_sandbox():
+ def entry_point(argv):
+ fd = os.open("/tmp/foobar", os.O_RDONLY, 0777)
+ assert fd == 77
+ fd2 = os.dup(fd)
+ assert fd2 == 78
+ return 0
+
+ t = Translation(entry_point, backend='c', standalone=True, sandbox=True)
+ exe = t.compile()
+ g, f = os.popen2(exe, "t", 0)
+
+ msg = read_message(f, timeout=10.0)
+ m1 = msg.nextstring()
+ assert m1 == "open"
+ m2 = msg.nextstring()
+ assert m2 == "/tmp/foobar"
+ m3 = msg.nextnum()
+ assert m3 == os.O_RDONLY
+ m4 = msg.nextnum()
+ assert m4 == 0777
+ assert msg.end()
+
+ g.write(MessageBuilder().packnum(0).packnum(77).getvalue())
+
+ msg = read_message(f, timeout=10.0)
+ m1 = msg.nextstring()
+ assert m1 == "dup"
+ m2 = msg.nextnum()
+ assert m2 == 77
+ assert msg.end()
+
+ g.write(MessageBuilder().packnum(0).packnum(78).getvalue())
+
+ g.close()
+ tail = f.read()
+ f.close()
+ assert tail == ""
More information about the Pypy-commit
mailing list