[pypy-commit] pypy length-hint: merge default
pjenvey
noreply at buildbot.pypy.org
Thu Sep 27 02:04:31 CEST 2012
Author: Philip Jenvey <pjenvey at underboss.org>
Branch: length-hint
Changeset: r57623:0cd03df08692
Date: 2012-09-26 15:21 -0700
http://bitbucket.org/pypy/pypy/changeset/0cd03df08692/
Log: merge default
diff --git a/pypy/doc/config/objspace.usemodules._csv.txt b/pypy/doc/config/objspace.usemodules._csv.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.usemodules._csv.txt
@@ -0,0 +1,2 @@
+Implementation in RPython for the core of the 'csv' module
+
diff --git a/pypy/jit/metainterp/optimizeopt/util.py b/pypy/jit/metainterp/optimizeopt/util.py
--- a/pypy/jit/metainterp/optimizeopt/util.py
+++ b/pypy/jit/metainterp/optimizeopt/util.py
@@ -2,9 +2,10 @@
from pypy.rlib.objectmodel import r_dict, compute_identity_hash
from pypy.rlib.rarithmetic import intmask
from pypy.rlib.unroll import unrolling_iterable
-from pypy.jit.metainterp import resoperation, history
+from pypy.jit.metainterp import resoperation
from pypy.rlib.debug import make_sure_not_resized
from pypy.jit.metainterp.resoperation import rop
+from pypy.rlib.objectmodel import we_are_translated
# ____________________________________________________________
# Misc. utilities
@@ -28,13 +29,20 @@
def make_dispatcher_method(Class, name_prefix, op_prefix=None, default=None):
ops = _findall(Class, name_prefix, op_prefix)
def dispatch(self, op, *args):
- opnum = op.getopnum()
- for value, cls, func in ops:
- if opnum == value:
- assert isinstance(op, cls)
+ if we_are_translated():
+ opnum = op.getopnum()
+ for value, cls, func in ops:
+ if opnum == value:
+ assert isinstance(op, cls)
+ return func(self, op, *args)
+ if default:
+ return default(self, op, *args)
+ else:
+ func = getattr(Class, name_prefix + op.getopname().upper(), None)
+ if func is not None:
return func(self, op, *args)
- if default:
- return default(self, op, *args)
+ if default:
+ return default(self, op, *args)
dispatch.func_name = "dispatch_" + name_prefix
return dispatch
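The new untranslated branch skips the unrolled loop and finds the handler by name instead. A minimal stand-alone sketch of that lookup pattern (plain Python 2, with hypothetical handler names rather than the real metainterp classes):

class Optimizer(object):
    def optimize_INT_ADD(self, op):
        return 'INT_ADD handler'
    def optimize_default(self, op):
        return 'default handler for ' + op.getopname()

class Op(object):
    def __init__(self, opname):
        self.opname = opname
    def getopname(self):
        return self.opname

def dispatch(self, op):
    # untranslated path: look the handler up on the class by opname
    func = getattr(Optimizer, 'optimize_' + op.getopname().upper(), None)
    if func is not None:
        return func(self, op)
    return Optimizer.optimize_default(self, op)

print dispatch(Optimizer(), Op('int_add'))   # -> INT_ADD handler
print dispatch(Optimizer(), Op('int_mul'))   # -> default handler for int_mul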
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -2028,6 +2028,7 @@
y -= 1
return res
def g(x, y):
+ set_param(myjitdriver, 'max_unroll_loops', 5)
a1 = f(A(x), y)
a2 = f(A(x), y)
b1 = f(B(x), y)
diff --git a/pypy/jit/metainterp/test/test_send.py b/pypy/jit/metainterp/test/test_send.py
--- a/pypy/jit/metainterp/test/test_send.py
+++ b/pypy/jit/metainterp/test/test_send.py
@@ -1,5 +1,5 @@
import py
-from pypy.rlib.jit import JitDriver, promote, elidable
+from pypy.rlib.jit import JitDriver, promote, elidable, set_param
from pypy.jit.codewriter.policy import StopAtXPolicy
from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
@@ -181,6 +181,7 @@
def getvalue(self):
return self.y
def f(x, y):
+ set_param(myjitdriver, 'max_unroll_loops', 5)
if x & 1:
w = W1(x)
else:
@@ -226,6 +227,7 @@
w2 = W2(20)
def f(x, y):
+ set_param(myjitdriver, 'max_unroll_loops', 5)
if x & 1:
w = w1
else:
diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py
--- a/pypy/module/_cffi_backend/__init__.py
+++ b/pypy/module/_cffi_backend/__init__.py
@@ -1,11 +1,13 @@
from pypy.interpreter.mixedmodule import MixedModule
+from pypy.rlib import rdynload
+
class Module(MixedModule):
appleveldefs = {
}
interpleveldefs = {
- '__version__': 'space.wrap("0.3")',
+ '__version__': 'space.wrap("0.4")',
'nonstandard_integer_types': 'misc.nonstandard_integer_types',
@@ -42,3 +44,12 @@
'FFI_DEFAULT_ABI': 'ctypefunc._get_abi(space, "FFI_DEFAULT_ABI")',
'FFI_CDECL': 'ctypefunc._get_abi(space,"FFI_DEFAULT_ABI")',#win32 name
}
+
+for _name in ["RTLD_LAZY", "RTLD_NOW", "RTLD_GLOBAL", "RTLD_LOCAL",
+ "RTLD_NODELETE", "RTLD_NOLOAD", "RTLD_DEEPBIND"]:
+ if getattr(rdynload.cConfig, _name) is not None:
+ Module.interpleveldefs[_name] = 'space.wrap(%d)' % (
+ getattr(rdynload.cConfig, _name),)
+
+for _name in ["RTLD_LAZY", "RTLD_NOW", "RTLD_GLOBAL", "RTLD_LOCAL"]:
+ Module.interpleveldefs.setdefault(_name, 'space.wrap(0)')
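With the constants exported, the app-level load_library() can take dlopen-style flags directly. A hedged usage sketch (assumes a PyPy build with the _cffi_backend module enabled; flags missing on the platform fall back to 0 via the setdefault above):

import _cffi_backend

# None means "the main program"; combine binding and visibility flags
lib = _cffi_backend.load_library(
    None, _cffi_backend.RTLD_NOW | _cffi_backend.RTLD_GLOBAL)
print lib   # repr starts with "<clibrary '"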
diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py
--- a/pypy/module/_cffi_backend/ctypefunc.py
+++ b/pypy/module/_cffi_backend/ctypefunc.py
@@ -286,8 +286,8 @@
for i, cf in enumerate(ctype.fields_list):
if cf.is_bitfield():
raise OperationError(space.w_NotImplementedError,
- space.wrap("cannot pass as argument a struct "
- "with bit fields"))
+ space.wrap("cannot pass as argument or return value "
+ "a struct with bit fields"))
ffi_subtype = self.fb_fill_type(cf.ctype, False)
if elements:
elements[i] = ffi_subtype
diff --git a/pypy/module/_cffi_backend/libraryobj.py b/pypy/module/_cffi_backend/libraryobj.py
--- a/pypy/module/_cffi_backend/libraryobj.py
+++ b/pypy/module/_cffi_backend/libraryobj.py
@@ -5,7 +5,6 @@
from pypy.interpreter.typedef import TypeDef
from pypy.rpython.lltypesystem import lltype, rffi
from pypy.rlib.rdynload import DLLHANDLE, dlopen, dlsym, dlclose, DLOpenError
-from pypy.rlib.rdynload import RTLD_GLOBAL
from pypy.module._cffi_backend.cdataobj import W_CData
from pypy.module._cffi_backend.ctypeobj import W_CType
@@ -15,17 +14,13 @@
_immutable_ = True
handle = rffi.cast(DLLHANDLE, 0)
- def __init__(self, space, filename, is_global):
+ def __init__(self, space, filename, flags):
self.space = space
- if is_global and RTLD_GLOBAL is not None:
- mode = RTLD_GLOBAL
- else:
- mode = -1 # default value, corresponds to RTLD_LOCAL
with rffi.scoped_str2charp(filename) as ll_libname:
if filename is None:
filename = "<None>"
try:
- self.handle = dlopen(ll_libname, mode)
+ self.handle = dlopen(ll_libname, flags)
except DLOpenError, e:
raise operationerrfmt(space.w_OSError,
"cannot load library %s: %s",
@@ -100,7 +95,7 @@
W_Library.acceptable_as_base_class = False
-@unwrap_spec(filename="str_or_None", is_global=int)
-def load_library(space, filename, is_global=0):
- lib = W_Library(space, filename, is_global)
+@unwrap_spec(filename="str_or_None", flags=int)
+def load_library(space, filename, flags=0):
+ lib = W_Library(space, filename, flags)
return space.wrap(lib)
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -42,19 +42,34 @@
return sizeof(BPtr)
-def find_and_load_library(name, is_global=0):
+def find_and_load_library(name, flags=RTLD_NOW):
import ctypes.util
if name is None:
path = None
else:
path = ctypes.util.find_library(name)
- return load_library(path, is_global)
+ return load_library(path, flags)
def test_load_library():
x = find_and_load_library('c')
assert repr(x).startswith("<clibrary '")
- x = find_and_load_library('c', 1)
+ x = find_and_load_library('c', RTLD_NOW | RTLD_GLOBAL)
assert repr(x).startswith("<clibrary '")
+ x = find_and_load_library('c', RTLD_LAZY)
+ assert repr(x).startswith("<clibrary '")
+
+def test_all_rtld_symbols():
+ import sys
+ FFI_DEFAULT_ABI # these symbols must be defined
+ FFI_CDECL
+ RTLD_LAZY
+ RTLD_NOW
+ RTLD_GLOBAL
+ RTLD_LOCAL
+ if sys.platform.startswith("linux"):
+ RTLD_NODELETE
+ RTLD_NOLOAD
+ RTLD_DEEPBIND
def test_nonstandard_integer_types():
d = nonstandard_integer_types()
diff --git a/pypy/module/_cffi_backend/test/test_c.py b/pypy/module/_cffi_backend/test/test_c.py
--- a/pypy/module/_cffi_backend/test/test_c.py
+++ b/pypy/module/_cffi_backend/test/test_c.py
@@ -22,7 +22,6 @@
from pypy.tool.udir import udir
from pypy.conftest import gettestobjspace, option
from pypy.interpreter import gateway
-from pypy.module._cffi_backend.test import _backend_test_c
from pypy.module._cffi_backend import Module
from pypy.translator.platform import host
from pypy.translator.tool.cbuild import ExternalCompilationInfo
@@ -87,20 +86,24 @@
all_names = ', '.join(Module.interpleveldefs.keys())
+backend_test_c = py.path.local(__file__).join('..', '_backend_test_c.py')
+
lst = []
-for name, value in _backend_test_c.__dict__.items():
- if name.startswith('test_'):
- lst.append(value)
-lst.sort(key=lambda func: func.func_code.co_firstlineno)
+with backend_test_c.open('r') as f:
+ for line in f:
+ if line.startswith('def test_'):
+ line = line[4:]
+ line = line[:line.index('():')]
+ lst.append(line)
tmpdir = udir.join('test_c').ensure(dir=1)
tmpname = tmpdir.join('_test_c.py')
with tmpname.open('w') as f:
for func in lst:
- print >> f, 'def %s(self):' % (func.__name__,)
+ print >> f, 'def %s(self):' % (func,)
print >> f, ' import _all_test_c'
- print >> f, ' _all_test_c.%s()' % (func.__name__,)
+ print >> f, ' _all_test_c.%s()' % (func,)
tmpname2 = tmpdir.join('_all_test_c.py')
with tmpname2.open('w') as f:
@@ -110,7 +113,7 @@
print >> f, ' class test:'
print >> f, ' raises = staticmethod(raises)'
print >> f, ' skip = staticmethod(skip)'
- print >> f, py.path.local(__file__).join('..', '_backend_test_c.py').read()
+ print >> f, backend_test_c.read()
mod = tmpname.pyimport()
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -405,7 +405,6 @@
"ascii_encode",
"latin_1_encode",
"utf_7_encode",
- "utf_8_encode",
"utf_16_encode",
"utf_16_be_encode",
"utf_16_le_encode",
@@ -422,7 +421,6 @@
"ascii_decode",
"latin_1_decode",
"utf_7_decode",
- "utf_8_decode",
"utf_16_decode",
"utf_16_be_decode",
"utf_16_le_decode",
@@ -437,6 +435,30 @@
make_encoder_wrapper('mbcs_encode')
make_decoder_wrapper('mbcs_decode')
+# utf-8 functions are not regular, because we have to pass
+# "allow_surrogates=True"
+@unwrap_spec(uni=unicode, errors='str_or_None')
+def utf_8_encode(space, uni, errors="strict"):
+ if errors is None:
+ errors = 'strict'
+ state = space.fromcache(CodecState)
+ result = runicode.unicode_encode_utf_8(
+ uni, len(uni), errors, state.encode_error_handler,
+ allow_surrogates=True)
+ return space.newtuple([space.wrap(result), space.wrap(len(uni))])
+
+@unwrap_spec(string='bufferstr', errors='str_or_None')
+def utf_8_decode(space, string, errors="strict", w_final=False):
+ if errors is None:
+ errors = 'strict'
+ final = space.is_true(w_final)
+ state = space.fromcache(CodecState)
+ result, consumed = runicode.str_decode_utf_8(
+ string, len(string), errors,
+ final, state.decode_error_handler,
+ allow_surrogates=True)
+ return space.newtuple([space.wrap(result), space.wrap(consumed)])
+
@unwrap_spec(data=str, errors='str_or_None', byteorder=int)
def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=False):
if errors is None:
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -67,8 +67,15 @@
w_line = space.next(self.w_iter)
except OperationError, e:
if e.match(space, space.w_StopIteration):
- if field_builder is not None:
- raise self.error("newline inside string")
+ if (field_builder is not None and
+ state != START_RECORD and state != EAT_CRNL and
+ (len(field_builder.build()) > 0 or
+ state == IN_QUOTED_FIELD)):
+ if dialect.strict:
+ raise self.error("newline inside string")
+ else:
+ self.save_field(field_builder)
+ break
raise
self.line_num += 1
line = space.str_w(w_line)
diff --git a/pypy/module/_csv/test/test_reader.py b/pypy/module/_csv/test/test_reader.py
--- a/pypy/module/_csv/test/test_reader.py
+++ b/pypy/module/_csv/test/test_reader.py
@@ -99,3 +99,11 @@
def test_dubious_quote(self):
self._read_test(['12,12,1",'], [['12', '12', '1"', '']])
+
+ def test_read_eof(self):
+ self._read_test(['a,"'], [['a', '']])
+ self._read_test(['"a'], [['a']])
+ self._read_test(['^'], [['\n']], escapechar='^')
+ self._read_test(['a,"'], 'Error', strict=True)
+ self._read_test(['"a'], 'Error', strict=True)
+ self._read_test(['^'], 'Error', escapechar='^', strict=True)
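In other words, with a non-strict dialect an unterminated quoted field at end of input is now returned as a field instead of raising, while strict=True keeps the old error. A hedged app-level sketch of the semantics these tests assert (assumes a PyPy build with the _csv module enabled):

import csv

print list(csv.reader(['a,"']))     # expected: [['a', '']]
print list(csv.reader(['"a']))      # expected: [['a']]

try:
    list(csv.reader(['"a'], strict=True))
except csv.Error, e:
    print 'strict mode raised:', e  # "newline inside string"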
diff --git a/pypy/module/_ffi/interp_funcptr.py b/pypy/module/_ffi/interp_funcptr.py
--- a/pypy/module/_ffi/interp_funcptr.py
+++ b/pypy/module/_ffi/interp_funcptr.py
@@ -287,7 +287,11 @@
w_restype)
addr = rffi.cast(rffi.VOIDP, addr)
func = libffi.Func(name, argtypes, restype, addr, flags)
- return W_FuncPtr(func, argtypes_w, w_restype)
+ try:
+ return W_FuncPtr(func, argtypes_w, w_restype)
+ except OSError:
+ raise OperationError(space.w_SystemError,
+ space.wrap("internal error building the Func object"))
W_FuncPtr.typedef = TypeDef(
diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -29,7 +29,9 @@
info is a pair (hostaddr, port).
"""
try:
- sock, addr = self.accept(W_RSocket)
+ fd, addr = self.accept()
+ sock = rsocket.make_socket(
+ fd, self.family, self.type, self.proto, W_RSocket)
return space.newtuple([space.wrap(sock),
addr.as_object(sock.fd, space)])
except SocketError, e:
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -874,7 +874,7 @@
# Iteration
-class W_DictMultiIterKeysObject(W_Object):
+class W_BaseDictMultiIterObject(W_Object):
from pypy.objspace.std.dicttype import dictiter_typedef as typedef
_immutable_fields_ = ["iteratorimplementation"]
@@ -885,32 +885,17 @@
w_self.space = space
w_self.iteratorimplementation = iteratorimplementation
+class W_DictMultiIterKeysObject(W_BaseDictMultiIterObject):
+ pass
+
+class W_DictMultiIterValuesObject(W_BaseDictMultiIterObject):
+ pass
+
+class W_DictMultiIterItemsObject(W_BaseDictMultiIterObject):
+ pass
+
registerimplementation(W_DictMultiIterKeysObject)
-
-class W_DictMultiIterValuesObject(W_Object):
- from pypy.objspace.std.dicttype import dictiter_typedef as typedef
-
- _immutable_fields_ = ["iteratorimplementation"]
-
- ignore_for_isinstance_cache = True
-
- def __init__(w_self, space, iteratorimplementation):
- w_self.space = space
- w_self.iteratorimplementation = iteratorimplementation
-
registerimplementation(W_DictMultiIterValuesObject)
-
-class W_DictMultiIterItemsObject(W_Object):
- from pypy.objspace.std.dicttype import dictiter_typedef as typedef
-
- _immutable_fields_ = ["iteratorimplementation"]
-
- ignore_for_isinstance_cache = True
-
- def __init__(w_self, space, iteratorimplementation):
- w_self.space = space
- w_self.iteratorimplementation = iteratorimplementation
-
registerimplementation(W_DictMultiIterItemsObject)
def iter__DictMultiIterKeysObject(space, w_dictiter):
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -71,7 +71,7 @@
if result is not None:
return W_RopeObject(result)
elif encoding == "utf-8":
- result = rope.unicode_encode_utf8(node)
+ result = rope.unicode_encode_utf8(node, allow_surrogates=True)
if result is not None:
return W_RopeObject(result)
return encode_object(space, w_unistr, encoding, errors)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -13,7 +13,7 @@
from pypy.rlib.objectmodel import compute_hash, specialize
from pypy.rlib.objectmodel import compute_unique_id
from pypy.rlib.rstring import UnicodeBuilder
-from pypy.rlib.runicode import unicode_encode_unicode_escape
+from pypy.rlib.runicode import make_unicode_escape_function
from pypy.module.unicodedata import unicodedb
from pypy.tool.sourcetools import func_with_new_name
from pypy.rlib import jit
@@ -918,10 +918,13 @@
space.wrap("character mapping must return integer, None or unicode"))
return W_UnicodeObject(u''.join(result))
+_repr_function, _ = make_unicode_escape_function(
+ pass_printable=False, unicode_output=False, quotes=True, prefix='u')
+
def repr__Unicode(space, w_unicode):
chars = w_unicode._value
size = len(chars)
- s = unicode_encode_unicode_escape(chars, size, "strict", quotes=True)
+ s = _repr_function(chars, size, "strict")
return space.wrap(s)
def mod__Unicode_ANY(space, w_format, w_values):
diff --git a/pypy/objspace/std/unicodetype.py b/pypy/objspace/std/unicodetype.py
--- a/pypy/objspace/std/unicodetype.py
+++ b/pypy/objspace/std/unicodetype.py
@@ -236,13 +236,14 @@
if encoding == 'ascii':
u = space.unicode_w(w_object)
eh = encode_error_handler(space)
- return space.wrap(unicode_encode_ascii(u, len(u), None,
- errorhandler=eh))
+ return space.wrap(unicode_encode_ascii(
+ u, len(u), None, errorhandler=eh))
if encoding == 'utf-8':
u = space.unicode_w(w_object)
eh = encode_error_handler(space)
- return space.wrap(unicode_encode_utf_8(u, len(u), None,
- errorhandler=eh))
+ return space.wrap(unicode_encode_utf_8(
+ u, len(u), None, errorhandler=eh,
+ allow_surrogates=True))
from pypy.module._codecs.interp_codecs import lookup_codec
w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
if errors is None:
@@ -265,15 +266,14 @@
# XXX error handling
s = space.bufferstr_w(w_obj)
eh = decode_error_handler(space)
- return space.wrap(str_decode_ascii(s, len(s), None,
- final=True,
- errorhandler=eh)[0])
+ return space.wrap(str_decode_ascii(
+ s, len(s), None, final=True, errorhandler=eh)[0])
if encoding == 'utf-8':
s = space.bufferstr_w(w_obj)
eh = decode_error_handler(space)
- return space.wrap(str_decode_utf_8(s, len(s), None,
- final=True,
- errorhandler=eh)[0])
+ return space.wrap(str_decode_utf_8(
+ s, len(s), None, final=True, errorhandler=eh,
+ allow_surrogates=True)[0])
w_codecs = space.getbuiltinmodule("_codecs")
w_decode = space.getattr(w_codecs, space.wrap("decode"))
if errors is None:
diff --git a/pypy/rlib/rdynload.py b/pypy/rlib/rdynload.py
--- a/pypy/rlib/rdynload.py
+++ b/pypy/rlib/rdynload.py
@@ -44,6 +44,10 @@
RTLD_LOCAL = rffi_platform.DefinedConstantInteger('RTLD_LOCAL')
RTLD_GLOBAL = rffi_platform.DefinedConstantInteger('RTLD_GLOBAL')
RTLD_NOW = rffi_platform.DefinedConstantInteger('RTLD_NOW')
+ RTLD_LAZY = rffi_platform.DefinedConstantInteger('RTLD_LAZY')
+ RTLD_NODELETE = rffi_platform.DefinedConstantInteger('RTLD_NODELETE')
+ RTLD_NOLOAD = rffi_platform.DefinedConstantInteger('RTLD_NOLOAD')
+ RTLD_DEEPBIND = rffi_platform.DefinedConstantInteger('RTLD_DEEPBIND')
class cConfig:
pass
@@ -72,6 +76,7 @@
RTLD_LOCAL = cConfig.RTLD_LOCAL
RTLD_GLOBAL = cConfig.RTLD_GLOBAL
RTLD_NOW = cConfig.RTLD_NOW
+ RTLD_LAZY = cConfig.RTLD_LAZY
def dlerror():
# XXX this would never work on top of ll2ctypes, because
@@ -90,7 +95,8 @@
mode = RTLD_LOCAL
else:
mode = 0
- mode |= RTLD_NOW
+ if (mode & (RTLD_LAZY | RTLD_NOW)) == 0:
+ mode |= RTLD_NOW
res = c_dlopen(name, rffi.cast(rffi.INT, mode))
if not res:
err = dlerror()
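The effect is that callers may now pass RTLD_LAZY without getting RTLD_NOW forced on top of it; RTLD_NOW is only added when neither binding flag is present. A small sketch of the mode defaulting (the numeric values are the usual glibc ones, used here only for illustration):

RTLD_LAZY, RTLD_NOW, RTLD_GLOBAL = 0x1, 0x2, 0x100   # typical glibc values

def effective_mode(mode):
    # mirrors the new logic in dlopen(): only default to RTLD_NOW when
    # the caller asked for neither binding behaviour
    if (mode & (RTLD_LAZY | RTLD_NOW)) == 0:
        mode |= RTLD_NOW
    return mode

print hex(effective_mode(0))                        # 0x2 (RTLD_NOW)
print hex(effective_mode(RTLD_LAZY))                # 0x1 (lazy binding kept)
print hex(effective_mode(RTLD_GLOBAL | RTLD_LAZY))  # 0x101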
diff --git a/pypy/rlib/rope.py b/pypy/rlib/rope.py
--- a/pypy/rlib/rope.py
+++ b/pypy/rlib/rope.py
@@ -1485,7 +1485,7 @@
if rope.is_bytestring():
return rope
-def unicode_encode_utf8(rope):
+def unicode_encode_utf8(rope, allow_surrogates=False):
from pypy.rlib.runicode import unicode_encode_utf_8
if rope.is_ascii():
return rope
@@ -1494,7 +1494,8 @@
unicode_encode_utf8(rope.right))
elif isinstance(rope, LiteralUnicodeNode):
return LiteralStringNode(
- unicode_encode_utf_8(rope.u, len(rope.u), "strict"))
+ unicode_encode_utf_8(rope.u, len(rope.u), "strict",
+ allow_surrogates=allow_surrogates))
elif isinstance(rope, LiteralStringNode):
return LiteralStringNode(_str_encode_utf_8(rope.s))
diff --git a/pypy/rlib/rsocket.py b/pypy/rlib/rsocket.py
--- a/pypy/rlib/rsocket.py
+++ b/pypy/rlib/rsocket.py
@@ -609,9 +609,11 @@
"""
_mixin_ = True # for interp_socket.py
fd = _c.INVALID_SOCKET
- def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0):
+ def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0,
+ fd=_c.INVALID_SOCKET):
"""Create a new socket."""
- fd = _c.socket(family, type, proto)
+ if _c.invalid_socket(fd):
+ fd = _c.socket(family, type, proto)
if _c.invalid_socket(fd):
raise self.error_handler()
# PLAT RISCOS
@@ -717,11 +719,9 @@
addrlen_p[0] = rffi.cast(_c.socklen_t, maxlen)
return addr, addr.addr_p, addrlen_p
- def accept(self, SocketClass=None):
+ def accept(self):
"""Wait for an incoming connection.
- Return (new socket object, client address)."""
- if SocketClass is None:
- SocketClass = RSocket
+ Return (new socket fd, client address)."""
if self._select(False) == 1:
raise SocketTimeout
address, addr_p, addrlen_p = self._addrbuf()
@@ -734,9 +734,7 @@
if _c.invalid_socket(newfd):
raise self.error_handler()
address.addrlen = rffi.cast(lltype.Signed, addrlen)
- sock = make_socket(newfd, self.family, self.type, self.proto,
- SocketClass)
- return (sock, address)
+ return (newfd, address)
def bind(self, address):
"""Bind the socket to a local address."""
@@ -755,6 +753,11 @@
if res != 0:
raise self.error_handler()
+ def detach(self):
+ fd = self.fd
+ self.fd = _c.INVALID_SOCKET
+ return fd
+
if _c.WIN32:
def _connect(self, address):
"""Connect the socket to a remote address."""
diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -77,12 +77,14 @@
]
def str_decode_utf_8(s, size, errors, final=False,
- errorhandler=None):
+ errorhandler=None, allow_surrogates=False):
if errorhandler is None:
errorhandler = raise_unicode_exception_decode
- return str_decode_utf_8_impl(s, size, errors, final, errorhandler)
+ return str_decode_utf_8_impl(s, size, errors, final, errorhandler,
+ allow_surrogates=allow_surrogates)
-def str_decode_utf_8_impl(s, size, errors, final, errorhandler):
+def str_decode_utf_8_impl(s, size, errors, final, errorhandler,
+ allow_surrogates):
if size == 0:
return u'', 0
@@ -184,8 +186,7 @@
if (ordch2>>6 != 0x2 or # 0b10
(ordch1 == 0xe0 and ordch2 < 0xa0)
# surrogates shouldn't be valid UTF-8!
- # Uncomment the line below to make them invalid.
- # or (ordch1 == 0xed and ordch2 > 0x9f)
+ or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f)
):
r, pos = errorhandler(errors, 'utf-8',
'invalid continuation byte',
@@ -254,13 +255,21 @@
result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
result.append((chr((0x80 | (ch & 0x3f)))))
-def unicode_encode_utf_8(s, size, errors, errorhandler=None):
+def unicode_encode_utf_8(s, size, errors, errorhandler=None,
+ allow_surrogates=False):
+ if errorhandler is None:
+ errorhandler = raise_unicode_exception_encode
+ return unicode_encode_utf_8_impl(s, size, errors, errorhandler,
+ allow_surrogates=allow_surrogates)
+
+def unicode_encode_utf_8_impl(s, size, errors, errorhandler,
+ allow_surrogates=False):
assert(size >= 0)
result = StringBuilder(size)
- i = 0
- while i < size:
- ch = ord(s[i])
- i += 1
+ pos = 0
+ while pos < size:
+ ch = ord(s[pos])
+ pos += 1
if ch < 0x80:
# Encode ASCII
result.append(chr(ch))
@@ -272,20 +281,32 @@
# Encode UCS2 Unicode ordinals
if ch < 0x10000:
# Special case: check for high surrogate
- if 0xD800 <= ch <= 0xDBFF and i != size:
- ch2 = ord(s[i])
- # Check for low surrogate and combine the two to
- # form a UCS4 value
- if 0xDC00 <= ch2 <= 0xDFFF:
- ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
- i += 1
- _encodeUCS4(result, ch3)
+ if 0xD800 <= ch <= 0xDFFF:
+ if pos != size:
+ ch2 = ord(s[pos])
+ # Check for low surrogate and combine the two to
+ # form a UCS4 value
+ if ch <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+ ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
+ pos += 1
+ _encodeUCS4(result, ch3)
+ continue
+ if not allow_surrogates:
+ r, pos = errorhandler(errors, 'utf-8',
+ 'surrogates not allowed',
+ s, pos-1, pos)
+ for ch in r:
+ if ord(ch) < 0x80:
+ result.append(chr(ord(ch)))
+ else:
+ errorhandler('strict', 'utf-8',
+ 'surrogates not allowed',
+ s, pos-1, pos)
continue
- # Fall through: handles isolated high surrogates
+ # else: Fall through and handles isolated high surrogates
result.append((chr((0xe0 | (ch >> 12)))))
result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
result.append((chr((0x80 | (ch & 0x3f)))))
- continue
else:
_encodeUCS4(result, ch)
return result.build()
@@ -1202,74 +1223,120 @@
return builder.build(), pos
-def unicode_encode_unicode_escape(s, size, errors, errorhandler=None, quotes=False):
- # errorhandler is not used: this function cannot cause Unicode errors
- result = StringBuilder(size)
+def make_unicode_escape_function(pass_printable=False, unicode_output=False,
+ quotes=False, prefix=None):
+ # Python3 has two similar escape functions: One to implement
+ # encode('unicode_escape') and which outputs bytes, and unicode.__repr__
+ # which outputs unicode. They cannot share RPython code, so we generate
+ # them with the template below.
+ # Python2 does not really need this, but it reduces diffs between branches.
- if quotes:
- if s.find(u'\'') != -1 and s.find(u'\"') == -1:
- quote = ord('\"')
- result.append('u"')
+ if unicode_output:
+ STRING_BUILDER = UnicodeBuilder
+ STR = unicode
+ CHR = UNICHR
+ else:
+ STRING_BUILDER = StringBuilder
+ STR = str
+ CHR = chr
+
+ def unicode_escape(s, size, errors, errorhandler=None):
+ # errorhandler is not used: this function cannot cause Unicode errors
+ result = STRING_BUILDER(size)
+
+ if quotes:
+ if prefix:
+ result.append(STR(prefix))
+ if s.find(u'\'') != -1 and s.find(u'\"') == -1:
+ quote = ord('\"')
+ result.append(STR('"'))
+ else:
+ quote = ord('\'')
+ result.append(STR('\''))
else:
- quote = ord('\'')
- result.append('u\'')
- else:
- quote = 0
+ quote = 0
- if size == 0:
- return ''
+ if size == 0:
+ return STR('')
- pos = 0
- while pos < size:
- ch = s[pos]
- oc = ord(ch)
+ pos = 0
+ while pos < size:
+ ch = s[pos]
+ oc = ord(ch)
- # Escape quotes
- if quotes and (oc == quote or ch == '\\'):
- result.append('\\')
- result.append(chr(oc))
- pos += 1
- continue
-
- # The following logic is enabled only if MAXUNICODE == 0xffff, or
- # for testing on top of a host CPython where sys.maxunicode == 0xffff
- if ((MAXUNICODE < 65536 or
- (not we_are_translated() and sys.maxunicode < 65536))
- and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
- # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
- pos += 1
- oc2 = ord(s[pos])
-
- if 0xDC00 <= oc2 <= 0xDFFF:
- ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
- raw_unicode_escape_helper(result, ucs)
+ # Escape quotes
+ if quotes and (oc == quote or ch == '\\'):
+ result.append(STR('\\'))
+ result.append(CHR(oc))
pos += 1
continue
- # Fall through: isolated surrogates are copied as-is
- pos -= 1
- # Map special whitespace to '\t', \n', '\r'
- if ch == '\t':
- result.append('\\t')
- elif ch == '\n':
- result.append('\\n')
- elif ch == '\r':
- result.append('\\r')
- elif ch == '\\':
- result.append('\\\\')
+ # The following logic is enabled only if MAXUNICODE == 0xffff, or
+ # for testing on top of a host Python where sys.maxunicode == 0xffff
+ if ((MAXUNICODE < 65536 or
+ (not we_are_translated() and sys.maxunicode < 65536))
+ and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
+ # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
+ pos += 1
+ oc2 = ord(s[pos])
- # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
- elif oc < 32 or oc >= 0x7F:
- raw_unicode_escape_helper(result, oc)
+ if 0xDC00 <= oc2 <= 0xDFFF:
+ ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
+ char_escape_helper(result, ucs)
+ pos += 1
+ continue
+ # Fall through: isolated surrogates are copied as-is
+ pos -= 1
- # Copy everything else as-is
+ # Map special whitespace to '\t', \n', '\r'
+ if ch == '\t':
+ result.append(STR('\\t'))
+ elif ch == '\n':
+ result.append(STR('\\n'))
+ elif ch == '\r':
+ result.append(STR('\\r'))
+ elif ch == '\\':
+ result.append(STR('\\\\'))
+
+ # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
+ elif pass_printable and not unicodedb.isprintable(oc):
+ char_escape_helper(result, oc)
+ elif not pass_printable and (oc < 32 or oc >= 0x7F):
+ char_escape_helper(result, oc)
+
+ # Copy everything else as-is
+ else:
+ result.append(CHR(oc))
+ pos += 1
+
+ if quotes:
+ result.append(CHR(quote))
+ return result.build()
+
+ def char_escape_helper(result, char):
+ num = hex(char)
+ if STR is unicode:
+ num = num.decode('ascii')
+ if char >= 0x10000:
+ result.append(STR("\\U"))
+ zeros = 8
+ elif char >= 0x100:
+ result.append(STR("\\u"))
+ zeros = 4
else:
- result.append(chr(oc))
- pos += 1
+ result.append(STR("\\x"))
+ zeros = 2
+ lnum = len(num)
+ nb = zeros + 2 - lnum # num starts with '0x'
+ if nb > 0:
+ result.append_multiple_char(STR('0'), nb)
+ result.append_slice(num, 2, lnum)
- if quotes:
- result.append(chr(quote))
- return result.build()
+ return unicode_escape, char_escape_helper
+
+# This function is also used by _codecs/interp_codecs.py
+(unicode_encode_unicode_escape, raw_unicode_escape_helper
+ ) = make_unicode_escape_function()
# ____________________________________________________________
# Raw unicode escape
@@ -1326,23 +1393,6 @@
return result.build(), pos
-def raw_unicode_escape_helper(result, char):
- num = hex(char)
- if char >= 0x10000:
- result.append("\\U")
- zeros = 8
- elif char >= 0x100:
- result.append("\\u")
- zeros = 4
- else:
- result.append("\\x")
- zeros = 2
- lnum = len(num)
- nb = zeros + 2 - lnum # num starts with '0x'
- if nb > 0:
- result.append_multiple_char('0', nb)
- result.append_slice(num, 2, lnum)
-
def unicode_encode_raw_unicode_escape(s, size, errors, errorhandler=None):
# errorhandler is not used: this function cannot cause Unicode errors
if size == 0:
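The practical consequence of the new allow_surrogates switch: lone surrogates are now rejected by default at the rlib level, and callers that still want the old permissive behaviour (such as the app-level utf_8_encode/utf_8_decode wrappers above) must opt in explicitly. A hedged sketch, assuming a host Python 2 with a PyPy source checkout on sys.path:

from pypy.rlib import runicode

# opt-in: a lone high surrogate is encoded as the 3-byte sequence
s = runicode.unicode_encode_utf_8(u'\ud800', 1, 'strict',
                                  allow_surrogates=True)
print repr(s)                        # '\xed\xa0\x80'

# default: the same input is now an error
try:
    runicode.unicode_encode_utf_8(u'\ud800', 1, 'strict')
except UnicodeEncodeError, e:
    print 'rejected:', e.reason      # 'surrogates not allowed'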
diff --git a/pypy/rlib/test/test_rpoll.py b/pypy/rlib/test/test_rpoll.py
--- a/pypy/rlib/test/test_rpoll.py
+++ b/pypy/rlib/test/test_rpoll.py
@@ -25,7 +25,8 @@
assert events[0][0] == serv.fd
assert events[0][1] & POLLIN
- servconn, cliaddr = serv.accept()
+ servconn_fd, cliaddr = serv.accept()
+ servconn = RSocket(AF_INET, fd=servconn_fd)
events = poll({serv.fd: POLLIN,
cli.fd: POLLOUT}, timeout=500)
diff --git a/pypy/rlib/test/test_rsocket.py b/pypy/rlib/test/test_rsocket.py
--- a/pypy/rlib/test/test_rsocket.py
+++ b/pypy/rlib/test/test_rsocket.py
@@ -167,7 +167,8 @@
lock.acquire()
thread.start_new_thread(connecting, ())
print 'waiting for connection'
- s1, addr2 = sock.accept()
+ fd1, addr2 = sock.accept()
+ s1 = RSocket(fd=fd1)
print 'connection accepted'
lock.acquire()
print 'connecting side knows that the connection was accepted too'
@@ -255,7 +256,8 @@
if errcodesok:
assert err.value.errno in (errno.EINPROGRESS, errno.EWOULDBLOCK)
- s1, addr2 = sock.accept()
+ fd1, addr2 = sock.accept()
+ s1 = RSocket(fd=fd1)
s1.setblocking(False)
assert addr.eq(s2.getpeername())
assert addr2.get_port() == s2.getsockname().get_port()
@@ -414,7 +416,8 @@
clientsock = RSocket(AF_UNIX)
clientsock.connect(a)
- s, addr = serversock.accept()
+ fd, addr = serversock.accept()
+ s = RSocket(AF_UNIX, fd=fd)
s.send('X')
data = clientsock.recv(100)
diff --git a/pypy/rlib/test/test_runicode.py b/pypy/rlib/test/test_runicode.py
--- a/pypy/rlib/test/test_runicode.py
+++ b/pypy/rlib/test/test_runicode.py
@@ -118,6 +118,9 @@
for i in range(10000):
for encoding in ("utf-7 utf-8 utf-16 utf-16-be utf-16-le "
"utf-32 utf-32-be utf-32-le").split():
+ if encoding == 'utf-8' and 0xd800 <= i <= 0xdfff:
+ # Don't try to encode lone surrogates
+ continue
self.checkdecode(unichr(i), encoding)
def test_random(self):
@@ -242,9 +245,8 @@
self.checkdecode(s, "utf-8")
def test_utf8_surrogate(self):
- # A surrogate should not be valid utf-8, but python 2.x accepts them.
- # This test will raise an error with python 3.x
- self.checkdecode(u"\ud800", "utf-8")
+ # surrogates used to be allowed by python 2.x
+ raises(UnicodeDecodeError, self.checkdecode, u"\ud800", "utf-8")
def test_invalid_start_byte(self):
"""
@@ -691,12 +693,16 @@
self.checkencode(s, "utf-8")
def test_utf8_surrogates(self):
- # check replacing of two surrogates by single char while encoding
# make sure that the string itself is not marshalled
u = u"\ud800"
for i in range(4):
u += u"\udc00"
- self.checkencode(u, "utf-8")
+ if runicode.MAXUNICODE < 65536:
+ # Check replacing of two surrogates by single char while encoding
+ self.checkencode(u, "utf-8")
+ else:
+ # This is not done in wide unicode builds
+ raises(UnicodeEncodeError, self.checkencode, u, "utf-8")
def test_ascii_error(self):
self.checkencodeerror(u"abc\xFF\xFF\xFFcde", "ascii", 3, 6)
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -28,8 +28,10 @@
from pypy.rpython.annlowlevel import hlstr
value = hlstr(llvalue)
assert value is not None
- univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict',
- False, self.ll_raise_unicode_exception_decode)
+ univalue, _ = self.rstr_decode_utf_8(
+ value, len(value), 'strict', final=False,
+ errorhandler=self.ll_raise_unicode_exception_decode,
+ allow_surrogates=False)
return self.ll.llunicode(univalue)
def ll_raise_unicode_exception_decode(self, errors, encoding, msg, s,
@@ -50,9 +52,9 @@
self.runicode_encode_utf_8 = None
def ensure_ll_encode_utf8(self):
- from pypy.rlib.runicode import unicode_encode_utf_8
- self.runicode_encode_utf_8 = func_with_new_name(unicode_encode_utf_8,
- 'runicode_encode_utf_8')
+ from pypy.rlib.runicode import unicode_encode_utf_8_impl
+ self.runicode_encode_utf_8 = func_with_new_name(
+ unicode_encode_utf_8_impl, 'runicode_encode_utf_8')
def rtype_method_upper(self, hop):
raise TypeError("Cannot do toupper on unicode string")
@@ -65,9 +67,16 @@
from pypy.rpython.annlowlevel import hlunicode
s = hlunicode(ll_s)
assert s is not None
- bytes = self.runicode_encode_utf_8(s, len(s), 'strict')
+ bytes = self.runicode_encode_utf_8(
+ s, len(s), 'strict',
+ errorhandler=self.ll_raise_unicode_exception_decode,
+ allow_surrogates=False)
return self.ll.llstr(bytes)
+ def ll_raise_unicode_exception_encode(self, errors, encoding, msg, u,
+ startingpos, endingpos):
+ raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
+
class __extend__(annmodel.SomeString):
def rtyper_makerepr(self, rtyper):
return rtyper.type_system.rstr.string_repr
diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -22,16 +22,15 @@
def get_recent_cpython_executable():
if sys.platform == 'win32':
- python = sys.executable.replace('\\', '/') + ' '
+ python = sys.executable.replace('\\', '/')
else:
- python = sys.executable + ' '
-
+ python = sys.executable
# Is there a command 'python' that runs python 2.5-2.7?
# If there is, then we can use it instead of sys.executable
returncode, stdout, stderr = runsubprocess.run_subprocess(
"python", "-V")
if _CPYTHON_RE.match(stdout) or _CPYTHON_RE.match(stderr):
- python = 'python '
+ python = 'python'
return python
@@ -559,6 +558,7 @@
for rule in rules:
mk.rule(*rule)
+ #XXX: this conditional part is not tested at all
if self.config.translation.gcrootfinder == 'asmgcc':
trackgcfiles = [cfile[:cfile.rfind('.')] for cfile in mk.cfiles]
if self.translator.platform.name == 'msvc':
@@ -581,7 +581,7 @@
else:
mk.definition('PYPY_MAIN_FUNCTION', "main")
- python = get_recent_cpython_executable()
+ mk.definition('PYTHON', get_recent_cpython_executable())
if self.translator.platform.name == 'msvc':
lblofiles = []
@@ -603,22 +603,22 @@
'cmd /c $(MASM) /nologo /Cx /Cp /Zm /coff /Fo$@ /c $< $(INCLUDEDIRS)')
mk.rule('.c.gcmap', '',
['$(CC) /nologo $(ASM_CFLAGS) /c /FAs /Fa$*.s $< $(INCLUDEDIRS)',
- 'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
+ 'cmd /c $(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
)
mk.rule('gcmaptable.c', '$(GCMAPFILES)',
- 'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
+ 'cmd /c $(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
else:
mk.definition('OBJECTS', '$(ASMLBLFILES) gcmaptable.s')
mk.rule('%.s', '%.c', '$(CC) $(CFLAGS) $(CFLAGSEXTRA) -frandom-seed=$< -o $@ -S $< $(INCLUDEDIRS)')
mk.rule('%.lbl.s %.gcmap', '%.s',
- [python +
- '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
+ [
+ '$(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py '
'-t $< > $*.gctmp',
'mv $*.gctmp $*.gcmap'])
mk.rule('gcmaptable.s', '$(GCMAPFILES)',
- [python +
- '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
+ [
+ '$(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py '
'$(GCMAPFILES) > $@.tmp',
'mv $@.tmp $@'])
mk.rule('.PRECIOUS', '%.s', "# don't remove .s files if Ctrl-C'ed")