[pypy-commit] pypy unicode-utf8: (fijal, argio) whack until we run into a serious problem
fijal
pypy.commits at gmail.com
Mon Feb 27 06:12:22 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r90376:85fee86ba1f7
Date: 2017-02-27 12:11 +0100
http://bitbucket.org/pypy/pypy/changeset/85fee86ba1f7/
Log: (fijal, argio) whack until we run into a serious problem
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -10,7 +10,7 @@
interpleveldefs = {
"StringBuilder": "interp_builders.W_StringBuilder",
- "UnicodeBuilder": "interp_builders.W_UnicodeBuilder",
+ #"UnicodeBuilder": "interp_builders.W_UnicodeBuilder",
}
class TimeModule(MixedModule):
diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py
--- a/pypy/module/__pypy__/interp_builders.py
+++ b/pypy/module/__pypy__/interp_builders.py
@@ -64,4 +64,4 @@
return W_Builder
W_StringBuilder = create_builder("StringBuilder", str, StringBuilder, "newbytes")
-W_UnicodeBuilder = create_builder("UnicodeBuilder", unicode, UnicodeBuilder, "newunicode")
+#W_UnicodeBuilder = create_builder("UnicodeBuilder", unicode, UnicodeBuilder, "newunicode")
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -486,6 +486,7 @@
@unwrap_spec(data='bufferstr', errors='str_or_None', byteorder=int,
w_final=WrappedDefault(False))
def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=None):
+ assert False, "fix in the future"
if errors is None:
errors = 'strict'
final = space.is_true(w_final)
@@ -507,6 +508,7 @@
@unwrap_spec(data='bufferstr', errors='str_or_None', byteorder=int,
w_final=WrappedDefault(False))
def utf_32_ex_decode(space, data, errors='strict', byteorder=0, w_final=None):
+ assert False, "fix in the future"
final = space.is_true(w_final)
state = space.fromcache(CodecState)
if byteorder == 0:
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -47,8 +47,8 @@
return NonConstant("foobar")
identifier_w = bytes_w = str_w
- def unicode_w(self, space):
- return NonConstant(u"foobar")
+ def utf8_w(self, space):
+ return NonConstant("foobar")
def int_w(self, space, allow_conversion=True):
return NonConstant(-42)
@@ -208,7 +208,7 @@
def newbytes(self, x):
return w_some_obj()
- def newunicode(self, x):
+ def newutf8(self, x, l):
return w_some_obj()
newtext = newbytes
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -17,7 +17,7 @@
from pypy.objspace.std.stringmethods import StringMethods
from pypy.objspace.std.unicodeobject import (
decode_object, unicode_from_encoded_object,
- getdefaultencoding)
+ getdefaultencoding, unicode_from_string)
from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
@@ -53,17 +53,7 @@
return space.newint(uid)
def convert_to_w_unicode(self, space):
- # Use the default encoding.
- encoding = getdefaultencoding(space)
- if encoding == 'ascii':
- try:
- rutf8.check_ascii(self._value)
- return space.newutf8(self._value, len(self._value))
- except rutf8.AsciiCheckError:
- xxx
- else:
- xxx
- return space.unicode_w(decode_object(space, self, encoding, None))
+ return unicode_from_string(space, self)
def descr_add(self, space, w_other):
"""x.__add__(y) <==> x+y"""
diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -1032,7 +1032,7 @@
unilist = space.listview_unicode(w_iterable)
if unilist is not None:
- xxx
+ assert False, "disabled"
w_list.strategy = strategy = space.fromcache(UnicodeListStrategy)
# need to copy because intlist can share with w_iterable
w_list.lstorage = strategy.erase(unilist[:])
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -203,6 +203,7 @@
return unichr(unicodedb.totitle(ord(ch)))
def _newlist_unwrapped(self, space, lst):
+ assert False, "should not be called"
return space.newlist_unicode(lst)
@staticmethod
diff --git a/rpython/annotator/listdef.py b/rpython/annotator/listdef.py
--- a/rpython/annotator/listdef.py
+++ b/rpython/annotator/listdef.py
@@ -107,6 +107,9 @@
self.bookkeeper.annotator.reflowfromposition(position_key)
def generalize(self, s_other_value):
+ if hasattr(self.s_value, 'can_be_None') and not self.s_value.can_be_None and getattr(s_other_value, 'can_be_None', False):
+ import pdb
+ pdb.set_trace()
s_new_value = unionof(self.s_value, s_other_value)
updated = s_new_value != self.s_value
if updated:
diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -671,7 +671,7 @@
return getbookkeeper().newlist(s_item)
def method_rsplit(self, patt, max=-1):
- s_item = self.basestringclass(no_nul=self.no_nul)
+ s_item = self.basestringclass(no_nul=self.no_nul, can_be_None=False)
return getbookkeeper().newlist(s_item)
def method_replace(self, s1, s2):
@@ -696,7 +696,7 @@
if not s_enc.is_constant():
raise AnnotatorError("Non-constant encoding not supported")
enc = s_enc.const
- if enc not in ('ascii', 'latin-1', 'utf-8'):
+ if enc not in ('ascii', 'latin-1', 'utf-8', 'utf8'):
raise AnnotatorError("Encoding %s not supported for unicode" % (enc,))
return SomeString(no_nul=self.no_nul)
method_encode.can_only_throw = []
@@ -729,7 +729,7 @@
if not s_enc.is_constant():
raise AnnotatorError("Non-constant encoding not supported")
enc = s_enc.const
- if enc not in ('ascii', 'latin-1', 'utf-8'):
+ if enc not in ('ascii', 'latin-1', 'utf-8', 'utf8'):
raise AnnotatorError("Encoding %s not supported for strings" % (enc,))
return SomeUnicodeString(no_nul=self.no_nul)
method_decode.can_only_throw = [UnicodeDecodeError]
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -346,8 +346,6 @@
#
# See also unicode_encode_utf8sp().
#
- if errorhandler is None:
- errorhandler = default_unicode_error_encode
# NB. a bit messy because rtyper/rstr.py also calls the same
# function. Make sure we annotate for the args it passes, too
if NonConstant(False):
@@ -361,6 +359,9 @@
def unicode_encode_utf_8_impl(s, size, errors, errorhandler,
allow_surrogates=False):
+ # XXX hack
+ if errorhandler is None:
+ errorhandler = default_unicode_error_encode
assert(size >= 0)
result = StringBuilder(size)
pos = 0
diff --git a/rpython/rlib/streamio.py b/rpython/rlib/streamio.py
--- a/rpython/rlib/streamio.py
+++ b/rpython/rlib/streamio.py
@@ -708,7 +708,9 @@
assert stop >= 0
chunks.append(self.buf[:stop])
break
- chunks.append(self.buf)
+ buf = self.buf
+ assert buf is not None
+ chunks.append(buf)
return ''.join(chunks)
def readline(self):
diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py
--- a/rpython/rtyper/rstr.py
+++ b/rpython/rtyper/rstr.py
@@ -335,7 +335,7 @@
return hop.gendirectcall(self.ll.ll_str2unicode, v_self)
elif encoding == 'latin-1':
return hop.gendirectcall(self.ll_decode_latin1, v_self)
- elif encoding == 'utf-8':
+ elif encoding == 'utf-8' or encoding == 'utf8':
return hop.gendirectcall(self.ll_decode_utf8, v_self)
else:
raise TyperError("encoding %s not implemented" % (encoding, ))
@@ -408,7 +408,7 @@
return hop.gendirectcall(self.ll_str, v_self)
elif encoding == "latin-1":
return hop.gendirectcall(self.ll_encode_latin1, v_self)
- elif encoding == 'utf-8':
+ elif encoding == 'utf-8' or encoding == 'utf8':
return hop.gendirectcall(self.ll_encode_utf8, v_self)
else:
raise TyperError("encoding %s not implemented" % (encoding, ))
More information about the pypy-commit mailing list