[pypy-issue] Issue #2938: pypy3: time.strptime(): UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte (pypy/pypy)
Jon Dufresne
issues-reply at bitbucket.org
Sun Jan 13 15:32:17 EST 2019
New issue 2938: pypy3: time.strptime(): UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte
https://bitbucket.org/pypy/pypy/issues/2938/pypy3-timestrptime-unicodedecodeerror-utf8
Jon Dufresne:
```
$ pypy3 --version
Python 3.5.3 (7cafdf4fca72, Aug 27 2018, 22:02:53)
[PyPy 6.0.0 with GCC 8.2.1 20180801 (Red Hat 8.2.1-2)]
```
When running the Pillow test suite with pypy3, I receive the error:
```
UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte
```
I do not receive this error with CPython or pypy2.
Steps to reproduce:
```
git clone git@github.com:python-pillow/Pillow.git
cd Pillow
git checkout b62ff510aa90663bbc76ed4d6309b0774875b973 # The latest revision as of today
tox -e pypy3
```
Result:
```
==================================================================================================== FAILURES =====================================================================================================
___________________________________________________________________________________________ TestPdfParser.test_parsing ____________________________________________________________________________________________
self = <test_pdfparser.TestPdfParser testMethod=test_parsing>
def test_parsing(self):
self.assertEqual(PdfParser.interpret_name(b"Name#23Hash"),
b"Name#Hash")
self.assertEqual(PdfParser.interpret_name(
b"Name#23Hash", as_text=True
), "Name#Hash")
self.assertEqual(PdfParser.get_value(b"1 2 R ", 0),
(IndirectReference(1, 2), 5))
self.assertEqual(PdfParser.get_value(b"true[", 0), (True, 4))
self.assertEqual(PdfParser.get_value(b"false%", 0), (False, 5))
self.assertEqual(PdfParser.get_value(b"null<", 0), (None, 4))
self.assertEqual(PdfParser.get_value(b"%cmt\n %cmt\n 123\n", 0),
(123, 15))
self.assertEqual(PdfParser.get_value(b"<901FA3>", 0),
(b"\x90\x1F\xA3", 8))
self.assertEqual(PdfParser.get_value(b"asd < 9 0 1 f A > qwe", 3),
(b"\x90\x1F\xA0", 17))
self.assertEqual(PdfParser.get_value(b"(asd)", 0), (b"asd", 5))
self.assertEqual(PdfParser.get_value(b"(asd(qwe)zxc)zzz(aaa)", 0),
(b"asd(qwe)zxc", 13))
self.assertEqual(PdfParser.get_value(b"(Two \\\nwords.)", 0),
(b"Two words.", 14))
self.assertEqual(PdfParser.get_value(b"(Two\nlines.)", 0),
(b"Two\nlines.", 12))
self.assertEqual(PdfParser.get_value(b"(Two\r\nlines.)", 0),
(b"Two\nlines.", 13))
self.assertEqual(PdfParser.get_value(b"(Two\\nlines.)", 0),
(b"Two\nlines.", 13))
self.assertEqual(PdfParser.get_value(b"(One\\(paren).", 0),
(b"One(paren", 12))
self.assertEqual(PdfParser.get_value(b"(One\\)paren).", 0),
(b"One)paren", 12))
self.assertEqual(PdfParser.get_value(b"(\\0053)", 0), (b"\x053", 7))
self.assertEqual(PdfParser.get_value(b"(\\053)", 0), (b"\x2B", 6))
self.assertEqual(PdfParser.get_value(b"(\\53)", 0), (b"\x2B", 5))
self.assertEqual(PdfParser.get_value(b"(\\53a)", 0), (b"\x2Ba", 6))
self.assertEqual(PdfParser.get_value(b"(\\1111)", 0), (b"\x491", 7))
self.assertEqual(PdfParser.get_value(b" 123 (", 0), (123, 4))
self.assertAlmostEqual(PdfParser.get_value(b" 123.4 %", 0)[0], 123.4)
self.assertEqual(PdfParser.get_value(b" 123.4 %", 0)[1], 6)
self.assertRaises(PdfFormatError, PdfParser.get_value, b"]", 0)
d = PdfParser.get_value(b"<</Name (value) /N /V>>", 0)[0]
self.assertIsInstance(d, PdfDict)
self.assertEqual(len(d), 2)
self.assertEqual(d.Name, "value")
self.assertEqual(d[b"Name"], b"value")
self.assertEqual(d.N, PdfName("V"))
a = PdfParser.get_value(b"[/Name (value) /N /V]", 0)[0]
self.assertIsInstance(a, list)
self.assertEqual(len(a), 4)
self.assertEqual(a[0], PdfName("Name"))
s = PdfParser.get_value(
b"<</Name (value) /Length 5>>\nstream\nabcde\nendstream<<...", 0
)[0]
self.assertIsInstance(s, PdfStream)
self.assertEqual(s.dictionary.Name, "value")
self.assertEqual(s.decode(), b"abcde")
for name in ["CreationDate", "ModDate"]:
for date, value in {
b"20180729214124": "20180729214124",
b"D:20180729214124": "20180729214124",
b"D:2018072921": "20180729210000",
b"D:20180729214124Z": "20180729214124",
b"D:20180729214124+08'00'": "20180729134124",
b"D:20180729214124-05'00'": "20180730024124"
}.items():
d = PdfParser.get_value(
b"<</"+name.encode()+b" ("+date+b")>>", 0)[0]
self.assertEqual(
> time.strftime("%Y%m%d%H%M%S", getattr(d, name)), value)
Tests/test_pdfparser.py:96:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/pypy3/site-packages/PIL/PdfParser.py:298: in __getattr__
value = time.strptime(value[:len(format)+2], format)
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:504: in _strptime_time
tt = _strptime(data_string, format)[0]
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:318: in _strptime
_TimeRE_cache = TimeRE()
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:194: in __init__
self.locale_time = LocaleTime()
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:73: in __init__
self.__calc_weekday()
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:95: in __calc_weekday
a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:95: in <listcomp>
a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
/usr/lib64/pypy3-6.0/lib-python/3/calendar.py:80: in __getitem__
return funcs(self.format)
/usr/lib64/pypy3-6.0/lib-python/3/datetime.py:754: in strftime
return _wrap_strftime(self, format, self.timetuple())
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
object = datetime.date(2001, 1, 3), format = '%a', timetuple = time.struct_time(tm_year=2001, tm_mon=1, tm_mday=3, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=3, tm_isdst=-1)
def _wrap_strftime(object, format, timetuple):
# Don't call utcoffset() or tzname() unless actually needed.
freplace = None # the string to use for %f
zreplace = None # the string to use for %z
Zreplace = None # the string to use for %Z
# Scan format for %z and %Z escapes, replacing as needed.
newformat = []
push = newformat.append
i, n = 0, len(format)
while i < n:
ch = format[i]
i += 1
if ch == '%':
if i < n:
ch = format[i]
i += 1
if ch == 'f':
if freplace is None:
freplace = '%06d' % getattr(object,
'microsecond', 0)
newformat.append(freplace)
elif ch == 'z':
if zreplace is None:
zreplace = ""
if hasattr(object, "utcoffset"):
offset = object.utcoffset()
if offset is not None:
sign = '+'
if offset.days < 0:
offset = -offset
sign = '-'
h, m = divmod(offset, timedelta(hours=1))
assert not m % timedelta(minutes=1), "whole minute"
m //= timedelta(minutes=1)
zreplace = '%c%02d%02d' % (sign, h, m)
assert '%' not in zreplace
newformat.append(zreplace)
elif ch == 'Z':
if Zreplace is None:
Zreplace = ""
if hasattr(object, "tzname"):
s = object.tzname()
if s is not None:
# strftime is going to have at this: escape %
Zreplace = s.replace('%', '%%')
newformat.append(Zreplace)
else:
push('%')
push(ch)
else:
push('%')
else:
push(ch)
newformat = "".join(newformat)
> return _time.strftime(newformat, timetuple)
E UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte
/usr/lib64/pypy3-6.0/lib-python/3/datetime.py:221: UnicodeDecodeError
```
More information about the pypy-issue
mailing list