[pypy-issue] Issue #2938: pypy3: time.strptime(): UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte (pypy/pypy)

Sun Jan 13 15:32:17 EST 2019

New issue 2938: pypy3: time.strptime(): UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte
https://bitbucket.org/pypy/pypy/issues/2938/pypy3-timestrptime-unicodedecodeerror-utf8

Jon Dufresne:

```
$ pypy3 --version
Python 3.5.3 (7cafdf4fca72, Aug 27 2018, 22:02:53)
[PyPy 6.0.0 with GCC 8.2.1 20180801 (Red Hat 8.2.1-2)]
```

When running the Pillow test suite with pypy3, I receive the error:

```
UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte
```

I do not receive this error with CPython or pypy2.

Steps to reproduce:

```
git clone git@github.com:python-pillow/Pillow.git
cd Pillow
git checkout b62ff510aa90663bbc76ed4d6309b0774875b973  # The latest revision as of today
tox -e pypy3
```

Result:

```
==================================================================================================== FAILURES =====================================================================================================
___________________________________________________________________________________________ TestPdfParser.test_parsing ____________________________________________________________________________________________

self = <test_pdfparser.TestPdfParser testMethod=test_parsing>

    def test_parsing(self):
        self.assertEqual(PdfParser.interpret_name(b"Name#23Hash"),
                         b"Name#Hash")
        self.assertEqual(PdfParser.interpret_name(
                             b"Name#23Hash", as_text=True
                         ), "Name#Hash")
        self.assertEqual(PdfParser.get_value(b"1 2 R ", 0),
                         (IndirectReference(1, 2), 5))
        self.assertEqual(PdfParser.get_value(b"true[", 0), (True, 4))
        self.assertEqual(PdfParser.get_value(b"false%", 0), (False, 5))
        self.assertEqual(PdfParser.get_value(b"null<", 0), (None, 4))
        self.assertEqual(PdfParser.get_value(b"%cmt\n %cmt\n 123\n", 0),
                         (123, 15))
        self.assertEqual(PdfParser.get_value(b"<901FA3>", 0),
                         (b"\x90\x1F\xA3", 8))
        self.assertEqual(PdfParser.get_value(b"asd < 9 0 1 f A > qwe", 3),
                         (b"\x90\x1F\xA0", 17))
        self.assertEqual(PdfParser.get_value(b"(asd)", 0), (b"asd", 5))
        self.assertEqual(PdfParser.get_value(b"(asd(qwe)zxc)zzz(aaa)", 0),
                         (b"asd(qwe)zxc", 13))
        self.assertEqual(PdfParser.get_value(b"(Two \\\nwords.)", 0),
                         (b"Two words.", 14))
        self.assertEqual(PdfParser.get_value(b"(Two\nlines.)", 0),
                         (b"Two\nlines.", 12))
        self.assertEqual(PdfParser.get_value(b"(Two\r\nlines.)", 0),
                         (b"Two\nlines.", 13))
        self.assertEqual(PdfParser.get_value(b"(Two\\nlines.)", 0),
                         (b"Two\nlines.", 13))
        self.assertEqual(PdfParser.get_value(b"(One\\(paren).", 0),
                         (b"One(paren", 12))
        self.assertEqual(PdfParser.get_value(b"(One\\)paren).", 0),
                         (b"One)paren", 12))
        self.assertEqual(PdfParser.get_value(b"(\\0053)", 0), (b"\x053", 7))
        self.assertEqual(PdfParser.get_value(b"(\\053)", 0), (b"\x2B", 6))
        self.assertEqual(PdfParser.get_value(b"(\\53)", 0), (b"\x2B", 5))
        self.assertEqual(PdfParser.get_value(b"(\\53a)", 0), (b"\x2Ba", 6))
        self.assertEqual(PdfParser.get_value(b"(\\1111)", 0), (b"\x491", 7))
        self.assertEqual(PdfParser.get_value(b" 123 (", 0), (123, 4))
        self.assertAlmostEqual(PdfParser.get_value(b" 123.4 %", 0)[0], 123.4)
        self.assertEqual(PdfParser.get_value(b" 123.4 %", 0)[1], 6)
        self.assertRaises(PdfFormatError, PdfParser.get_value, b"]", 0)
        d = PdfParser.get_value(b"<</Name (value) /N /V>>", 0)[0]
        self.assertIsInstance(d, PdfDict)
        self.assertEqual(len(d), 2)
        self.assertEqual(d.Name, "value")
        self.assertEqual(d[b"Name"], b"value")
        self.assertEqual(d.N, PdfName("V"))
        a = PdfParser.get_value(b"[/Name (value) /N /V]", 0)[0]
        self.assertIsInstance(a, list)
        self.assertEqual(len(a), 4)
        self.assertEqual(a[0], PdfName("Name"))
        s = PdfParser.get_value(
            b"<</Name (value) /Length 5>>\nstream\nabcde\nendstream<<...", 0
        )[0]
        self.assertIsInstance(s, PdfStream)
        self.assertEqual(s.dictionary.Name, "value")
        self.assertEqual(s.decode(), b"abcde")
        for name in ["CreationDate", "ModDate"]:
            for date, value in {
                b"20180729214124": "20180729214124",
                b"D:20180729214124": "20180729214124",
                b"D:2018072921": "20180729210000",
                b"D:20180729214124Z": "20180729214124",
                b"D:20180729214124+08'00'": "20180729134124",
                b"D:20180729214124-05'00'": "20180730024124"
            }.items():
                d = PdfParser.get_value(
                    b"<</"+name.encode()+b" ("+date+b")>>", 0)[0]
                self.assertEqual(
>                   time.strftime("%Y%m%d%H%M%S", getattr(d, name)), value)

Tests/test_pdfparser.py:96: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/pypy3/site-packages/PIL/PdfParser.py:298: in __getattr__
    value = time.strptime(value[:len(format)+2], format)
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:504: in _strptime_time
    tt = _strptime(data_string, format)[0]
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:318: in _strptime
    _TimeRE_cache = TimeRE()
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:194: in __init__
    self.locale_time = LocaleTime()
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:73: in __init__
    self.__calc_weekday()
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:95: in __calc_weekday
    a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
/usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:95: in <listcomp>
    a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
/usr/lib64/pypy3-6.0/lib-python/3/calendar.py:80: in __getitem__
    return funcs(self.format)
/usr/lib64/pypy3-6.0/lib-python/3/datetime.py:754: in strftime
    return _wrap_strftime(self, format, self.timetuple())
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

object = datetime.date(2001, 1, 3), format = '%a', timetuple = time.struct_time(tm_year=2001, tm_mon=1, tm_mday=3, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=3, tm_isdst=-1)

    def _wrap_strftime(object, format, timetuple):
        # Don't call utcoffset() or tzname() unless actually needed.
        freplace = None  # the string to use for %f
        zreplace = None  # the string to use for %z
        Zreplace = None  # the string to use for %Z

        # Scan format for %z and %Z escapes, replacing as needed.
        newformat = []
        push = newformat.append
        i, n = 0, len(format)
        while i < n:
            ch = format[i]
            i += 1
            if ch == '%':
                if i < n:
                    ch = format[i]
                    i += 1
                    if ch == 'f':
                        if freplace is None:
                            freplace = '%06d' % getattr(object,
                                                        'microsecond', 0)
                        newformat.append(freplace)
                    elif ch == 'z':
                        if zreplace is None:
                            zreplace = ""
                            if hasattr(object, "utcoffset"):
                                offset = object.utcoffset()
                                if offset is not None:
                                    sign = '+'
                                    if offset.days < 0:
                                        offset = -offset
                                        sign = '-'
                                    h, m = divmod(offset, timedelta(hours=1))
                                    assert not m % timedelta(minutes=1), "whole minute"
                                    m //= timedelta(minutes=1)
                                    zreplace = '%c%02d%02d' % (sign, h, m)
                        assert '%' not in zreplace
                        newformat.append(zreplace)
                    elif ch == 'Z':
                        if Zreplace is None:
                            Zreplace = ""
                            if hasattr(object, "tzname"):
                                s = object.tzname()
                                if s is not None:
                                    # strftime is going to have at this: escape %
                                    Zreplace = s.replace('%', '%%')
                        newformat.append(Zreplace)
                    else:
                        push('%')
                        push(ch)
                else:
                    push('%')
            else:
                push(ch)
        newformat = "".join(newformat)
>       return _time.strftime(newformat, timetuple)
E       UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte

/usr/lib64/pypy3-6.0/lib-python/3/datetime.py:221: UnicodeDecodeError
```