[pypy-svn] r60951 - in pypy/trunk/pypy/lib: . app_test

fijal at codespeak.net fijal at codespeak.net
Wed Jan 14 13:49:21 CET 2009


Author: fijal
Date: Wed Jan 14 13:49:20 2009
New Revision: 60951

Modified:
   pypy/trunk/pypy/lib/app_test/test_binascii.py
   pypy/trunk/pypy/lib/binascii.py
Log:
This is taken directly from the CPython source. I'm fed up with trying to work
out when the padding is correct and when it's not. It's actually simpler,
more correct (and faster), and more RPython.


Modified: pypy/trunk/pypy/lib/app_test/test_binascii.py
==============================================================================
--- pypy/trunk/pypy/lib/app_test/test_binascii.py	(original)
+++ pypy/trunk/pypy/lib/app_test/test_binascii.py	Wed Jan 14 13:49:20 2009
@@ -150,3 +150,7 @@
 def test_wrong_padding():
     s = 'CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ'
     raises(binascii.Error, binascii.a2b_base64, s)
+
+def test_crap_after_padding():
+    s = 'xxx=axxxx'
+    assert binascii.a2b_base64(s) == '\xc7\x1c'

Modified: pypy/trunk/pypy/lib/binascii.py
==============================================================================
--- pypy/trunk/pypy/lib/binascii.py	(original)
+++ pypy/trunk/pypy/lib/binascii.py	Wed Jan 14 13:49:20 2009
@@ -130,6 +130,7 @@
     '9': 61,
     '+': 62,
     '/': 63,
+    '=': 0,
 }
 
 
@@ -137,67 +138,46 @@
     s = s.rstrip()
     # clean out all invalid characters, this also strips the final '=' padding
     # check for correct padding
-    if not s or s.startswith('='):
-        count = 0
-    else:
-        count = 0
-        while s[len(s)-count-1] == '=':
-            count += 1
-    clean_s = []
-    for item in s:
-        if item in table_a2b_base64:
-            clean_s.append(item)
-    if len(clean_s) % 4 == 1:
-        if count < 1:
-            raise Error("incorrect padding")
-    if len(clean_s) % 4 == 2:
-        if count < 2:
-            raise Error("incorrect padding")
-    s = ''.join(clean_s)
-    if len(s) % 4:
-        s = s + ('=' * (4 - len(s) % 4))
-
-    # Add '=' padding back into the string
-     
-    def quadruplets_gen(s):
-        l = [s[i:i+4] for i in range(0, len(s), 4)]
-        for s in l:
-            yield (table_a2b_base64[s[0]],
-                   table_a2b_base64[s[1]],
-                   table_a2b_base64[s[2]],
-                   table_a2b_base64[s[3]])
-
-    result = [
-        chr(A << 2 | ((B >> 4) & 0x3)) + 
-        chr((B & 0xf) << 4 | ((C >> 2 ) & 0xf)) + 
-        chr((C & 0x3) << 6 | D )
-        for A, B, C, D in quadruplets_gen(s[:-4])]
 
-    if s:
-        try:
-            final = s[-4:]
-            if final[2] == '=':
-                A = table_a2b_base64[final[0]]
-                B = table_a2b_base64[final[1]]
-                snippet =  chr(A << 2 | ((B >> 4) & 0x3))
-            elif final[3] == '=':
-                A = table_a2b_base64[final[0]]
-                B = table_a2b_base64[final[1]]
-                C = table_a2b_base64[final[2]]
-                snippet =  chr(A << 2 | ((B >> 4) & 0x3)) + \
-                          chr((B & 0xf) << 4 | ((C >> 2 ) & 0xf))
+    def next_valid_char(s, pos):
+        for i in range(pos + 1, len(s)):
+            c = s[i]
+            if c < '\x7f':
+                try:
+                    table_a2b_base64[c]
+                    return c
+                except KeyError:
+                    pass
+        return None
+    
+    quad_pos = 0
+    leftbits = 0
+    leftchar = 0
+    res = []
+    for i, c in enumerate(s):
+        if c > '\x7f' or c == '\n' or c == '\r' or c == ' ':
+            continue
+        if c == '=':
+            if quad_pos < 2 or (quad_pos == 2 and next_valid_char(s, i) != '='):
+                continue
             else:
-                A = table_a2b_base64[final[0]]
-                B = table_a2b_base64[final[1]]
-                C = table_a2b_base64[final[2]]
-                D = table_a2b_base64[final[3]]
-                snippet =  chr(A << 2 | ((B >> 4) & 0x3)) + \
-                          chr((B & 0xf) << 4 | ((C >> 2 ) & 0xf)) + \
-                          chr((C & 0x3) << 6 | D )
-            result.append(snippet)
+                leftbits = 0
+                break
+        try:
+            next_c = table_a2b_base64[c]
         except KeyError:
-            raise Error('Incorrect padding')
-    return ''.join(result)
+            continue
+        quad_pos = (quad_pos + 1) & 0x03
+        leftchar = (leftchar << 6) | next_c
+        leftbits += 6
+        if leftbits >= 8:
+            leftbits -= 8
+            res.append((leftchar >> leftbits & 0xff))
+            leftchar &= ((1 << leftbits) - 1)
+    if leftbits != 0:
+        raise Error('Incorrect padding')
+    
+    return ''.join([chr(i) for i in res])
     
 table_b2a_base64 = \
 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"



More information about the Pypy-commit mailing list