[Python-checkins] python/dist/src/Lib sre_compile.py,1.46,1.47 sre_constants.py,1.31,1.32
loewis@users.sourceforge.net
loewis@users.sourceforge.net
Sat, 19 Apr 2003 05:56:10 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv11938/Lib
Modified Files:
sre_compile.py sre_constants.py
Log Message:
Fully support 32-bit codes. Enable BIGCHARSET in UCS-4 builds.
Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.46
retrieving revision 1.47
diff -C2 -d -r1.46 -r1.47
*** sre_compile.py 14 Apr 2003 17:59:31 -0000 1.46
--- sre_compile.py 19 Apr 2003 12:56:07 -0000 1.47
***************
*** 17,21 ****
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
! MAXCODE = 65535
def _compile(code, pattern, flags):
--- 17,24 ----
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
! if _sre.CODESIZE == 2:
! MAXCODE = 65535
! else:
! MAXCODE = 0xFFFFFFFFL
def _compile(code, pattern, flags):
***************
*** 192,198 ****
return charset # cannot compress
except IndexError:
- if sys.maxunicode != 65535:
- # XXX: big charsets don't work in UCS-4 builds
- return charset
# character set contains unicode characters
return _optimize_unicode(charset, fixup)
--- 195,198 ----
***************
*** 229,233 ****
def _mk_bitmap(bits):
data = []
! m = 1; v = 0
for c in bits:
if c:
--- 229,237 ----
def _mk_bitmap(bits):
data = []
! if _sre.CODESIZE == 2:
! start = (1, 0)
! else:
! start = (1L, 0L)
! m, v = start
for c in bits:
if c:
***************
*** 236,240 ****
if m > MAXCODE:
data.append(v)
! m = 1; v = 0
return data
--- 240,244 ----
if m > MAXCODE:
data.append(v)
! m, v = start
return data
***************
*** 259,277 ****
# CHARSET matching).
def _optimize_unicode(charset, fixup):
charmap = [0]*65536
negate = 0
! for op, av in charset:
! if op is NEGATE:
! negate = 1
! elif op is LITERAL:
! charmap[fixup(av)] = 1
! elif op is RANGE:
! for i in range(fixup(av[0]), fixup(av[1])+1):
! charmap[i] = 1
! elif op is CATEGORY:
! # XXX: could expand category
! return charset # cannot compress
if negate:
for i in range(65536):
charmap[i] = not charmap[i]
--- 263,298 ----
# CHARSET matching).
+ # In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
+ # of the basic multilingual plane; an efficient representation
+ # for all of UTF-16 has not yet been developed. This means,
+ # in particular, that negated charsets cannot be represented as
+ # bigcharsets.
+
def _optimize_unicode(charset, fixup):
+ try:
+ import array
+ except ImportError:
+ return charset
charmap = [0]*65536
negate = 0
! try:
! for op, av in charset:
! if op is NEGATE:
! negate = 1
! elif op is LITERAL:
! charmap[fixup(av)] = 1
! elif op is RANGE:
! for i in range(fixup(av[0]), fixup(av[1])+1):
! charmap[i] = 1
! elif op is CATEGORY:
! # XXX: could expand category
! return charset # cannot compress
! except IndexError:
! # non-BMP characters
! return charset
if negate:
+ if sys.maxunicode != 65535:
+ # XXX: negation does not work with big charsets
+ return charset
for i in range(65536):
charmap[i] = not charmap[i]
***************
*** 288,297 ****
data = data + _mk_bitmap(chunk)
header = [block]
! assert MAXCODE == 65535
! for i in range(128):
! if sys.byteorder == 'big':
! header.append(256*mapping[2*i]+mapping[2*i+1])
! else:
! header.append(mapping[2*i]+256*mapping[2*i+1])
data[0:0] = header
return [(BIGCHARSET, data)]
--- 309,320 ----
data = data + _mk_bitmap(chunk)
header = [block]
! if MAXCODE == 65535:
! code = 'H'
! else:
! code = 'L'
! # Convert block indices to byte array of 256 bytes
! mapping = array.array('b', mapping).tostring()
! # Convert byte array to word array
! header = header + array.array(code, mapping).tolist()
data[0:0] = header
return [(BIGCHARSET, data)]
Index: sre_constants.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_constants.py,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** sre_constants.py 14 Apr 2003 17:59:32 -0000 1.31
--- sre_constants.py 19 Apr 2003 12:56:07 -0000 1.32
***************
*** 14,18 ****
# update when constants are added or removed
! MAGIC = 20010701
# max code word in this release
--- 14,18 ----
# update when constants are added or removed
! MAGIC = 20030419
# max code word in this release