[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.19,2.20 sre_constants.h,2.5,2.6
Fredrik Lundh
python-dev@python.org
Sun, 2 Jul 2000 05:00:09 -0700
Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv10442/Modules
Modified Files:
_sre.c sre_constants.h
Log Message:
-- Use charset bitmaps where appropriate. This gives a 5-10%
speedup for some tests, including the Python tokenizer.
-- Added support for an optional charset anchor to the engine
(currently unused by the code generator).
-- Removed workaround for array module bug.
Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.19
retrieving revision 2.20
diff -C2 -r2.19 -r2.20
*** _sre.c 2000/07/01 23:49:14 2.19
--- _sre.c 2000/07/02 12:00:07 2.20
***************
*** 379,382 ****
--- 379,389 ----
break;
+ case SRE_OP_CHARSET:
+ /* args: <bitmap> (16 bits per code word) */
+ if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
+ return ok;
+ set += 16;
+ break;
+
case SRE_OP_CATEGORY:
/* args: <category> */
***************
*** 953,979 ****
SRE_CHAR* end = state->end;
int status = 0;
! int prefix_len = 0;
! SRE_CODE* prefix;
! SRE_CODE* overlap;
! int literal = 0;
if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */
! /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
if (pattern[3] > 0) {
/* adjust end point (but make sure we leave at least one
! character in there) */
end -= pattern[3]-1;
if (end <= ptr)
end = ptr+1;
}
-
- literal = pattern[2];
-
- prefix = pattern + 6;
- prefix_len = pattern[5];
! overlap = prefix + prefix_len - 1;
pattern += 1 + pattern[1];
--- 960,989 ----
SRE_CHAR* end = state->end;
int status = 0;
! int prefix_len;
! SRE_CODE* prefix = NULL;
! SRE_CODE* charset = NULL;
! SRE_CODE* overlap = NULL;
! int flags = 0;
if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */
! /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
!
! flags = pattern[2];
if (pattern[3] > 0) {
/* adjust end point (but make sure we leave at least one
! character in there, so literal search will work) */
end -= pattern[3]-1;
if (end <= ptr)
end = ptr+1;
}
! if (flags & SRE_INFO_PREFIX) {
! prefix_len = pattern[5];
! prefix = pattern + 6;
! overlap = prefix + prefix_len - 1;
! } else if (flags & SRE_INFO_CHARSET)
! charset = pattern + 5;
pattern += 1 + pattern[1];
***************
*** 981,985 ****
#if defined(USE_FAST_SEARCH)
! if (prefix_len > 1) {
/* pattern starts with a known prefix. use the overlap
table to skip forward as fast as we possibly can */
--- 991,995 ----
#if defined(USE_FAST_SEARCH)
! if (prefix && overlap && prefix_len > 1) {
/* pattern starts with a known prefix. use the overlap
table to skip forward as fast as we possibly can */
***************
*** 999,1004 ****
state->start = ptr - prefix_len + 1;
state->ptr = ptr + 1;
! if (literal)
! return 1; /* all of it */
status = SRE_MATCH(state, pattern + 2*prefix_len);
if (status != 0)
--- 1009,1014 ----
state->start = ptr - prefix_len + 1;
state->ptr = ptr + 1;
! if (flags & SRE_INFO_LITERAL)
! return 1; /* we got all of it */
status = SRE_MATCH(state, pattern + 2*prefix_len);
if (status != 0)
***************
*** 1017,1023 ****
#endif
! if (pattern[0] == SRE_OP_LITERAL) {
! /* pattern starts with a literal character. this is used for
! short prefixes, and if fast search is disabled*/
SRE_CODE chr = pattern[1];
for (;;) {
--- 1027,1033 ----
#endif
! if (pattern[0] == SRE_OP_LITERAL) {
! /* pattern starts with a literal character. this is used
! for short prefixes, and if fast search is disabled */
SRE_CODE chr = pattern[1];
for (;;) {
***************
*** 1033,1036 ****
--- 1043,1062 ----
break;
}
+ #if 0
+ } else if (charset) {
+ /* pattern starts with a character from a known set */
+ for (;;) {
+ while (ptr < end && !SRE_MEMBER(charset, ptr[0]))
+ ptr++;
+ if (ptr == end)
+ return 0;
+ TRACE(("%8d: === SEARCH === charset\n", PTR(ptr)));
+ state->start = ptr;
+ state->ptr = ptr;
+ status = SRE_MATCH(state, pattern);
+ if (status != 0)
+ break;
+ }
+ #endif
} else
/* general case */
***************
*** 1045,1048 ****
--- 1071,1075 ----
return status;
}
+
#if !defined(SRE_RECURSIVE)
Index: sre_constants.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/sre_constants.h,v
retrieving revision 2.5
retrieving revision 2.6
diff -C2 -r2.5 -r2.6
*** sre_constants.h 2000/06/30 10:41:31 2.5
--- sre_constants.h 2000/07/02 12:00:07 2.6
***************
*** 21,41 ****
#define SRE_OP_CALL 7
#define SRE_OP_CATEGORY 8
! #define SRE_OP_GROUP 9
! #define SRE_OP_GROUP_IGNORE 10
! #define SRE_OP_IN 11
! #define SRE_OP_IN_IGNORE 12
! #define SRE_OP_INFO 13
! #define SRE_OP_JUMP 14
! #define SRE_OP_LITERAL 15
! #define SRE_OP_LITERAL_IGNORE 16
! #define SRE_OP_MARK 17
! #define SRE_OP_MAX_REPEAT 18
! #define SRE_OP_MAX_REPEAT_ONE 19
! #define SRE_OP_MIN_REPEAT 20
! #define SRE_OP_NOT_LITERAL 21
! #define SRE_OP_NOT_LITERAL_IGNORE 22
! #define SRE_OP_NEGATE 23
! #define SRE_OP_RANGE 24
! #define SRE_OP_REPEAT 25
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
--- 21,42 ----
#define SRE_OP_CALL 7
#define SRE_OP_CATEGORY 8
! #define SRE_OP_CHARSET 9
! #define SRE_OP_GROUP 10
! #define SRE_OP_GROUP_IGNORE 11
! #define SRE_OP_IN 12
! #define SRE_OP_IN_IGNORE 13
! #define SRE_OP_INFO 14
! #define SRE_OP_JUMP 15
! #define SRE_OP_LITERAL 16
! #define SRE_OP_LITERAL_IGNORE 17
! #define SRE_OP_MARK 18
! #define SRE_OP_MAX_REPEAT 19
! #define SRE_OP_MAX_REPEAT_ONE 20
! #define SRE_OP_MIN_REPEAT 21
! #define SRE_OP_NOT_LITERAL 22
! #define SRE_OP_NOT_LITERAL_IGNORE 23
! #define SRE_OP_NEGATE 24
! #define SRE_OP_RANGE 25
! #define SRE_OP_REPEAT 26
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
***************
*** 69,70 ****
--- 70,74 ----
#define SRE_FLAG_UNICODE 32
#define SRE_FLAG_VERBOSE 64
+ #define SRE_INFO_PREFIX 1
+ #define SRE_INFO_LITERAL 2
+ #define SRE_INFO_CHARSET 4