[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.19,2.20 sre_constants.h,2.5,2.6

Fredrik Lundh python-dev@python.org
Sun, 2 Jul 2000 05:00:09 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory slayer.i.sourceforge.net:/tmp/cvs-serv10442/Modules

Modified Files:
	_sre.c sre_constants.h 
Log Message:


-- Use charset bitmaps where appropriate.  This gives a 5-10%
   speedup for some tests, including the Python tokenizer.

-- Added support for an optional charset anchor to the engine
   (currently unused by the code generator).

-- Removed the workaround for the array module bug.


Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.19
retrieving revision 2.20
diff -C2 -r2.19 -r2.20
*** _sre.c	2000/07/01 23:49:14	2.19
--- _sre.c	2000/07/02 12:00:07	2.20
***************
*** 379,382 ****
--- 379,389 ----
  			break;
  
+         case SRE_OP_CHARSET:
+             /* args: <bitmap> (16 bits per code word) */
+             if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
+                 return ok;
+             set += 16;
+             break;
+ 
  		case SRE_OP_CATEGORY:
              /* args: <category> */
***************
*** 953,979 ****
  	SRE_CHAR* end = state->end;
  	int status = 0;
!     int prefix_len = 0;
!     SRE_CODE* prefix;
!     SRE_CODE* overlap;
!     int literal = 0;
  
      if (pattern[0] == SRE_OP_INFO) {
          /* optimization info block */
!         /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
  
          if (pattern[3] > 0) {
              /* adjust end point (but make sure we leave at least one
!                character in there) */
              end -= pattern[3]-1;
              if (end <= ptr)
                  end = ptr+1;
          }
- 
-         literal = pattern[2];
- 
-         prefix = pattern + 6;
-         prefix_len = pattern[5];
  
!         overlap = prefix + prefix_len - 1;
  
          pattern += 1 + pattern[1];
--- 960,989 ----
  	SRE_CHAR* end = state->end;
  	int status = 0;
!     int prefix_len;
!     SRE_CODE* prefix = NULL;
!     SRE_CODE* charset = NULL;
!     SRE_CODE* overlap = NULL;
!     int flags = 0;
  
      if (pattern[0] == SRE_OP_INFO) {
          /* optimization info block */
!         /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
! 
!         flags = pattern[2];
  
          if (pattern[3] > 0) {
              /* adjust end point (but make sure we leave at least one
!                character in there, so literal search will work) */
              end -= pattern[3]-1;
              if (end <= ptr)
                  end = ptr+1;
          }
  
!         if (flags & SRE_INFO_PREFIX) {
!             prefix_len = pattern[5];
!             prefix = pattern + 6;
!             overlap = prefix + prefix_len - 1;
!         } else if (flags & SRE_INFO_CHARSET)
!             charset = pattern + 5;
  
          pattern += 1 + pattern[1];
***************
*** 981,985 ****
  
  #if defined(USE_FAST_SEARCH)
!     if (prefix_len > 1) {
          /* pattern starts with a known prefix.  use the overlap
             table to skip forward as fast as we possibly can */
--- 991,995 ----
  
  #if defined(USE_FAST_SEARCH)
!     if (prefix && overlap && prefix_len > 1) {
          /* pattern starts with a known prefix.  use the overlap
             table to skip forward as fast as we possibly can */
***************
*** 999,1004 ****
                          state->start = ptr - prefix_len + 1;
                          state->ptr = ptr + 1;
!                         if (literal)
!                             return 1; /* all of it */
                          status = SRE_MATCH(state, pattern + 2*prefix_len);
                          if (status != 0)
--- 1009,1014 ----
                          state->start = ptr - prefix_len + 1;
                          state->ptr = ptr + 1;
!                         if (flags & SRE_INFO_LITERAL)
!                             return 1; /* we got all of it */
                          status = SRE_MATCH(state, pattern + 2*prefix_len);
                          if (status != 0)
***************
*** 1017,1023 ****
  #endif
  
! 	if (pattern[0] == SRE_OP_LITERAL) {
! 		/* pattern starts with a literal character.  this is used for
!            short prefixes, and if fast search is disabled*/
  		SRE_CODE chr = pattern[1];
  		for (;;) {
--- 1027,1033 ----
  #endif
  
!     if (pattern[0] == SRE_OP_LITERAL) {
! 		/* pattern starts with a literal character.  this is used
!            for short prefixes, and if fast search is disabled */
  		SRE_CODE chr = pattern[1];
  		for (;;) {
***************
*** 1033,1036 ****
--- 1043,1062 ----
  				break;
  		}
+ #if 0
+     } else if (charset) {
+ 		/* pattern starts with a character from a known set */
+ 		for (;;) {
+ 			while (ptr < end && !SRE_MEMBER(charset, ptr[0]))
+ 				ptr++;
+ 			if (ptr == end)
+ 				return 0;
+ 			TRACE(("%8d: === SEARCH === charset\n", PTR(ptr)));
+ 			state->start = ptr;
+ 			state->ptr = ptr;
+ 			status = SRE_MATCH(state, pattern);
+ 			if (status != 0)
+ 				break;
+         }
+ #endif
  	} else
  		/* general case */
***************
*** 1045,1048 ****
--- 1071,1075 ----
  	return status;
  }
+     
  
  #if !defined(SRE_RECURSIVE)

Index: sre_constants.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/sre_constants.h,v
retrieving revision 2.5
retrieving revision 2.6
diff -C2 -r2.5 -r2.6
*** sre_constants.h	2000/06/30 10:41:31	2.5
--- sre_constants.h	2000/07/02 12:00:07	2.6
***************
*** 21,41 ****
  #define SRE_OP_CALL 7
  #define SRE_OP_CATEGORY 8
! #define SRE_OP_GROUP 9
! #define SRE_OP_GROUP_IGNORE 10
! #define SRE_OP_IN 11
! #define SRE_OP_IN_IGNORE 12
! #define SRE_OP_INFO 13
! #define SRE_OP_JUMP 14
! #define SRE_OP_LITERAL 15
! #define SRE_OP_LITERAL_IGNORE 16
! #define SRE_OP_MARK 17
! #define SRE_OP_MAX_REPEAT 18
! #define SRE_OP_MAX_REPEAT_ONE 19
! #define SRE_OP_MIN_REPEAT 20
! #define SRE_OP_NOT_LITERAL 21
! #define SRE_OP_NOT_LITERAL_IGNORE 22
! #define SRE_OP_NEGATE 23
! #define SRE_OP_RANGE 24
! #define SRE_OP_REPEAT 25
  #define SRE_AT_BEGINNING 0
  #define SRE_AT_BEGINNING_LINE 1
--- 21,42 ----
  #define SRE_OP_CALL 7
  #define SRE_OP_CATEGORY 8
! #define SRE_OP_CHARSET 9
! #define SRE_OP_GROUP 10
! #define SRE_OP_GROUP_IGNORE 11
! #define SRE_OP_IN 12
! #define SRE_OP_IN_IGNORE 13
! #define SRE_OP_INFO 14
! #define SRE_OP_JUMP 15
! #define SRE_OP_LITERAL 16
! #define SRE_OP_LITERAL_IGNORE 17
! #define SRE_OP_MARK 18
! #define SRE_OP_MAX_REPEAT 19
! #define SRE_OP_MAX_REPEAT_ONE 20
! #define SRE_OP_MIN_REPEAT 21
! #define SRE_OP_NOT_LITERAL 22
! #define SRE_OP_NOT_LITERAL_IGNORE 23
! #define SRE_OP_NEGATE 24
! #define SRE_OP_RANGE 25
! #define SRE_OP_REPEAT 26
  #define SRE_AT_BEGINNING 0
  #define SRE_AT_BEGINNING_LINE 1
***************
*** 69,70 ****
--- 70,74 ----
  #define SRE_FLAG_UNICODE 32
  #define SRE_FLAG_VERBOSE 64
+ #define SRE_INFO_PREFIX 1
+ #define SRE_INFO_LITERAL 2
+ #define SRE_INFO_CHARSET 4