[Python-checkins] CVS: python/dist/src/Modules _sre.c,2.67,2.68

Fredrik Lundh effbot@users.sourceforge.net
Sat, 20 Oct 2001 10:48:49 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv8184/Modules

Modified Files:
	_sre.c 
Log Message:


rewrote the pattern.split method in C

also restored SRE Unicode support for 1.6/2.0/2.1


Index: _sre.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v
retrieving revision 2.67
retrieving revision 2.68
diff -C2 -d -r2.67 -r2.68
*** _sre.c	2001/10/18 19:30:11	2.67
--- _sre.c	2001/10/20 17:48:46	2.68
***************
*** 34,37 ****
--- 34,38 ----
   * 2001-09-18 fl  added _getliteral helper
   * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
+  * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
   *
   * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
***************
*** 66,77 ****
  #undef VERBOSE
  
! #if PY_VERSION_HEX >= 0x01060000 && defined(Py_USING_UNICODE)
  /* defining this enables unicode support (default under 1.6a1 and later) */
  #define HAVE_UNICODE
  #endif
  
  /* -------------------------------------------------------------------- */
  /* optional features */
  
  /* prevent run-away recursion (bad patterns on long strings) */
  
--- 67,83 ----
  #undef VERBOSE
  
! #if PY_VERSION_HEX >= 0x01060000
! #if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
  /* defining this enables unicode support (default under 1.6a1 and later) */
  #define HAVE_UNICODE
  #endif
+ #endif
  
  /* -------------------------------------------------------------------- */
  /* optional features */
  
+ /* test: define to use sre._split helper instead of C code */
+ #undef USE_PYTHON_SPLIT
+ 
  /* prevent run-away recursion (bad patterns on long strings) */
  
***************
*** 1489,1493 ****
  
  LOCAL(PyObject*)
! state_getslice(SRE_STATE* state, int index, PyObject* string)
  {
      int i, j;
--- 1495,1499 ----
  
  LOCAL(PyObject*)
! state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
  {
      int i, j;
***************
*** 1496,1500 ****
  
      if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
!         i = j = 0;
      } else {
          i = ((char*)state->mark[index] - (char*)state->beginning) /
--- 1502,1512 ----
  
      if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
!         if (empty)
!             /* want empty string */
!             i = j = 0;
!         else {
!             Py_INCREF(Py_None);
!             return Py_None;
!         }
      } else {
          i = ((char*)state->mark[index] - (char*)state->beginning) /
***************
*** 1783,1786 ****
--- 1795,1799 ----
  }
  
+ #if defined(USE_PYTHON_SPLIT)
  static PyObject*
  pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
***************
*** 1799,1802 ****
--- 1812,1816 ----
          );
  }
+ #endif
  
  static PyObject*
***************
*** 1806,1810 ****
      PyObject* list;
      int status;
!     int i;
  
      PyObject* string;
--- 1820,1824 ----
      PyObject* list;
      int status;
!     int i, b, e;
  
      PyObject* string;
***************
*** 1843,1857 ****
              switch (self->groups) {
              case 0:
!                 item = PySequence_GetSlice(
!                     string,
!                     ((char*) state.start - (char*) state.beginning) /
!                     state.charsize,
!                     ((char*) state.ptr - (char*) state.beginning) /
!                     state.charsize);
                  if (!item)
                      goto error;
                  break;
              case 1:
!                 item = state_getslice(&state, 1, string);
                  if (!item)
                      goto error;
--- 1857,1870 ----
              switch (self->groups) {
              case 0:
!                 b = ((char*) state.start - (char*) state.beginning) /
!                     state.charsize;
!                 e = ((char*) state.ptr - (char*) state.beginning) /
!                     state.charsize;
!                 item = PySequence_GetSlice(string, b, e);
                  if (!item)
                      goto error;
                  break;
              case 1:
!                 item = state_getslice(&state, 1, string, 1);
                  if (!item)
                      goto error;
***************
*** 1862,1866 ****
                      goto error;
                  for (i = 0; i < self->groups; i++) {
!                     PyObject* o = state_getslice(&state, i+1, string);
                      if (!o) {
                          Py_DECREF(item);
--- 1875,1879 ----
                      goto error;
                  for (i = 0; i < self->groups; i++) {
!                     PyObject* o = state_getslice(&state, i+1, string, 1);
                      if (!o) {
                          Py_DECREF(item);
***************
*** 1903,1906 ****
--- 1916,2030 ----
      
  }
+ 
+ #if !defined(USE_PYTHON_SPLIT) /* C implementation of pattern.split; compiled only when USE_PYTHON_SPLIT is not defined */
+ static PyObject*
+ pattern_split(PatternObject* self, PyObject* args, PyObject* kw) /* returns a new list of pieces, or NULL on failure */
+ {
+     SRE_STATE state;
+     PyObject* list; /* result list being built */
+     PyObject* item; /* current slice; released right after each append */
+     int status;
+     int n; /* number of splits recorded so far */
+     int i, b, e; /* i: start index of the pending segment; b/e: offsets of state.start/state.ptr after a search */
+     int g; /* group loop index */
+ 
+     PyObject* string;
+     int maxsplit = 0; /* 0 means "no limit" (see the loop condition below) */
+     static char* kwlist[] = { "source", "maxsplit", NULL };
+     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
+                                      &string, &maxsplit))
+         return NULL;
+ 
+     string = state_init(&state, self, string, 0, INT_MAX); /* string is replaced by state_init's result; NULL is treated as failure */
+     if (!string)
+         return NULL;
+ 
+     list = PyList_New(0); /* NOTE(review): result is not checked for NULL before it is used below */
+ 
+     i = n = 0;
+ 
+     while (maxsplit == 0 || n < maxsplit) { /* one iteration per search attempt */
+ 
+         state_reset(&state);
+ 
+         state.ptr = state.start; /* begin the next search at the current start position */
+ 
+         if (state.charsize == 1) {
+             status = sre_search(&state, PatternObject_GetCode(self));
+         } else { /* NOTE(review): if HAVE_UNICODE is undefined this branch leaves status unassigned */
+ #if defined(HAVE_UNICODE)
+             status = sre_usearch(&state, PatternObject_GetCode(self));
+ #endif
+         }
+ 
+         if (status > 0) { /* positive status is handled as "match found" */
+ 
+             if (state.start == state.ptr) { /* start and ptr coincide: nothing was consumed */
+                 if (i >= state.endpos) /* NOTE(review): i only advances after a non-empty match - confirm this is the intended end test */
+                     break;
+                 /* skip one character */
+                 state.start = (void*) ((char*) state.ptr + state.charsize);
+                 continue; /* retry without appending anything or counting a split */
+             }
+ 
+             b = ((char*) state.start - (char*) state.beginning) / /* byte offset divided by charsize gives an item index */
+                 state.charsize;
+             e = ((char*) state.ptr - (char*) state.beginning) /
+                 state.charsize;
+ 
+             /* get segment before this match */
+             item = PySequence_GetSlice(string, i, b);
+             if (!item)
+                 goto error;
+             status = PyList_Append(list, item);
+             Py_DECREF(item); /* drop our reference whether or not the append succeeded */
+             if (status < 0)
+                 goto error;
+ 
+             for (g = 0; g < self->groups; g++) { /* append one entry per group, numbered from 1 */
+                 item = state_getslice(&state, g+1, string, 0); /* empty=0: per the state_getslice hunk in this diff, an unset group yields None */
+                 if (!item)
+                     goto error;
+                 status = PyList_Append(list, item);
+                 Py_DECREF(item);
+                 if (status < 0)
+                     goto error;
+             }
+ 
+             i = e; /* the next segment starts where this match ended */
+             n = n + 1;
+ 
+             state.start = state.ptr; /* resume searching from the end of this match */
+ 
+         } else {
+ 
+             if (status == 0) /* zero status ends the loop without an error */
+                 break;
+ 
+             pattern_error(status); /* defined elsewhere; presumably sets the Python exception - confirm */
+             goto error;
+ 
+         }
+     }
+ 
+     /* get segment following last match */
+     item = PySequence_GetSlice(string, i, state.endpos);
+     if (!item)
+         goto error;
+     status = PyList_Append(list, item);
+     Py_DECREF(item);
+     if (status < 0)
+         goto error;
+ 
+     state_fini(&state);
+     return list;
+ 
+ error: /* shared failure exit: release the partial list and the search state */
+     Py_DECREF(list);
+     state_fini(&state);
+     return NULL;
+     
+ }
+ #endif /* !USE_PYTHON_SPLIT */
  
  static PyObject*