[Python-3000] Dropping find/rfind?

Ron Adam rrr at ronadam.com
Sat Aug 26 13:46:14 CEST 2006


Nick Coghlan wrote:
> Ron Adam wrote:
>> Nick Coghlan wrote:

[clipped]

>> It might be nice if slice objects could be used in more ways in python. 
>> That may work in most cases where you would want a string view.
> 
> That's quite an interesting idea. With that approach, rather than having to 
> duplicate 'concrete sequence with copying semantics' and 'sequence view with 
> non-copying semantics' everywhere, you could just provide methods on objects 
> that returned the appropriate slice objects representing the location of 
> relevant sections, rather than copies of the sections themselves.

Yes, and possibly having more methods that accept slice objects could 
make that idea work in a way that would seem more natural.


> To make that work effectively, you'd need to implement __nonzero__ on slice 
> objects as "((self.stop - self.start) // self.step) > 0" (Either that or 
> implement __len__, which would contribute to making slice() look more and more 
> like xrange(), as someone else noted recently).

Since xrange() has the same signature, it might be nice to be able to
use a slice object directly in xrange to get indices to a substring or list.

For that to work, slice.indices would need to not return None, and/or
xrange would need to accept None.  They differ in how they handle
negative indices as well.  So I expect it may be too big of a change.


> Using the same signature as partition:
> 
>     def partition_indices(self, sep, start=None, stop=None):
>         if start is None: start = 0
>         if stop is None: stop = len(s)
>         try:
>             idxsep = self.index(sep, start, stop)
>         except ValueError:
>             return slice(start, stop), slice(0), slice(0)
>         endsep = idxsep + len(sep)
>         return slice(start, idxsep), slice(idxsep, endsep), slice(endsep, stop)
> 
> Then partition() itself would be equivalent to:
> 
>     def partition(self, sep, start=None, stop=None):
>         before, sep, after = self.partition_indices(sep, start, stop)
>         return self[before], self[sep], self[after]
> 
> Cheers,
> Nick.


Just a little timing for the fun of it. ;-)


2.5c1 (r25c1:51305, Aug 17 2006, 10:41:11) [MSC v.1310 32 bit (Intel)]
splitindex      : 0.02866
splitview       : 0.28021
splitpartition  : 0.34991
splitslice      : 0.07892


This may not be the best use case (if you can call it that), but it does
show that the slice "as a view" idea may have some potential. Underneath,
though, it's just using index, so a well-written function built directly
on index will probably always be faster.

Cheers,
    Ron


"""
     Compare different index, string view, and partition methods.
"""

# -------- Split by str.index.
def splitindex(s):
    """Split *s* into literal text and ``{name value}`` fields using str.index.

    Yields 2-tuples.  Literal text preceding a field is yielded as
    ``(None, text)`` (the text may be empty); each complete field is
    yielded as ``(name, value)``, where *name* is the text between "{" and
    the first following space and *value* is the text between that space
    and the next "}".  Scanning stops at the first incomplete field, and
    whatever has not been consumed by then is yielded as ``(None, tail)``
    if it is non-empty.
    """
    cursor = 0
    while True:
        try:
            # Each search starts where the previous one matched, so the
            # three delimiters must appear in this order.
            brace_open = s.index("{", cursor)
            space = s.index(" ", brace_open)
            brace_close = s.index("}", space)
        except ValueError:
            # Any missing delimiter ends the scan.
            break
        yield None, s[cursor:brace_open]
        yield s[brace_open + 1:space], s[space + 1:brace_close]
        cursor = brace_close + 1
    tail = s[cursor:]
    if tail:
        yield None, tail


# --------- Simple string view.
class strview(object):
    """A non-copying window onto ``source[start:stop]``.

    The view only stores the source object and two offsets; the substring
    is materialised on demand by ``str(view)``.  ``len(view)`` is computed
    as ``stop - start``, so the offsets are expected to be non-negative
    with ``start <= stop`` (NOTE(review): negative or reversed bounds are
    not normalised here).
    """

    def __new__(cls, source, start=None, stop=None):
        """Create a view of *source* limited to ``[start, stop)``.

        *start* defaults to 0 and *stop* to ``len(source)``.
        """
        self = object.__new__(cls)
        self.source = source
        # Compare against None explicitly: an ``x and y or z`` expression
        # would treat a legitimate ``stop == 0`` as "not given" and widen
        # an empty view to the whole source.
        if start is None:
            start = 0
        if stop is None:
            stop = len(source)
        self.start = start
        self.stop = stop
        return self

    def __str__(self):
        """Return the viewed substring as a real string (this copies)."""
        return self.source[self.start:self.stop]

    def __len__(self):
        """Return the number of characters covered by the view."""
        return self.stop - self.start

    def partition(self, sep):
        """Split the view around the first occurrence of *sep*.

        Returns a 3-tuple ``(before, separator, after)`` of views sharing
        this view's source.  If *sep* does not occur inside the view, the
        result is ``(self, _NULL_STR, _NULL_STR)``; the two empty views
        are falsy because their length is 0.
        """
        _src = self.source
        try:
            startsep = _src.index(sep, self.start, self.stop)
        except ValueError:
            # Separator wasn't found!
            return self, _NULL_STR, _NULL_STR
        # Return new views of the three string parts
        endsep = startsep + len(sep)
        return (strview(_src, self.start, startsep),
                strview(_src, startsep, endsep),
                strview(_src, endsep, self.stop))


# Shared empty view returned by partition() when the separator is absent.
_NULL_STR = strview('')

def splitview(s):
    """Split *s* into literal text and ``{name value}`` fields via strview.

    Produces the same stream as splitindex and splitpartition: literal
    text before a field is yielded as ``(None, text)`` (possibly empty),
    each complete field as ``(name, value)``, and any unconsumed non-empty
    remainder as a final ``(None, tail)``.

    A field is only consumed once all three delimiters ("{", " ", "}")
    have been found, so an unterminated field at the end of the input is
    reported as part of the trailing text instead of being dropped.
    """
    rest = strview(s)
    while 1:
        # Work on ``after`` and only commit it to ``rest`` once the whole
        # field has been matched; ``rest`` therefore always marks the
        # start of the text that has not been yielded yet.
        prefix, found, after = rest.partition("{")
        if not found:
            break
        first, found, after = after.partition(" ")
        if not found:
            break
        second, found, after = after.partition("}")
        if not found:
            break
        yield (None, str(prefix))
        yield (str(first), str(second))
        rest = after
    # A view is falsy when its length is 0, so nothing is emitted for an
    # empty remainder.
    if rest:
        yield (None, str(rest))


# -------- Split by str.partition.
def splitpartition(s):
    """Split *s* into literal text and ``{name value}`` fields via partition.

    Yields ``(None, text)`` for the literal text before each complete
    field (possibly empty) and ``(name, value)`` for the field itself.
    The first incomplete field stops the scan; whatever has not been
    consumed by then is yielded as ``(None, tail)`` unless it is the
    empty string.
    """
    remaining = s
    while True:
        # Chain three partitions.  If an earlier separator is missing the
        # later ones operate on '' and the final separator comes back
        # empty, so a single check after the chain is enough.
        text, _, tail = remaining.partition("{")
        name, _, tail = tail.partition(" ")
        value, closer, tail = tail.partition("}")
        if not closer:
            break
        yield None, text
        yield name, value
        remaining = tail
    if remaining != '':
        yield None, remaining


# -------- Split by partition slices.
import sys

def partslice(s, sep, sub_slice=slice(0, None)):
    """Partition the region ``s[sub_slice]`` around *sep*, returning slices.

    Returns a 3-tuple of slice objects ``(before, separator, after)`` that
    index into *s*; no substrings are copied.  If *sep* does not occur in
    the region, the result is ``(region, empty, empty)`` where both empty
    slices are ``slice(stop, stop)``, so callers can detect "not found"
    with ``found.start == found.stop``.

    Only ``sub_slice.start`` and ``sub_slice.stop`` are used (the step is
    ignored).  A ``None`` start means 0 and a ``None`` stop means
    ``len(s)``; the default covers the whole of *s*.
    """
    start, stop = sub_slice.start, sub_slice.stop
    # Resolve open-ended bounds up front.  Left as None they would leak
    # into the returned slices, where slice(None, None) selects the whole
    # string instead of nothing and start == stop no longer means "empty".
    if start is None:
        start = 0
    if stop is None:
        stop = len(s)
    try:
        found = s.index(sep, start, stop)
    except ValueError:
        empty = slice(stop, stop)
        return slice(start, stop), empty, empty
    foundend = found + len(sep)
    return (slice(start, found),
            slice(found, foundend),
            slice(foundend, stop))

def splitslice(s):
    """Split *s* into literal text and ``{name value}`` fields via partslice.

    Yields ``(None, text)`` for the literal text before each complete
    field (possibly empty) and ``(name, value)`` for the field itself.
    The first incomplete field stops the scan, and the unconsumed
    remainder is yielded as ``(None, tail)`` only if it is non-empty.
    """
    # Bound the working slice by the real length of s.  partslice carries
    # the stop value through to the slices it returns, so with a true end
    # offset ``rest.start != rest.stop`` below is an accurate "anything
    # left?" test; an oversized sentinel stop would make it true even at
    # the end of the string and emit a spurious (None, '').
    rest = slice(0, len(s))
    while 1:
        prefix, found, temp = partslice(s, "{", rest)
        first, found, temp = partslice(s, " ", temp)
        second, found, temp = partslice(s, "}", temp)
        # partslice signals "not found" with an empty separator slice; a
        # miss on an earlier separator leaves an empty region, so the
        # later searches miss as well and one check covers all three.
        if found.start == found.stop:
            break
        yield None, s[prefix]
        yield s[first], s[second]
        rest = temp
    if rest.start != rest.stop:
        yield None, s[rest]

# -------- Tests.
import time
# Identify the interpreter build the timings below belong to.
print(sys.version)

# Sample input: 2000 repetitions of three well-formed fields plus a
# trailing literal.  splitindex provides the reference result.
s = 'foo{spam eggs}bar{ham eggs}fob{beacon eggs}' * 2000 + 'xyz'
r = list(splitindex(s))
functions = [splitindex, splitview, splitpartition, splitslice]
for func in functions:
    # Time one full pass of each splitter and verify that it produces
    # exactly the reference output.
    began = time.clock()
    result = list(func(s))
    elapsed = time.clock() - began
    print('%-16s: %7.5f' % (func.__name__, elapsed))
    assert result == r






More information about the Python-3000 mailing list