[Python-3000] Dropping find/rfind?
Ron Adam
rrr at ronadam.com
Sat Aug 26 13:46:14 CEST 2006
Nick Coghlan wrote:
> Ron Adam wrote:
>> Nick Coghlan wrote:
[clipped]
>> It might be nice if slice objects could be used in more ways in python.
>> That may work in most cases where you would want a string view.
>
> That's quite an interesting idea. With that approach, rather than having to
> duplicate 'concrete sequence with copying semantics' and 'sequence view with
> non-copying semantics' everywhere, you could just provide methods on objects
> that returned the appropriate slice objects representing the location of
> relevant sections, rather than copies of the sections themselves.
Yes, and possibly having more methods that accept slice objects could
make that idea work in a way that would seem more natural.
> To make that work effectively, you'd need to implement __nonzero__ on slice
> objects as "((self.stop - self.start) // self.step) > 0" (Either that or
> implement __len__, which would contribute to making slice() look more and more
> like xrange(), as someone else noted recently).
Since xrange() has the same signature, it might be nice to be able to
use a slice object directly in xrange to get indices to a substring or list.
For that to work, slice.indices would need to not return None, and/or
xrange would need to accept None. They differ in how they handle
negative indices as well. So I expect it may be too big of a change.
> Using the same signature as partition:
>
> def partition_indices(self, sep, start=None, stop=None):
> if start is None: start = 0
> if stop is None: stop = len(s)
> try:
> idxsep = self.index(sep, start, stop)
> except ValueError:
> return slice(start, stop), slice(0), slice(0)
> endsep = idxsep + len(sep)
> return slice(start, idxsep), slice(idxsep, endsep), slice(endsep, stop)
>
> Then partition() itself would be equivalent to:
>
> def partition(self, sep, start=None, stop=None):
> before, sep, after = self.partition_indices(sep, start, stop)
> return self[before], self[sep], self[after]
>
> Cheers,
> Nick.
Just a little timing for the fun of it. ;-)
2.5c1 (r25c1:51305, Aug 17 2006, 10:41:11) [MSC v.1310 32 bit (Intel)]
splitindex : 0.02866
splitview : 0.28021
splitpartition : 0.34991
splitslice : 0.07892
This may not be the best use case (if you can call it that). It does
show that the slice "as a view" idea may have some potential. But
underneath it's just using index, so a well-written function with index
will probably always be faster.
Cheers,
Ron
"""
Compare different index, string view, and partition methods.
"""
# -------- Split by str.index.
def splitindex(s):
    """Split *s* into plain text and ``{name value}`` tags using str.index.

    Yields 2-tuples:

    * ``(None, text)`` for the text in front of each complete tag (this may
      be the empty string), and for any non-empty text left after the last
      complete tag;
    * ``(name, value)`` for each tag, where *name* is the text between "{"
      and the next space and *value* the text between that space and the
      next "}".
    """
    cursor = 0
    while 1:
        try:
            brace_open = s.index("{", cursor)
            blank = s.index(" ", brace_open)
            brace_close = s.index("}", blank)
        except ValueError:
            # One of the three markers is missing: no further complete tag.
            break
        yield None, s[cursor:brace_open]
        name = s[brace_open + 1:blank]
        value = s[blank + 1:brace_close]
        yield name, value
        cursor = brace_close + 1
    # Whatever follows the last complete tag is reported as plain text.
    tail = s[cursor:]
    if tail:
        yield None, tail
# --------- Simple string view.
class strview(object):
    """A non-copying view of ``source[start:stop]``.

    The view keeps a reference to the source string plus two offsets; text
    is only copied when ``str()`` is called on the view.

    Attributes:
        source: the underlying string the view refers to.
        start:  offset into ``source`` where the view begins.
        stop:   offset into ``source`` where the view ends (exclusive).
    """

    def __new__(cls, source, start=None, stop=None):
        """Create a view of *source*; missing bounds default to the ends."""
        self = object.__new__(cls)
        self.source = source
        # Test against None explicitly.  The former
        # ``stop != None and stop or len(source)`` idiom treated stop == 0
        # as "not given", so an empty view at offset 0 silently became a
        # view of the whole string.
        if start is None:
            start = 0
        if stop is None:
            stop = len(source)
        self.start = start
        self.stop = stop
        return self

    def __str__(self):
        """Return a copy of the viewed section as a real string."""
        return self.source[self.start:self.stop]

    def __len__(self):
        """Return the width of the view; an empty view is therefore falsy."""
        return self.stop - self.start

    def partition(self, sep):
        """Split the view around the first occurrence of *sep*.

        Returns a 3-tuple of views ``(before, separator, after)``.  If *sep*
        does not occur inside the view, returns ``(self, empty, empty)``
        where both empty parts are the shared ``_NULL_STR`` view.
        """
        _src = self.source
        try:
            startsep = _src.index(sep, self.start, self.stop)
        except ValueError:
            # Separator wasn't found!
            return self, _NULL_STR, _NULL_STR
        # Return new views of the three string parts
        endsep = startsep + len(sep)
        return (strview(_src, self.start, startsep),
                strview(_src, startsep, endsep),
                strview(_src, endsep, self.stop))


# Shared empty view handed out by strview.partition() for the missing parts.
_NULL_STR = strview('')
def splitview(s):
    """Split *s* into plain text and ``{name value}`` tags using strview.

    Yields the same 2-tuples as the other splitters in this file:
    ``(None, text)`` for the text in front of every complete tag (possibly
    empty) and for non-empty text after the last complete tag, and
    ``(name, value)`` for each tag.

    A tag only counts when "{", a following space and a following "}" are
    all present.  Otherwise the unconsumed remainder, including the
    unmatched "{", is reported as plain text instead of being dropped.
    """
    rest = strview(s)
    while 1:
        # Work on `after` and commit to `rest` only once a whole tag has
        # been matched, so an incomplete tag leaves `rest` untouched.
        prefix, found, after = rest.partition("{")
        if not found:
            break
        first, found, after = after.partition(" ")
        if not found:
            break
        second, found, after = after.partition("}")
        if not found:
            break
        yield (None, str(prefix))
        yield (str(first), str(second))
        rest = after
    # Views are falsy when empty (strview.__len__), so nothing is yielded
    # when there is no trailing text.
    if rest:
        yield (None, str(rest))
# -------- Split by str.partition.
def splitpartition(s):
    """Split *s* into plain text and ``{name value}`` tags using str.partition.

    Yields ``(None, text)`` for the text in front of each complete tag
    (possibly empty) and ``(name, value)`` for the tag itself.  Text left
    over after the last complete tag is yielded as ``(None, text)`` when it
    is not the empty string.
    """
    remaining = s
    while True:
        head, marker, tail = remaining.partition("{")
        name, marker, tail = tail.partition(" ")
        value, marker, tail = tail.partition("}")
        # A failed partition leaves an empty tail, so every later marker is
        # empty too; checking the last one covers all three searches.
        if not marker:
            break
        yield None, head
        yield name, value
        remaining = tail
    if remaining != '':
        yield None, remaining
# -------- Split by partition slices.
import sys
# Largest index used as an "open-ended" stop.  sys.maxint only exists on
# Python 2; Python 3 provides sys.maxsize instead.
try:
    _MAX_INDEX = sys.maxint
except AttributeError:
    _MAX_INDEX = sys.maxsize


def partslice(s, sep, sub_slice=slice(0, _MAX_INDEX)):
    """Locate the first *sep* in ``s[sub_slice]`` and describe it with slices.

    Like ``str.partition`` but returns slice objects instead of copies.

    Args:
        s: the string to search.
        sep: the separator to look for.
        sub_slice: the section of *s* to search.  Only ``start`` and
            ``stop`` are used (``step`` is ignored); a ``None`` bound means
            the corresponding end of *s*.

    Returns:
        A 3-tuple of slices ``(before, separator, after)`` into *s*.  When
        *sep* is not found, ``before`` is *sub_slice* itself and the other
        two are empty slices (``start == stop``).
    """
    start, stop = sub_slice.start, sub_slice.stop
    # Resolve open-ended bounds up front; otherwise the "empty" slices of
    # the not-found case would be slice(None, None), i.e. the whole string.
    if start is None:
        start = 0
    if stop is None:
        stop = len(s)
    try:
        found = s.index(sep, start, stop)
    except ValueError:
        empty = slice(stop, stop)
        return sub_slice, empty, empty
    foundend = found + len(sep)
    return (slice(start, found),
            slice(found, foundend),
            slice(foundend, stop))
def splitslice(s):
    """Split *s* into plain text and ``{name value}`` tags using partslice.

    Yields ``(None, text)`` for the text in front of each complete tag
    (possibly empty) and ``(name, value)`` for the tag itself.  Text left
    over after the last complete tag is yielded as ``(None, text)`` only
    when it is non-empty.
    """
    # Bound the search by the real length of the string so the slices that
    # come back always describe actual positions in s.
    rest = slice(0, len(s))
    while 1:
        prefix, found, temp = partslice(s, "{", rest)
        first, found, temp = partslice(s, " ", temp)
        second, found, temp = partslice(s, "}", temp)
        # partslice signals "not found" with an empty separator slice; a
        # failed earlier search leaves an empty region, so checking the
        # last result covers all three.
        if found.start == found.stop:
            break
        yield None, s[prefix]
        yield s[first], s[second]
        rest = temp
    # Decide on the selected text rather than on the slice bounds, so an
    # empty remainder (empty input, or input ending in "}") yields nothing.
    tail = s[rest]
    if tail:
        yield None, tail
# -------- Tests.
import time
# Timer used for the measurements.  time.clock() was removed in Python 3.8,
# so prefer time.perf_counter() where it exists and fall back to
# time.clock() on interpreters that predate it.
_timer = getattr(time, "perf_counter", None) or time.clock

# print(...) with one parenthesised argument prints the same text under
# both the Python 2 print statement and the Python 3 print function.
print(sys.version)

s = 'foo{spam eggs}bar{ham eggs}fob{beacon eggs}' * 2000 + 'xyz'
# Reference result: every splitter must reproduce what splitindex yields.
r = list(splitindex(s))
functions = [splitindex, splitview, splitpartition, splitslice]
for f in functions:
    start = _timer()
    result = list(f(s))
    elapsed = _timer() - start
    print('%-16s: %7.5f' % (f.__name__, elapsed))
    assert result == r, '%s disagrees with splitindex' % f.__name__
More information about the Python-3000
mailing list