Need a specific sort of string modification. Can someone help?
Mitya Sirenef
msirenef at lightbird.net
Sun Jan 6 14:53:27 EST 2013
On 01/06/2013 01:32 AM, Mitya Sirenef wrote:
> On 01/05/2013 03:35 AM, Sia wrote:
>> I have strings such as:
> >
> > tA.-2AG.-2AG,-2ag
> > or
> > .+3ACG.+5CAACG.+3ACG.+3ACG
> >
> > The plus and minus signs are always followed by a number (say, i). I
> > want python to find each single plus or minus, remove the sign, the
> > number after it and remove i characters after that. So the two strings
> > above become:
> >
> > tA..,
> > and
> > ...
> >
> > How can I do that?
> > Thanks.
>
>
> I think it's a bit cleaner and nicer to do something similar to
> itertools.takewhile but takewhile 'eats' a single next value.
> I was actually doing some stuff that also needed this. I wonder if
> there's a more elegant, robust way to do this?
>
> Here's what I got for now:
>
>
> class BIterator(object):
> """Iterator with 'buffered' takewhile."""
>
> def __init__(self, seq):
> self.seq = iter(seq)
> self.buffer = []
> self.end_marker = object()
> self.last = None
>
> def consume(self, n):
> for _ in range(n): self.next()
>
> def next(self):
> val = self.buffer.pop() if self.buffer else next(self.seq,
> self.end_marker)
> self.last = val
> return val
>
> def takewhile(self, test):
> lst = []
> while True:
> val = self.next()
> if val is self.end_marker:
> return lst
> elif test(val):
> lst.append(val)
> else:
> self.buffer.append(val)
> return lst
>
> def joined_takewhile(self, test):
> return ''.join(self.takewhile(test))
>
> def done(self):
> return bool(self.last is self.end_marker)
>
>
> s = ".+3ACG.+5CAACG.+3ACG.+3ACG"
> not_plusminus = lambda x: x not in "+-"
> isdigit = lambda x: x.isdigit()
>
> def process(s):
> lst = []
> s = BIterator(s)
>
> while True:
> lst.extend(s.takewhile(not_plusminus))
> if s.done(): break
> s.next()
> n = int(s.joined_takewhile(isdigit))
> s.consume(n)
>
> return ''.join(lst)
>
>
> print(process(s))
>
>
> Obviously it assumes the input is well-formed, but the logic would be
> very easy to change to, for example, check for s.done() after each step.
>
> - mitya
>
>
>
I've added some refinements:
class BIterator(object):
    """Iterator wrapper with 'buffered' takewhile and takeuntil.

    ``itertools.takewhile`` discards the first element that fails its
    predicate.  Here that element is pushed back into a buffer so the next
    read returns it.

    Exhaustion is reported by returning the per-instance ``end_marker``
    sentinel from ``next()`` rather than by raising ``StopIteration``.
    Once the sentinel has been handed out, the instance evaluates as
    false.  A fresh instance is true until its first read, even when the
    wrapped iterable is empty.
    """

    def __init__(self, seq):
        """Wrap the iterable *seq*."""
        self.seq = iter(seq)
        self.buffer = []  # pushed-back values, served before reading seq
        self.end_marker = object()  # unique sentinel marking exhaustion
        self.last = None  # most recent value returned by __next__

    def __bool__(self):
        """Return False once the end marker has been read, True before."""
        return self.last is not self.end_marker

    def __next__(self):
        """Return the next value, or ``end_marker`` when exhausted.

        A pushed-back value takes priority over the wrapped iterator.
        The returned value is also recorded in ``self.last``.
        """
        if self.buffer:
            val = self.buffer.pop()
        else:
            val = next(self.seq, self.end_marker)
        self.last = val
        return val

    def consume(self, n):
        """Discard up to *n* values, stopping early at the end of input."""
        for _ in range(n):
            # Reading past the end only yields the marker again, so stop
            # instead of looping for the rest of a possibly huge count.
            if next(self) is self.end_marker:
                break

    def takewhile(self, test):
        """Return a list of leading values for which ``test(value)`` is true.

        The first value that fails *test* is pushed back, not dropped, so
        the next read returns it.
        """
        lst = []
        while True:
            val = next(self)
            if val is self.end_marker:
                return lst
            if not test(val):
                self.buffer.append(val)
                return lst
            lst.append(val)

    def takeuntil(self, test):
        """Return a list of leading values for which ``test(value)`` is false.

        The first value that satisfies *test* is pushed back.
        """
        return self.takewhile(lambda x: not test(x))

    def joined_takewhile(self, test):
        """Like :meth:`takewhile`, but join the (string) values into a str."""
        return ''.join(self.takewhile(test))

    def joined_takeuntil(self, test):
        """Like :meth:`takeuntil`, but join the (string) values into a str."""
        return ''.join(self.takeuntil(test))
def process(s):
    """Strip every signed run from *s* and return the remaining text.

    A run is a ``+`` or ``-`` sign, the decimal count N that follows it,
    and the next N characters.  For example ``"tA.-2AG.-2AG,-2ag"``
    becomes ``"tA..,"``.  A sign with no count after it is removed on its
    own (the count is taken as 0).
    """
    def is_sign(ch):
        return ch in "+-"

    stream = BIterator(s)
    kept = []
    while stream:
        kept.extend(stream.takeuntil(is_sign))
        if not stream:
            # Reached the end of input without finding another sign.
            break
        next(stream)  # drop the sign that takeuntil pushed back
        # isdecimal, not isdigit: isdigit also accepts characters such as
        # superscript digits, which int() rejects with ValueError.
        count = stream.joined_takewhile(str.isdecimal)
        stream.consume(int(count) if count else 0)
    return ''.join(kept)
# Demo input: each "+N" is followed by N characters that process() removes.
s = ".+3ACG.+5CAACG.+3ACG.+3ACG"
# Prints "....": only the four '.' separators are kept.
print(process(s))
--
Lark's Tongue Guide to Python: http://lightbird.net/larks/
More information about the Python-list
mailing list