Need a specific sort of string modification. Can someone help?

Mitya Sirenef msirenef at lightbird.net
Sun Jan 6 20:53:27 CET 2013


On 01/06/2013 01:32 AM, Mitya Sirenef wrote:
> On 01/05/2013 03:35 AM, Sia wrote:
> > I have strings such as:
> >
> > tA.-2AG.-2AG,-2ag
> > or
> > .+3ACG.+5CAACG.+3ACG.+3ACG
> >
> > The plus and minus signs are always followed by a number (say, i). I
> > want Python to find each plus or minus sign, remove the sign and the
> > number after it, and then remove the i characters that follow. So the
> > two strings above become:
> >
> > tA..,
> > and
> > ....
> >
> > How can I do that?
> > Thanks.
>
>
> I think it's a bit cleaner and nicer to do something similar to
> itertools.takewhile, but itertools.takewhile 'eats' (discards) the
> first value that fails the test. I was actually doing some stuff that
> also needed this. I wonder if there's a more elegant, robust way to do
> this?
>
> Here's what I got for now:
>
>
> class BIterator(object):
>     """Iterator with 'buffered' takewhile."""
>
>     def __init__(self, seq):
>         self.seq        = iter(seq)
>         self.buffer     = []
>         self.end_marker = object()
>         self.last       = None
>
>     def consume(self, n):
>         for _ in range(n): self.next()
>
>     def next(self):
>         val = self.buffer.pop() if self.buffer else next(self.seq, 
> self.end_marker)
>         self.last = val
>         return val
>
>     def takewhile(self, test):
>         lst = []
>         while True:
>             val = self.next()
>             if val is self.end_marker:
>                 return lst
>             elif test(val):
>                 lst.append(val)
>             else:
>                 self.buffer.append(val)
>                 return lst
>
>     def joined_takewhile(self, test):
>         return ''.join(self.takewhile(test))
>
>     def done(self):
>         return bool(self.last is self.end_marker)
>
>
> s = ".+3ACG.+5CAACG.+3ACG.+3ACG"
> not_plusminus = lambda x: x not in "+-"
> isdigit       = lambda x: x.isdigit()
>
> def process(s):
>     lst = []
>     s   = BIterator(s)
>
>     while True:
>         lst.extend(s.takewhile(not_plusminus))
>         if s.done(): break
>         s.next()
>         n = int(s.joined_takewhile(isdigit))
>         s.consume(n)
>
>     return ''.join(lst)
>
>
> print(process(s))
>
>
> Obviously it assumes the input is well-formed, but the logic would be
> very easy to change to, for example, check for s.done() after each step.
>
>  - mitya
>
>
>

I've added some refinements:



class BIterator(object):
    """Iterator wrapper with 'buffered' takewhile and takeuntil.

    When takewhile()/takeuntil() stop at a value, that value is not lost:
    it is pushed into an internal buffer and handed out again by the next
    read.

    Exhaustion is signalled by ``next()`` returning the ``end_marker``
    sentinel instead of raising StopIteration.  The instance is truthy
    until that sentinel has been read at least once.
    """

    def __init__(self, seq):
        """Wrap the iterable *seq*."""
        self.seq = iter(seq)
        self.buffer = []            # pushed-back values, served before seq
        self.end_marker = object()  # per-instance "no more values" sentinel
        self.last = None            # most recent value returned by next()

    def __bool__(self):
        """Return False once the end marker has been read, True before."""
        return self.last is not self.end_marker

    def __next__(self):
        """Return the next value, or ``end_marker`` if nothing is left."""
        if self.buffer:
            val = self.buffer.pop()
        else:
            val = next(self.seq, self.end_marker)
        self.last = val
        return val

    def consume(self, n):
        """Discard up to *n* values, stopping early at the end of the data."""
        for _ in range(n):
            # Once the end marker shows up every further read would return
            # it again, so there is no point in looping on for a huge n.
            if next(self) is self.end_marker:
                break

    def takewhile(self, test):
        """Return the list of leading values for which test(value) is true.

        The first value that fails the test is pushed back into the buffer
        so the next read returns it.  Reaching the end of the data also
        ends the run; the end marker is never passed to *test*.
        """
        lst = []
        while True:
            val = next(self)
            if val is self.end_marker:
                return lst
            if not test(val):
                self.buffer.append(val)
                return lst
            lst.append(val)

    def takeuntil(self, test):
        """Return the list of leading values for which test(value) is false."""
        return self.takewhile(lambda x: not test(x))

    def joined_takewhile(self, test):
        """Like takewhile(), but return the values joined into one string."""
        return ''.join(self.takewhile(test))

    def joined_takeuntil(self, test):
        """Like takeuntil(), but return the values joined into one string."""
        return ''.join(self.takeuntil(test))


def process(s):
    """Return *s* with every '+N' / '-N' marker and its N following chars removed.

    Each '+' or '-' is dropped together with the run of decimal digits
    right after it (read as the count N) and the N characters after those
    digits.  A sign with no decimal digits after it is dropped on its own
    (count 0).  If N is larger than what remains, the rest of the string
    is dropped.

    Example: ``"tA.-2AG.-2AG,-2ag"`` becomes ``"tA..,"``.
    """
    chars = BIterator(s)
    kept = []

    def is_sign(ch):
        return ch in "+-"

    while chars:
        kept.extend(chars.takeuntil(is_sign))
        # Drop the sign itself; at the end of input this just re-reads the
        # end marker, which then terminates the loop.
        next(chars)
        # str.isdecimal, not str.isdigit: isdigit also accepts characters
        # such as superscript digits that int() refuses to convert.
        count = chars.joined_takewhile(str.isdecimal)
        chars.consume(int(count) if count else 0)

    return ''.join(kept)


# Demo: run process() on the second sample string from the question.
# Each of the four '+N...' markers is removed, so this prints "....".
s = ".+3ACG.+5CAACG.+3ACG.+3ACG"
print(process(s))




-- 
Lark's Tongue Guide to Python: http://lightbird.net/larks/




More information about the Python-list mailing list