re sub help

Sat Nov 5 09:37:59 EST 2005

s99999999s2003 at yahoo.com wrote:
> hi
> 
> i have a string :
> a =
> "this\nis\na\nsentence[startdelim]this\nis\nanother[enddelim]this\nis\n"
> 
> inside the string, there are "\n". I don't want to substitute the '\n'
> in between
> the [startdelim] and [enddelim] to ''. I only want to get rid of the
> '\n' everywhere else.

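Here is a solution using re.sub with a callable replacement: an instance of a small class that remembers whether the scan is currently between a startdelim and an enddelim. The callable is invoked once per match, in order, so it keeps the newlines inside the delimiters and removes them everywhere else. Because the state is toggled at every delimiter, it also works when the input text contains multiple startdelim/enddelim pairs.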

import re

# Sample input, repeated twice so that more than one delimited region is
# exercised. Split at the delimiter boundaries for readability only; the
# adjacent literals are concatenated into the same single string.
a = (
    "this\nis\na\nsentence"
    "[startdelim]this\nis\nanother[enddelim]"
    "this\nis\n"
) * 2

class subber(object):
    """Stateful replacement callable for use with ``re.sub``.

    Intended for a pattern whose matches are either the literal words
    'startdelim' / 'enddelim' or some other text (newlines, in this script).
    Delimiter words are always kept and flip the internal flag; any other
    match is kept only while the flag is set, and replaced by the empty
    string otherwise.
    """

    def __init__(self):
        # True once 'startdelim' has been seen and until 'enddelim' is seen.
        self.delimiterSeen = False

    def __call__(self, m):
        """Return the replacement text for match object ``m``."""
        matched = m.group()

        # Both delimiter words pass through unchanged; only the opening one
        # switches the "inside a delimited region" flag on.
        if matched in ('startdelim', 'enddelim'):
            self.delimiterSeen = (matched == 'startdelim')
            return matched

        # Any other match: drop it outside a delimited region, keep it inside.
        if not self.delimiterSeen:
            return ''
        return matched

# Match every newline as well as both delimiter words (without their
# surrounding brackets), so the replacement callable is invoked on the
# delimiters too and can track whether the scan is inside a delimited region.
delimRe = re.compile(r'\n|startdelim|enddelim')

# A fresh subber() is created for this call so the scan starts in the
# "outside the delimiters" state.
newText = delimRe.sub(subber(), a)

# Parenthesised call form: same output as the old print statement on
# Python 2, and valid syntax on Python 3.
print(repr(newText))

Kent