Regular Expression - old regex module vs. re module
Jim Segrave
jes at nl.demon.net
Fri Jun 30 06:45:51 EDT 2006
In article <1151607229.548737.145800 at d56g2000cwd.googlegroups.com>,
Steve <stever at cruzio.com> wrote:
>Hi All,
>
>I'm having a tough time converting the following regex.compile patterns
>into the new re.compile format. There are also differences between
>regsub.sub() and re.sub().
>
>Could anyone lend a hand?
>
>
>import regsub
>import regex
>
>import re # << need conversion to this module
>
>....
>
> """Convert perl style format symbology to printf tokens.
>
> Take a string and substitute computed printf tokens for perl style
> format symbology.
>
> For example:
>
> ###.## yields %6.2f
> ######## yields %8d
> <<<<< yields %-5s
> """
Perhaps not optimal, but this processes things as requested. Note that
all floats have to be done before any integer patterns are replaced.
==========================
#!/usr/local/bin/python
import re
"""Convert perl style format symbology to printf tokens.
Take a string and substitute computed printf tokens for perl style
format symbology.
For example:
###.## yields %6.2f
######## yields %8d
<<<<< yields %-5s
"""
# Every pattern starts with a negative lookbehind for a backslash, so a
# backslash-escaped format character (e.g. \#) never starts a match.
#
# Float field.  Handles the cases where there are no integer or no
# fractional chars: either '#'s, a '.', then optional '#'s, or a '.'
# followed by '#'s.  Group 1 is the whole field; the fractional '#'s are
# group 2 in the first form and group 3 in the second.
floatPattern = re.compile(r'(?<!\\)(#+\.(#*)|\.(#+))')
# Integer field: a run of '#' (group 1) that is not preceded by a
# backslash or '.', and not followed by '.' or another '#'.
integerPattern = re.compile(r'(?<![\\.])(#+)(?![.#])')
# Left-justified string field: a run of '<' (group 1).
leftJustifiedStringPattern = re.compile(r'(?<!\\)(<+)')
# Right-justified string field: a run of '>' (group 1).
rightJustifiedStringPattern = re.compile(r'(?<!\\)(>+)')
def float_sub(matchobj):
    """Build the printf float token for one matched float field.

    The first capture group is the whole field and supplies the total
    width (the '.' is counted).  The fractional '#'s are in the second
    capture group when it took part in the match, otherwise in the third;
    their count becomes the precision.
    """
    captured = matchobj.groups()
    fraction = captured[1]
    if fraction is None:
        # The field had no integer part, so the fraction is in group 3.
        fraction = captured[2]
    return "%%%d.%df" % (len(captured[0]), len(fraction))
def unperl_format(s):
    """Return *s* with perl-style format fields replaced by printf tokens.

    A run of '<' becomes ``%-Ns``, a run of '>' becomes ``%Ns``, a float
    field such as ``###.##`` becomes ``%W.Pf`` and a run of '#' becomes
    ``%Nd``, where the numbers are taken from the length of the field.
    A format character directly preceded by a backslash is not treated as
    the start of a field; the backslash itself is left in the result.
    All other text, including any literal '%', is passed through as is.
    """
    # Each field type is rewritten in one pass with a callable replacement,
    # so the width is computed per match.  No replacement text contains
    # '#', '<', '>' or a backslash, and the only '.' it can contain is
    # followed by a digit, so a later pass never re-matches earlier output.
    s = leftJustifiedStringPattern.sub(
        lambda m: "%%-%ds" % len(m.group(1)), s)
    s = rightJustifiedStringPattern.sub(
        lambda m: "%%%ds" % len(m.group(1)), s)
    # Must do all floats before ints: the integer pattern would otherwise
    # consume the '#'s on one side of a float field's '.'.
    s = floatPattern.sub(float_sub, s)
    s = integerPattern.sub(lambda m: "%%%dd" % len(m.group(1)), s)
    return s
if __name__ == '__main__':
    # Demo driver: print each sample string and its converted form.
    testarray = ["integer: ####, integer # integer at end #",
                 "float ####.## no decimals ###. no int .### at end ###.",
                 "Left string <<<<<< short left string <",
                 "right string >>>>>> short right string >",
                 "escaped chars \\#### \\####.## \\<\\<<<< \\>\\><<<"]
    for s in testarray:
        # Single-argument print(...) calls behave the same whether print
        # is a statement (Python 2) or a function (Python 3).
        print("Testing: %s" % s)
        print("Result: %s" % unperl_format(s))
        print("")
======================
Running this gives
Testing: integer: ####, integer # integer at end #
Result: integer: %4d, integer %1d integer at end %1d
Testing: float ####.## no decimals ###. no int .### at end ###.
Result: float %7.2f no decimals %4.0f no int %4.3f at end %4.0f
Testing: Left string <<<<<< short left string <
Result: Left string %-6s short left string %-1s
Testing: right string >>>>>> short right string >
Result: right string %6s short right string %1s
Testing: escaped chars \#### \####.## \<\<<<< \>\><<<
Result: escaped chars \#%3d \#%6.2f \<\<%-3s \>\>%-3s
--
Jim Segrave (jes at jes-2.demon.nl)
More information about the Python-list
mailing list