How to write simple code to match strings?
Stefan Behnel
stefan_ml at behnel.de
Wed Dec 30 09:34:12 EST 2009
Steven D'Aprano, 30.12.2009 07:01:
> def _re_match_items(s):
> # Setup some regular expressions.
> COMMON_RE = r'\$?([-+]?[0-9,]*\.?[0-9,]+)'
> FLOAT_RE = COMMON_RE + '$'
> BRACKETED_FLOAT_RE = r'\(' + COMMON_RE + r'\)$'
> DATE_RE = r'\d{1,2}-\w+-\d{1,2}$'
> mo = re.match(FLOAT_RE, s) # "mo" short for "match object"
> if mo:
> return float(mo.group(1).replace(',', ''))
> # Otherwise mo will be None and we go on to the next test.
> mo = re.match(BRACKETED_FLOAT_RE, s)
> if mo:
> return -float(mo.group(1).replace(',', ''))
> if re.match(DATE_RE, s):
> return dateutil.parser.parse(s, dayfirst=True)
> raise ValueError("bad string can't be matched")
Given that this is meant for converting single data items, which may happen
quite frequently in a program (depending on the size of the input), you
might want to use pre-compiled regexps here.
Also, you can convert the above into a single regexp with multiple
alternative groups and then just run the matcher once, e.g. (untested):
# Numeric core shared by both float patterns: an optional "$", an optional
# sign, then digits that may contain "," separators and one ".".
# Group 1 captures the number text without the "$".
COMMON_RE = r'\$?([-+]?[0-9,]*\.?[0-9,]+)'
FLOAT_RE = COMMON_RE + '$'
BRACKETED_FLOAT_RE = r'\(' + COMMON_RE + r'\)$'
DATE_RE = r'(\d{1,2}-\w+-\d{1,2})$'  # note the surrounding () I added

# Compile the three alternatives into one pattern, once, at import time.
# Each alternative contributes exactly one capturing group, so a successful
# match yields three groups of which exactly one is not None, in the order
# (bracketed float, plain float, date).
match_data_items = re.compile('|'.join(
    [BRACKETED_FLOAT_RE, FLOAT_RE, DATE_RE])).match


def convert_data_item(s):
    """Convert a single textual data item to a float or a parsed date.

    The string is matched once against the combined pattern:

    * ``(1,234.5)`` -- a bracketed number -- is returned as a negative float.
    * ``1,234.5`` or ``$1,234.5`` is returned as a float.
    * ``d-name-d`` style text (e.g. ``1-Jan-09``) is handed to
      ``dateutil.parser.parse(..., dayfirst=True)`` and its result returned.

    Commas are stripped from numbers before calling ``float()``.

    Raises:
        ValueError: if ``s`` matches none of the patterns, or if the matched
            number text is not accepted by ``float()`` (the pattern allows
            degenerate text such as a lone ",").
    """
    match = match_data_items(s)
    if match:
        bfloat_value, float_value, date_value = match.groups()
        if bfloat_value is not None:
            # Bracketed numbers denote negative values.
            return -float(bfloat_value.replace(',', ''))
        if float_value is not None:
            # Use float_value here: bfloat_value is None on this path.
            return float(float_value.replace(',', ''))
        if date_value is not None:
            return dateutil.parser.parse(date_value, dayfirst=True)
    raise ValueError("bad string can't be matched")
Stefan
More information about the Python-list
mailing list