Why would it be suboptimal to specify types for regex match groups within regex strings?
locals().update(**kwargs) doesn't work, and there is a reason for that, but I don't remember what the reason is.
More test cases in the pytest.mark.parametrize list here might elucidate the situation:
import re
def test_regex_comprehension():
    """Show what ``groups()`` and ``groupdict()`` return for unnamed vs. named groups."""
    from types import SimpleNamespace

    teststr = "2345fg"

    # Unnamed groups are only reachable by position, so groupdict() is empty.
    # Raw strings keep ``\d`` / ``\w`` from being read as (invalid) string
    # escape sequences.
    rgx = re.compile(r"(\d{2})(\d{2})(\w{2})")
    assert not rgx.match(teststr).groupdict()
    assert rgx.match(teststr).groups() == ("23", "45", "fg")

    # Named groups show up both positionally and by name.
    rgx = re.compile(r"(?P<a>\d{2})(?P<b>\d{2})(?P<c>\w{2})")
    assert rgx.match(teststr).groups() == ("23", "45", "fg")
    assert rgx.match(teststr).groupdict() == dict(a="23", b="45", c="fg")

    # Wrapping the groupdict in a SimpleNamespace gives attribute access to
    # the matched groups; every value is still a string at this point.
    mdo = SimpleNamespace(**rgx.match(teststr).groupdict())
    assert mdo.a == "23"
    assert mdo.b == "45"
    assert mdo.c == "fg"
def cast_match_groupdict(matchobj, typemap):
    """Return ``matchobj.groupdict()`` with selected groups converted.

    Args:
        matchobj: A regex match object. ``None`` (what ``re.match`` returns
            when the pattern does not match) is rejected with ``ValueError``.
        typemap: Mapping of group name to a one-argument callable used to
            convert that group's text. If falsy, the groupdict is returned
            unchanged.

    Returns:
        A new dict of group name to value, with the groups named in
        ``typemap`` replaced by the result of their cast.

    Raises:
        ValueError: If ``matchobj`` is ``None``, or if a cast raises
            ``ValueError`` (the original error is chained as the cause).
        KeyError: If ``typemap`` names a group the pattern does not define.
    """
    if matchobj is None:
        raise ValueError(("matchobj", matchobj))

    matchdict = matchobj.groupdict()
    if not typemap:
        return matchdict

    for attr, castfunc in typemap.items():
        value = matchdict[attr]
        if value is None:
            # The group did not participate in the match. Casting None would
            # give the string "None" for str and a TypeError for int, so the
            # value is left as None.
            continue
        try:
            matchdict[attr] = castfunc(value)
        except ValueError as e:
            raise ValueError(("attr", attr), ("rgx", matchobj.re)) from e
    return matchdict
import pytest
def test_cast_match_groupdict():
    """Check that ``cast_match_groupdict`` converts groups and reports bad casts."""
    # Raw string so ``\d`` / ``\w`` reach the regex engine untouched.
    rgx = re.compile(r"(?P<a>\d{2})(?P<b>\d{2})(?P<c>\w{2})")
    matchobj = rgx.match("2345fg")

    # Group c is "fg", which int() cannot parse. Only the call under test
    # sits inside the raises block.
    with pytest.raises(ValueError):
        cast_match_groupdict(matchobj, dict(a=int, b=int, c=int))

    output = cast_match_groupdict(matchobj, dict(a=int, b=int, c=str))
    assert output == dict(a=23, b=45, c="fg")
from typing import Tuple
def generate_regex_and_typemap_from_fstring(fstr) -> Tuple[str, dict]:
    """Translate a ``str.format``-style pattern into a regex and a cast map.

    Every ``{name}`` field becomes the named group ``(?P<name>.*?)``; literal
    text between fields is regex-escaped. A field's format spec selects the
    cast for that group: ``d`` maps to ``int`` and ``s`` maps to ``str``.
    Fields without a spec are left uncast.

    NOTE: the groups are lazy (``.*?``). When nothing follows a group in the
    pattern, ``re.match`` lets it match the empty string, so adjacent fields
    such as ``{a}{b}{c}`` carry no width information.

    Args:
        fstr: Format-style pattern such as ``"{a:d}{b:d}{c:s}"``.

    Returns:
        ``(regex_string, typemap)``. ``typemap`` maps group names to cast
        callables, or is ``None`` when no field has a format spec.

    Raises:
        NotImplementedError: For patterns that cannot be parsed, that have no
            fields, that use positional/attribute/index fields, conversions
            (``!r``), repeated names, or a format spec other than ``d``/``s``.
    """
    # Local import: the name is only needed here.
    from string import Formatter

    casts = {"d": int, "s": str}

    try:
        # parse() is lazy; list() forces malformed patterns to fail here.
        parsed = list(Formatter().parse(fstr))
    except ValueError as e:
        raise NotImplementedError(("fstr", fstr)) from e

    pieces = []
    typemap = {}
    seen = set()
    for literal, name, spec, conversion in parsed:
        if literal:
            pieces.append(re.escape(literal))
        if name is None:
            # Trailing literal text with no field after it.
            continue
        if conversion or name in seen or not name.isidentifier():
            raise NotImplementedError(("fstr", fstr))
        if spec:
            if spec not in casts:
                raise NotImplementedError(("fstr", fstr))
            typemap[name] = casts[spec]
        seen.add(name)
        pieces.append(rf"(?P<{name}>.*?)")

    if not seen:
        raise NotImplementedError(("fstr", fstr))
    return ("".join(pieces), typemap or None)
def do_fstring_regex_magic(fstrpattern, string):
    """Match ``string`` against a format-style pattern and cast the groups.

    Args:
        fstrpattern: Format-style pattern handed to
            ``generate_regex_and_typemap_from_fstring``.
        string: Text to match from its start (``re.match`` semantics).

    Returns:
        The dict produced by ``cast_match_groupdict`` for the match.

    Raises:
        ValueError: If ``string`` does not match the generated regex, or if
            casting a group fails. The exception argument is a dict with the
            pattern, the input string, the generated regex and the typemap.
        Anything raised by ``generate_regex_and_typemap_from_fstring``
        propagates unchanged.
    """
    rgxstr, typemap = generate_regex_and_typemap_from_fstring(fstrpattern)
    matchobj = re.compile(rgxstr).match(string)

    # Explicit error context instead of locals(), which would also drag the
    # caught exception and the match object into the new exception's args.
    context = dict(
        fstrpattern=fstrpattern,
        string=string,
        rgxstr=rgxstr,
        typemap=typemap,
    )
    if matchobj is None:
        raise ValueError(context)

    # XXX: update_locals() is deliberately not called here; how to test it is
    # still an open question.
    try:
        return cast_match_groupdict(matchobj, typemap)
    except ValueError as e:
        raise ValueError(context) from e
# NOTE(review): the generated groups are lazy (``.*?``) with nothing after
# them, so under ``re.match`` each one can match the empty string. The
# two-character splits expected below presumably need width information in
# the pattern -- confirm the intended behavior.
@pytest.mark.parametrize(
    "fstrpattern,string,exceptions,expoutput",
    [
        ("{a}{b}{c}", "2345fg", None, dict(a="23", b="45", c="fg")),
        ("{a:d}{b:d}{c:d}", "2345fg", ValueError, None),
        # ``:d`` fields are cast with int, so a and b are expected as ints.
        ("{a:d}{b:d}{c:s}", "2345fg", None, dict(a=23, b=45, c="fg")),
    ],
)
def test_do_fstring_regex_magic(fstrpattern, string, exceptions, expoutput):
    """Run ``do_fstring_regex_magic`` and check its result or its exception.

    ``exceptions`` is the exception type expected from the call, or ``None``
    when the call should succeed and return ``expoutput``.
    """
    if exceptions:
        with pytest.raises(exceptions):
            do_fstring_regex_magic(fstrpattern, string)
        return

    assert do_fstring_regex_magic(fstrpattern, string) == expoutput
def update_locals(**kwargs):
    """Placeholder for injecting ``kwargs`` as local variables; always raises.

    ``locals().update(**kwargs)`` cannot do this job. Called here, ``locals()``
    describes this function's own namespace rather than the caller's. The
    Python documentation also warns that changes made to the dict returned by
    ``locals()`` may not affect the local variables the interpreter uses.

    Raises:
        NotImplementedError: Always. ``NotImplemented`` is a comparison
            sentinel, not an exception, so raising it would produce a
            ``TypeError`` instead.
    """
    raise NotImplementedError(
        "updating a function's local variables through locals() is not supported"
    )