Data Manipulation - Rows to Columns
bearophileHUGS at lycos.com
bearophileHUGS at lycos.com
Wed Feb 6 09:42:38 EST 2008
This smells of homework. Here are a few alternative solutions of mine
that I don't like. I presume a good teacher will refuse them all,
because none of them uses the right tool :-) And every one of them
has some small problem (even if they work here).
# Sample input shared by the first four solutions: every <item> line opens a
# new group and the <color> lines that follow belong to it.  The mismatched
# closing tag (</table> after <item>) is part of the data as given.
data = """\
<item>TABLE</table>
<color>black</color>
<color>blue</color>
<color>red</color>
<item>CHAIR</table>
<color>yellow</color>
<color>black</color>
<color>red</color>
<item>SOFA</table>
<color>white</color>
<color>gray</color>
<color>pink</color>
"""

# Solution 1: plain string methods.  Delete every tag except the opening
# <item>, which is kept as the separator between groups.
# The "</table>" literal was split over two lines by e-mail wrapping in the
# archived post (an unterminated string); it is rejoined here.
data2 = (
    data.replace("<color>", "")
    .replace("</color>", "")
    .replace("</table>", "")
)
# The text before the first "<item>" is empty, hence the `if b` filter.
groups = [b.split() for b in data2.split("<item>") if b]

# print(x) with a single argument and print("") produce the same output under
# the Python 2 print statement and the Python 3 print function.
print(groups)
print("")
import re
# Solution 2: same idea as the chained replace() calls, but a single regex
# substitution removes every tag except the opening <item> separator.
data2 = re.compile(r"<color>|</color>|</table>").sub("", data)

groups = []
for chunk in data2.split("<item>"):
    # The piece before the first "<item>" is empty and must be skipped.
    if chunk:
        groups.append(chunk.split())

print(groups)
print("")
import re
def splitter(data):
    """Yield one list per ``<item>`` element found in *data*.

    Each yielded list starts with the item's text and continues with the text
    of every ``<color>`` element that follows it, up to the next ``<item>``.
    Colors that appear before the first item are collected into a leading
    group of their own.  Nothing is yielded when *data* has no matches.
    """
    # The archived post had this literal broken over two lines by e-mail
    # wrapping; it is rejoined here.  The captures are non-greedy so that
    # several elements on the same line are matched one by one instead of
    # being swallowed by a single greedy ``.*``.
    patt = re.compile(r"(?:<item>(.*?)</table>)|(?:<color>(.*?)</color>)")
    parts = []
    for mo in patt.finditer(data):
        # Exactly one alternative matched, so exactly one group is not None.
        p1, p2 = mo.groups()
        if p1 is None:
            # A <color>: extend the group currently being built.
            parts.append(p2)
        else:
            # An <item>: emit the finished group (if any) and start a new one.
            if parts:
                yield parts
            parts = [p1]
    # Emit the last group, which no following <item> has flushed.
    if parts:
        yield parts
# Solution 3 demo: materialise the generator so all groups print as one list,
# then emit a blank separator line.
all_groups = list(splitter(data))
print(all_groups)
print("")
def splitter2(items, predicate):
    """Split *items* into consecutive groups, yielding each group as a list.

    ``predicate(el)`` decides where an element goes: a true result appends
    *el* to the group being built, a false result closes that group (if it is
    non-empty) and makes *el* the first element of a new one.  Nothing is
    yielded for an empty *items*.

    The indentation of this function was lost in the archived post and is
    restored here.
    """
    parts = []
    for el in items:
        if predicate(el):
            # Continuation element: stays in the current group.
            parts.append(el)
        else:
            # Boundary element: flush what we have, then open a new group.
            if parts:
                yield parts
            parts = [el]
    # Emit the final group, which no later boundary element has flushed.
    if parts:
        yield parts
import re
# Solution 4: the same grouping as splitter(), built from the generic
# splitter2() plus small helpers.
patt = re.compile(r"(?:<item>(.*)</table>)|(?:<color>(.*)</color>)")

# Lazily produce one (item_text, color_text) pair per match; exactly one of
# the two entries is None, depending on which alternative matched.
xmobjects = (mo.groups() for mo in patt.finditer(data))


def process(group):
    """Flatten a group of match pairs: item text first, then the color texts."""
    return [group[0][0]] + [part[1] for part in group[1:]]


def isstart(pair):
    """Return True when *pair* came from a <color> match (item slot is None).

    NOTE: despite the name, a true result means "this is NOT the start of a
    group"; splitter2() appends such elements to the current group.
    The original spelling ``lambda (p1,p2): ...`` relied on tuple-parameter
    unpacking, which Python 3 removed; indexing works on both versions.
    """
    return pair[0] is None


xgroups = (process(g) for g in splitter2(xmobjects, isstart))
print(list(xgroups))
print("")
# Variant of the sample input with stray whitespace inside some of the tags,
# used to exercise the more tolerant pattern below.
data2 = """
<item>TABLE</table>
<color>black</color>
<color>blue< / color>
<color>red</color>
<item>CHAIR</table>
<color>yellow</color>
<color>black</color>
<color>red</color>
<item>SOFA</table>
<color>white</color>
<color>gray</color>
< color > pink < / color >
"""
import re

# Solution 5: one verbose regex that allows whitespace around the tag names
# and the slash.  The text capture is non-greedy: a greedy ``.*`` backtracks
# only as far as it must and therefore keeps the whitespace in front of the
# closing tag (yielding 'pink ' for the last line of data2).
patt = re.compile(r"""
\s* < \s* (item|color) \s* > \s*              # opening tag, kind captured
(.*?)                                         # element text, shortest match
\s* < \s* / \s* (?:table|color) \s* > \s*     # closing tag
""", re.VERBOSE)

groups = []
for mo in patt.finditer(data2):
    p1, p2 = mo.groups()
    if p1 == "item":
        # An item opens a new group.
        groups.append([p2])
    else:
        # A color joins the most recent group; groups[-1] raises IndexError
        # if a color is matched before any item.
        groups[-1].append(p2)

print(groups)
print("")
Bye,
bearophile
More information about the Python-list
mailing list