[Chicago] Python Development in Chicago
Kevin L. Stern
kevin.l.stern at gmail.com
Fri Oct 26 16:07:55 CEST 2007
I'm doing some research in graph theory and I have a set of graphs (known as
the AT&T set) that is in a seemingly proprietary format. I want to convert it
to the GraphML XML format, so I hacked together a little Python script for
this. Would you folks say that this is 'pythonic', or does this look
newbie-ish?
____________________________________________________________________
import mmap
import os
import re
from xml.sax.saxutils import escape
class token(object):
    """A single lexical item recognised in the input graph file.

    Attributes:
        type: one of 'graph', 'subgraph', 'endgroup' or 'edge' once the
            tokenizer has filled it in; None on a freshly built token.
        data: payload for the token -- the identifier string for 'graph'
            and 'subgraph', a two-element [source, target] list for 'edge',
            and None for 'endgroup'.
    """

    def __init__(self):
        # Both fields start empty; the tokenizer assigns them after
        # construction.
        self.type = None
        self.data = None

    def __repr__(self):
        return "token(type=%r, data=%r)" % (self.type, self.data)
class tokenizer(object):
    """Convert a line-oriented graph description into GraphML-style XML.

    Recognised input lines (anything else is silently ignored):

        graph <id> {          opens the top-level graph
        subgraph <id> {       opens a nested graph (may be indented)
        }                     closes the most recently opened graph
        n<A> -- n<B>;         an edge between nodes A and B

    Args:
        inmap: input object providing ``readline()``. The lines it returns
            may be ``str`` or bytes (as ``mmap`` objects return on
            Python 3); bytes are decoded as UTF-8.
        out: output object providing ``write(str)``.

    NOTE(review): the output contains only <graph> and <edge> elements; no
    <node> elements or ``edgedefault`` attribute are written. Confirm that
    downstream GraphML consumers accept this.
    """

    # Compiled once at class-creation time instead of on every input line.
    # Raw strings keep the backslash escapes intact for the regex engine.
    _GRAPH_RE = re.compile(r"^graph\s(.*)\s\{$")
    _SUBGRAPH_RE = re.compile(r"^\s*subgraph(.*)\{$")
    _END_RE = re.compile(r"^\s*\}$")
    _EDGE_RE = re.compile(r"^\s*n(\d+)\s--\sn(\d+);$")

    def __init__(self, inmap, out):
        self.inmap = inmap
        self.out = out
        # Current subgraph nesting depth. Initialised here so that
        # processToken() is usable without calling go() first.
        self.sg = 0

    @staticmethod
    def _make_token(kind, data=None):
        """Build a token with the given type and payload."""
        result = token()
        result.type = kind
        result.data = data
        return result

    @staticmethod
    def _attr(value):
        """Escape *value* for use inside a double-quoted XML attribute."""
        return escape("%s" % (value,), {'"': "&quot;"})

    def _parse_line(self, line):
        """Classify one raw input line; return a token, or None if the line
        is not recognised (including the empty string returned at EOF)."""
        if not isinstance(line, str):
            # mmap.readline() yields bytes on Python 3.
            line = line.decode("utf-8")
        # Drop the line terminator so '$' anchors behave the same for LF,
        # CRLF and a final line that has no newline at all.
        line = line.rstrip("\r\n")

        found = self._GRAPH_RE.search(line)
        if found:
            return self._make_token('graph', found.group(1).strip())

        found = self._SUBGRAPH_RE.search(line)
        if found:
            # The identifier is captured by the pattern rather than sliced
            # at fixed offsets, so any amount of indentation is handled.
            return self._make_token('subgraph', found.group(1).strip())

        if self._END_RE.search(line):
            return self._make_token('endgroup')

        found = self._EDGE_RE.search(line)
        if found:
            return self._make_token('edge', [found.group(1), found.group(2)])

        return None

    def nextToken(self):
        """Read the next line from the input and return its token.

        Returns:
            A token, or None when the line is not recognised or the input
            is exhausted.
        """
        return self._parse_line(self.inmap.readline())

    def processToken(self, t):
        """Write the XML fragment corresponding to token *t*.

        A falsy *t* (e.g. None for an unrecognised line) is ignored.
        """
        if not t:
            return
        if t.type == 'graph':
            self.out.write('<graph id="%s">\n' % self._attr(t.data))
        elif t.type == 'subgraph':
            self.out.write('<graph id="%s">\n' % self._attr(t.data))
            self.sg += 1
        elif t.type == 'endgroup':
            self.out.write('</graph>\n')
            # Only subgraph closers decrement; the closer of the top-level
            # graph leaves the depth at zero.
            if self.sg > 0:
                self.sg -= 1
        elif t.type == 'edge':
            self.out.write('<edge source="%s" target="%s"/>\n' %
                           (self._attr(t.data[0]), self._attr(t.data[1])))

    def go(self):
        """Translate the whole input, wrapping the result in <graphml>."""
        self.sg = 0
        self.out.write("""<?xml version="1.0" encoding="UTF-8"?>
<graphml>
""")
        # readline() returns an empty value only at end of input, so this
        # works for mmap objects and ordinary file objects alike.
        while True:
            line = self.inmap.readline()
            if not line:
                break
            self.processToken(self._parse_line(line))
        self.out.write("</graphml>")
# Script entry: convert "ug.txt" to XML in "out.txt".
#
# The names `inmap`, `out` and `lex` are deliberately kept at module level
# because the classes in this file may refer to them as globals.
infile = "ug.txt"
outfile = "out.txt"
try:
    # Read-only binary access is all a read-only mapping needs.
    with open(infile, "rb") as fd:
        # Length 0 maps the whole file. `access` is passed by keyword
        # because the positional parameters after `length` differ between
        # the Windows and Unix signatures of mmap.mmap().
        inmap = mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            # "w" creates the output if missing and truncates stale content.
            with open(outfile, "w") as out:
                lex = tokenizer(inmap, out)
                lex.go()
        finally:
            # Reached only once the mapping exists, so the name is always
            # bound here.
            inmap.close()
except IOError:
    print("IO Error Occurred")
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.python.org/pipermail/chicago/attachments/20071026/c3a36cd5/attachment.htm
More information about the Chicago
mailing list