Precompiled regular expressions slower?

Peter Bienstman pbienst at MIT.EDU
Tue Feb 26 13:20:51 EST 2002


> Perhaps Peter should post the relevant code so we can see what is actually
> happening for ourselves?

Here is the relevant portion of the code:

###########################################################################
#
# Get all the information from input file.
#
###########################################################################

ymin, ymax = 0.0, 0.0

bands, x = 0, 0
sym_points, sym_labels = [], []
xtics, labels = [], []
data, kpts = {}, {}
bands_min, bands_max = {}, {}

recip = zeros([3,3], Float)
last_k = zeros([3], Float)

last_label, gap_label = "", ""

title = replace(strip(os.popen("pwd").readline()), "_", "-")

print "---- reading data from", input_fname, "..."

# Scan the input one line at a time. Every pattern is tried on every
# line; a line that matches none of them is simply skipped.
line = inputfile.readline()
while line:

  # Gap summary line: keep its description for later use as a label.
  s = search(r"^gap (.+) with", line)
  if s:
    gap_label = "Gap: " + s.group(1)

  # Number of bands; sizes the per-k-point arrays allocated below.
  s = search(r"^Computing (\S+) bands", line)
  if s:
    bands = int(s.group(1))

  # The header is followed by three vector lines, which are consumed
  # here; vector i is stored as column i of recip.
  if search(r"^Reciprocal lattice vectors \(\/ 2 pi\):", line):
    for i in range(3):
      vector_line = inputfile.readline()
      s = search(r"\((\S+), (\S+), (\S+)\)", vector_line)
      if not s:
        # Fail with a readable message instead of an AttributeError on
        # None when the file is truncated or the line is malformed.
        raise ValueError(
          "malformed reciprocal lattice vector line: %r" % vector_line)
      recip[0,i] = float(s.group(1))
      recip[1,i] = float(s.group(2))
      recip[2,i] = float(s.group(3))

  # An explicit title in the input replaces the default one.
  s = search(r"Gnuplot Title: +(.+)$", line)
  if s:
    title = s.group(1)

  # Gap between two bands: record the upper edge of the lower band and
  # the lower edge of the upper band, keyed by band number.
  s = search(r"^Gap from band (\d+) \((\S+)\) to band (\d+) \((\S+)\)",
             line)
  if s:
    bands_max[int(s.group(1))] = float(s.group(2))
    bands_min[int(s.group(3))] = float(s.group(4))

  # Labelled k-point declaration: the label keeps its surrounding
  # double quotes; the coordinates are re-read from the same line.
  s = search(r"\#\(\S+ +\S+ +\S+\) (\".+\") k-point$", line)
  if s:
    sym_labels.append(s.group(1))
    s = search(r"\#\((\S+) +(\S+) +(\S+)\)", line)
    sp = array([float(s.group(1)),float(s.group(2)),float(s.group(3))])
    sym_points.append(sp)

  # Data line ("freqs:", "tefreqs:", "tmfreqs:", ...): k-point index,
  # k-point coordinates, then the per-band values.
  s = search(r"^t?e?m?freqs:, (\d+), (\S+), (\S+), (\S+),", line)
  if s:
    ik = int(s.group(1)) - 1 # We always count from zero.
    k = array([float(s.group(2)),float(s.group(3)),float(s.group(4))])

    # Find the label of a declared point coinciding with k, if any.
    # There is deliberately no break: the last match wins.
    label = ""
    for sym_point, sym_label in zip(sym_points, sym_labels):
      if sum((sym_point - k)**2) < 1e-6:
        label = sym_label

    if kpts.has_key(ik):
      # Index already seen on an earlier line: reuse its coordinate.
      x = kpts[ik]
    elif last_label and label == last_label:
      # Same labelled point as the previous one: x does not advance.
      kpts[ik] = x
    else:
      # Advance x by the Euclidean length of recip * (k - last_k).
      x += sqrt(sum(matrixmultiply(recip, k - last_k)**2))
      kpts[ik] = x
      if label:
        xtics.append(x)
        labels.append(label)

    # Fields 0-5 are the tag, the index, the three k components and one
    # further field that is skipped; the band values start at field 6.
    data[ik] = zeros([bands], Float)
    f = string.split(line, ",")
    for n in range(6,len(f)):
      data[ik][n-6] = float(f[n])

    ymin = min(ymin, min(data[ik]))
    ymax = max(ymax, max(data[ik]))

    last_k     = k
    last_label = label

  line = inputfile.readline()







More information about the Python-list mailing list