Program inefficiency?

Grant Edwards grante at visi.com
Sat Sep 29 11:35:36 EDT 2007


> [...]
> the program works great except for one thing. It's significantly
> slower through the later files in the search than through the early
> ones... Before anyone criticizes, I recognize that that middle section
> could be simplified with a for loop... I just haven't cleaned it
> up...
>
> The problem is that the first 300 files take about 10-15 seconds and
> the last 300 take about 2 minutes... If we do more than about 1500
> files in one run, it just hangs up and never finishes...
>
> Is there a solution here that I'm missing? What am I doing that is so
> inefficient?

The only thing I see is that you compile all of the REs every
time you call replacecycle().  They only need to be compiled
once, but I don't know why that would cause the progressive
slowing.

FWIW, it seems to me like a shell+sed script would be the
obvious solution to the problem.

> # File: masseditor.py
>
> import re
> import os
> import time
>
> def massreplace():
>     editfile = open("pathname\editfile.txt")
>     filestring = editfile.read()
>     filelist = filestring.splitlines()
> ##    errorcheck = re.compile('(a name=)+(.*)(-)+(.*)(></a>)+')
>     for i in range(len(filelist)):
>         source = open(filelist[i])
>         starttext = source.read()
>         interimtext = replacecycle(starttext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         interimtext = replacecycle(interimtext)
>         finaltext = replacecycle(interimtext)
>         source.close()
>         source = open(filelist[i],"w")
>         source.write(finaltext)
>         source.close()
> ##        if errorcheck.findall(finaltext)!=[]:
> ##            print errorcheck.findall(finaltext)
> ##            print filelist[i]
>         if i == 100:
>             print "done 100"
>             print time.clock()
>         elif i == 300:
>             print "done 300"
>             print time.clock()
>         elif i == 600:
>             print "done 600"
>             print time.clock()
>         elif i == 1000:
>             print "done 1000"
>             print time.clock()
>     print "done"
>     print i
>     print time.clock()
>
> def replacecycle(starttext):
>     p1= re.compile('(href=|HREF=)+(.*)(#)+(.*)( )+(.*)(">)+')
>     p2= re.compile('(name=")+(.*)( )+(.*)(">)+')
>     p3= re.compile('(href=|HREF=)+(.*)(#)+(.*)(\')+(.*)(">)+')
>     p4= re.compile('(name=")+(.*)(\')+(.*)(">)+')
>     p5= re.compile('(href=|HREF=)+(.*)(#)+(.*)(-)+(.*)(">)+')
>     p6= re.compile('(name=")+(.*)(-)+(.*)(">)+')
>     p7= re.compile('(href=|HREF=)+(.*)(#)+(.*)(<)+(.*)(">)+')
>     p8= re.compile('(name=")+(.*)(<)+(.*)(">)+')
>     p7= re.compile('(href=|HREF=")+(.*)(#)+(.*)(:)+(.*)(">)+')
>     p8= re.compile('(name=")+(.*)(:)+(.*)(">)+')
>     p9= re.compile('(href=|HREF=")+(.*)(#)+(.*)(\?)+(.*)(">)+')
>     p10= re.compile('(name=")+(.*)(\?)+(.*)(">)+')
>     p100= re.compile('(a name=)+(.*)(-)+(.*)(></a>)+')
>     q1= r"\1\2\3\4_\6\7"
>     q2= r"\1\2_\4\5"
>     interimtext = p1.sub(q1, starttext)
>     interimtext = p2.sub(q2, interimtext)
>     interimtext = p3.sub(q1, interimtext)
>     interimtext = p4.sub(q2, interimtext)
>     interimtext = p5.sub(q1, interimtext)
>     interimtext = p6.sub(q2, interimtext)
>     interimtext = p7.sub(q1, interimtext)
>     interimtext = p8.sub(q2, interimtext)
>     interimtext = p9.sub(q1, interimtext)
>     interimtext = p10.sub(q2, interimtext)
>     interimtext = p100.sub(q2, interimtext)
>
>     return interimtext
>
> massreplace()
>


-- 
Grant Edwards                   grante             Yow!  Are you still
                                  at               SEXUALLY ACTIVE? Did you
                               visi.com            BRING th' REINFORCEMENTS?



More information about the Python-list mailing list