<div dir="ltr">I would like some feedback on possible solutions to make this script run faster. <div style>The system is pegged at 100% CPU and it takes a long time to complete. </div><div style><br></div><div style><br></div>
<div style><div>#!/usr/bin/env python</div><div><br></div><div>import gzip</div><div>import re</div><div>import os</div><div>import sys</div><div>from datetime import datetime</div><div>import argparse</div><div><br></div>
<div>if __name__ == '__main__':</div><div> parser = argparse.ArgumentParser()</div><div> parser.add_argument('-f', dest='inputfile', type=str, help='data file to parse')</div><div> parser.add_argument('-o', dest='outputdir', type=str, default=os.getcwd(), help='Output directory')</div>
<div> args = parser.parse_args()</div><div><br></div><div> if len(sys.argv[1:]) < 1:</div><div> parser.print_usage()</div><div> sys.exit(-1)</div><div><br></div><div> print(args)</div><div> if args.inputfile and os.path.exists(args.inputfile):</div>
<div> try:</div><div> with gzip.open(args.inputfile) as datafile:</div><div> for line in datafile:</div><div> line = line.replace('<a href="http://mediacdn.xxx.com">mediacdn.xxx.com</a>', '<a href="http://media.xxx.com">media.xxx.com</a>')</div>
<div> line = line.replace('<a href="http://staticcdn.xxx.co.uk">staticcdn.xxx.co.uk</a>', '<a href="http://static.xxx.co.uk">static.xxx.co.uk</a>')</div><div> line = line.replace('cdn.xxx', 'www.xxx')</div>
<div> line = line.replace('cdn.xxx', 'www.xxx')</div><div> line = line.replace('cdn.xx', 'www.xx')</div><div> siteurl = line.split()[6].split('/')[2]</div>
<div> line = re.sub(r'\bhttps?://%s\b' % siteurl, "", line, 1)</div><div><br></div><div> (day, month, year, hour, minute, second) = (line.split()[3]).replace('[','').replace(':','/').split('/')</div>
<div> datelog = '{} {} {}'.format(month, day, year)</div><div> dateobj = datetime.strptime(datelog, '%b %d %Y')</div><div><br></div><div> outfile = '{}{}{}_combined.log'.format(dateobj.year, dateobj.month, dateobj.day)</div>
<div> outdir = (args.outputdir + os.sep + siteurl)</div><div><br></div><div> if not os.path.exists(outdir):</div><div> os.makedirs(outdir)</div><div><br></div>
<div>
with open(outdir + os.sep + outfile, 'w+') as outf:</div><div> outf.write(line)</div><div><br></div><div> except IOError, err:</div><div> sys.stderr.write("Error unable to read or extract inputfile: {} {}\n".format(args.inputfile, err))</div>
<div> sys.exit(-1)</div><div><br></div></div></div>