Memory error due to big input file
sityee kong
skong1 at gmail.com
Mon Jul 13 13:10:00 EDT 2009
Hi All,
I have a similar problem that many new Python users might encounter. I would
really appreciate it if you could help me fix the error.
I have a big text file, more than 2 GB in size. I get a memory error
when reading in this file. Here is my Python script; the error occurred at
the line containing self.fh.readlines().
import math
import time
class textfile:
    """Load a text file into memory as a list of lines, skipping the first line.

    Attributes:
        name: The path that was passed to the constructor.
        fh: The file object used for reading. It is closed as soon as the
            lines have been loaded.
        lines: Every line after the first one, exactly as returned by
            ``readlines()`` (trailing newlines are kept).

    Note:
        ``readlines()`` builds a list holding the whole remainder of the
        file, so memory use grows with the size of the file.
    """

    def __init__(self, fname):
        """Open *fname*, discard its first line and read the rest into ``lines``."""
        self.name = fname
        # The context manager guarantees the handle is released even if
        # reading fails part-way through.
        with open(fname) as fh:
            self.fh = fh
            fh.readline()  # first line is skipped (presumably a header -- confirm)
            self.lines = fh.readlines()
# Load the diff file up front; the loaded lines are reachable as `a.lines`.
a = textfile("/home/sservice/nfbc/GenoData/CompareCalls3.diff")
# Number of loaded lines; used as the bound when walking `a.lines` below.
lfile = len(a.lines)
def myfun(snp, start, end):
    """Write one summary row for the lines ``a.lines[start:end + 1]``.

    Each line is split on whitespace and the fields at index 3 and 4 are
    compared against the marker ``"0/0"``:

    * index 3 is ``"0/0"`` and index 4 is not -> counted as NEWmiss
    * index 4 is ``"0/0"`` and index 3 is not -> counted as OLDmiss
    * neither is ``"0/0"``                    -> counted as DIFF
    * both are ``"0/0"``                      -> not counted

    The row ``"<snp> <NEWmiss> <OLDmiss> <DIFF>\\n"`` is then written to the
    module-level ``result`` file object.

    Args:
        snp: Label written in the first column; must be a string.
        start: Index of the first line of the group in ``a.lines``.
        end: Index of the last line of the group (inclusive).

    Raises:
        IndexError: If a line in the range has fewer than five fields.
        TypeError: If ``snp`` is not a string.
    """
    new_missing = 0
    old_missing = 0
    differing = 0
    for row in a.lines[start:end + 1]:
        fields = row.split()
        col3_missing = fields[3] == "0/0"
        col4_missing = fields[4] == "0/0"
        if col3_missing and not col4_missing:
            new_missing += 1
        elif col4_missing and not col3_missing:
            old_missing += 1
        elif not col3_missing and not col4_missing:
            differing += 1
    counts = [str(new_missing), str(old_missing), str(differing)]
    result.write(" ".join([snp] + counts) + "\n")
# Walk the loaded lines once, grouping consecutive lines that share the same
# first whitespace-separated field, and write one summary row per group via
# myfun().  The `with` block closes the output file even if a line is
# malformed and an exception escapes.
with open("Summary_noLoop_diff3.txt", "w") as result:
    result.write("SNP NEWmiss OLDmiss DIFF\n")
    start = 0  # index of the first line of the group currently being scanned
    snp = 0    # number of groups written so far
    if lfile:
        current = a.lines[0].split()[0]
        for i in range(1, lfile):
            key = a.lines[i].split()[0]
            if key != current:
                # The previous group ended on line i-1; flush it and start
                # a new group at line i.
                myfun(current, start, i - 1)
                snp += 1
                start = i
                current = key
        # Flush the final group.  Doing this unconditionally also covers an
        # input with exactly one data line, which would otherwise produce
        # no summary row at all.
        myfun(current, start, lfile - 1)
        snp += 1
sincerely, phoebe
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20090713/79fc0e80/attachment.html>
More information about the Python-list
mailing list