reading large file

guillaume g_alleon at yahoo.fr
Wed Sep 3 08:00:39 EDT 2003


I have to read and process a large ASCII file containing a mesh : a
list of points and triangles.
The file is 100 MBytes.
 
I first tried to do it in memory, but I think I am running out of
memory; therefore I decided to use the shelve
module to store my points and elements on disk.
It is slow, though -- any hints? I also think I still have the same
memory problem, but I don't understand why,
since my aPoint should be removed by the gc.

Do you have any idea?

Thanks

Guillaume

PS:
Here is the code, for your information.




import string
import os
import sys
import time
import resource
import shelve
import psyco
 
# Enable Psyco's just-in-time specialization for the whole program.
# NOTE(review): Psyco is documented as trading memory for speed; consider
# disabling it while investigating this script's memory usage -- confirm.
psyco.full()
 
class point:
  """A mesh vertex holding its three coordinates as x, y and z."""

  def __init__(self, x, y, z):
    # Store the coordinates exactly as given; no conversion is done here.
    self.x, self.y, self.z = x, y, z
    
 
def SFMImport(filename):
  """Read a mesh file and store its points in the 'points.db' shelf.

  Layout read from the file:
    * line 1: header; field 0 is the triangle count and field 1 is the
      point count;
    * next <point count> lines: fields 1, 2 and 3 are the x, y and z
      coordinates (every "D" is replaced by "E" before conversion,
      presumably to accept Fortran-style exponents -- confirm);
    * next <triangle count> lines: fields 0, 1 and 2 are three integers.

  Each point is stored in the shelf under the key "point<i>", where i is
  the 0-based position of the point line.  Progress and timings are
  printed to standard output.

  filename -- path of the mesh file to read.

  Raises ValueError if the file ends before the announced number of
  point or triangle lines has been read.
  """
  # Local import: itertools is not among the file's top-level imports.
  import itertools

  print('UNV Import ("%s")' % filename)

  meshfile = open(filename, "r")
  try:
    header = meshfile.readline().split()
    nbtrias = int(header[0])
    nbpoints = int(header[1])

    print("found %s points and %s triangles" % (nbpoints, nbtrias))

    db = shelve.open('points.db')
    try:
      # 'start' times the whole section; 'lap' is reset at every progress
      # line so that the final total is not truncated by the last reset.
      start = lap = time.time()
      nread = 0
      # Iterating over an islice of the file avoids building a list of
      # nbpoints integers, which range() does on Python 2.
      for i, line in enumerate(itertools.islice(meshfile, nbpoints)):
        words = line.split()

        x = float(words[1].replace("D", "E"))
        y = float(words[2].replace("D", "E"))
        z = float(words[3].replace("D", "E"))

        if i % 250000 == 0:
          print("%7d points <%s>" % (i, time.time() - lap))
          lap = time.time()

        db["point%s" % i] = point(x, y, z)
        nread = i + 1

      if nread != nbpoints:
        raise ValueError("expected %s points, found only %s"
                         % (nbpoints, nread))

      print("%s points read in %s seconds" % (nbpoints, time.time() - start))
    finally:
      # Close the shelf before reading the faces, even if parsing failed.
      db.close()

    # NOTE(review): the triangles are collected here but never stored or
    # returned; confirm what the caller is expected to do with them.
    faceList = []
    start = lap = time.time()
    nread = 0
    for i, line in enumerate(itertools.islice(meshfile, nbtrias)):
      words = line.split()
      faceList.append((int(words[0]), int(words[1]), int(words[2])))

      if i % 100000 == 0:
        print("%s faces <%s>" % (i, time.time() - lap))
        lap = time.time()

      nread = i + 1

    if nread != nbtrias:
      raise ValueError("expected %s triangles, found only %s"
                       % (nbtrias, nread))

    print("%s faces read in %s seconds" % (nbtrias, time.time() - start))
  finally:
    meshfile.close()
 
def callback(fs):
    """Import the file selected in *fs*.

    fs -- object exposing the chosen path as its ``filename`` attribute
          (presumably a GUI.FileSelector -- confirm against the GUI module).
    """
    # The importer defined in this file is SFMImport; the previous call to
    # UNVImport referred to a name that is not defined here.
    SFMImport(fs.filename)

              
if __name__ == "__main__":
# Disabled GUI entry point, kept for reference (the wrapped string literal
# has been rejoined onto one comment line so the file parses again):
#    try:
#        import GUI
#    except:
#        print "This script is only working with the new GUI module ..."
#    else:
#        fs = GUI.FileSelector()
#        fs.activate(callback, fs)
  print(sys.argv[0])
  # Fail with a usage message instead of an IndexError when no file is given.
  if len(sys.argv) < 2:
    print("usage: %s <mesh file>" % sys.argv[0])
    sys.exit(1)
  SFMImport(sys.argv[1])




More information about the Python-list mailing list