memory leak in python extension; mysqlclient & numpy

John Hunter jdhunter at ace.bsd.uchicago.edu
Tue Oct 8 13:35:44 EDT 2002


I am writing a python extension to load some data from a mysql
database directly into some numpy arrays.  There is a memory leak,
which I see when running top while repeatedly calling the function
get_quotes included below.  Can anyone offer some advice about where
this leak is occurring?

// get x[i] from a 1d numpy array of type xtype
//
// Expands to an lvalue, so it can be read from or assigned to.  The element
// is located by adding i * x->strides[0] to x->data (assumes x->data is a
// byte pointer, as in Numeric's PyArrayObject -- confirm for other types).
//
// Every argument and the whole expansion are parenthesized so that
// expression arguments such as get1d(a, n+1, int) index the intended
// element instead of being torn apart by operator precedence.
#define get1d(x,i,xtype) \
(*(xtype *)((x)->data + (i)*(x)->strides[0]))


/* Instance layout of the Python-level database object. */
typedef struct {
  PyObject_HEAD
  PyObject *x_attr;   /* Attributes dictionary */
  MYSQL mysql;        /* presumably the connection state that sock refers to -- confirm */
  MYSQL *sock;        /* handle handed to the mysql_* calls (query, store_result, error) */
} dbaseobject;


... snip ...

/*
 * printf-style template for the quote lookup.  Takes three %s arguments in
 * this order: ticker, lower date bound, upper date bound.  Both date bounds
 * are exclusive (strict > and <).
 */
#define SELECT_QUERY \
  "select price,volume,UNIX_TIMESTAMP(trade_date) " \
  "from short_quote " \
  "where ticker=\"%s\" " \
  "and trade_date>\"%s\" " \
  "and trade_date<\"%s\""

static PyObject *
dbase_get_quotes(dbaseobject *self, PyObject *args)
{
  dbaseobject *db = (dbaseobject *)self;

  MYSQL_RES *res;
  MYSQL_ROW row;
  char qbuf[512];
  char errstr[512];
  int dimensions[1];
  unsigned int count;
  PyArrayObject *p,*v,*t;
  char *ticker, *dateb, *datee;
  
  if (!PyArg_ParseTuple(args, "sss", &ticker,&dateb,&datee)) 
    return NULL;
  
  sprintf(qbuf, SELECT_QUERY, ticker, dateb, datee);

  if (mysql_query(db->sock,qbuf)) {
    sprintf(errstr, "Query failed:\n\t%s\n", qbuf);
    PyErr_SetString(PyExc_ValueError, errstr);
    return NULL;
  }

  if (!(res=mysql_store_result(db->sock))) {
    sprintf(errstr, "Couldn't get result from %s\n", mysql_error(db->sock));
    PyErr_SetString(PyExc_ValueError, errstr);
    return NULL;
  }
    
  dimensions[0] = mysql_num_rows(res);
  
  p = (PyArrayObject *)PyArray_FromDims(1,dimensions,PyArray_FLOAT);
  v = (PyArrayObject *)PyArray_FromDims(1,dimensions,PyArray_INT);
  t = (PyArrayObject *)PyArray_FromDims(1,dimensions,PyArray_LONG);
  
  count = 0;
  while ((row = mysql_fetch_row(res)) != NULL) {
    get1d(p,count,float) = atof(row[0]);
    get1d(v,count,int) = atoi(row[1]);
    //18000 is 5 hour offset to GMT
    get1d(t,count,long) = atoi(row[2]) - 18000;  
    //printf("%1.4f\t%d\n", atof(row[0]),atoi(row[1]));
    ++count;
  }
    
  mysql_free_result(res);
  return Py_BuildValue("(OOO)", p,v,t);
}


The script loops over a list of stock tickers and fetches the quotes
for each ticker:


import tradepy
from TradePy.Tickers import get_equity_tickers
from mx.DateTime import gmtime
import jdhscipy
from scipy import arange, shape, array, zeros, diff, Float

# Placeholder connection arguments; each one must be its own quoted string.
db = tradepy.Dbase('somehost', 'someuser', 'somepass', 'sometable')

tickers = get_equity_tickers()

# Date window handed to get_quotes for every ticker.
dateb = '2002-01-01'
datee = '2002-10-05'
maxlag = 60*60    # 60 min
binsize = 60      # 1 minute in seconds

# Calling get_quotes repeatedly is what exposes the memory growth.
for ticker in tickers:
    (prices, volumes, times) = db.get_quotes(ticker, dateb, datee)
    # Single parenthesised argument: prints identically under Python 2.
    print('Computing autocorr for %s with %d quotes' % (ticker, len(prices)))




More information about the Python-list mailing list