[Pypi-checkins] r767 - trunk/pypi/tools
martin.von.loewis
python-checkins at python.org
Fri Jul 9 01:24:58 CEST 2010
Author: martin.von.loewis
Date: Fri Jul 9 01:24:57 2010
New Revision: 767
Added:
trunk/pypi/tools/integratestats (contents, props changed)
Modified:
trunk/pypi/tools/apache_stats.py
Log:
Add integratestats tool.
Modified: trunk/pypi/tools/apache_stats.py
==============================================================================
--- trunk/pypi/tools/apache_stats.py (original)
+++ trunk/pypi/tools/apache_stats.py Fri Jul 9 01:24:57 2010
@@ -37,13 +37,6 @@
current line. if the callable returns True,
the line is not included
"""
- if isinstance(fileobj, str):
- fileobj = self._get_file_obj(fileobj, 'w', compression)
- file_created = True
- else:
- file_created = False
-
- writer = csv.writer(fileobj)
downloads = {}
for log in self._get_logs(logfile, files_url):
if filter is not None:
@@ -58,6 +51,16 @@
downloads[key] += count
else:
downloads[key] = count
+ self._write_stats(fileobj, downloads)
+
+ def _write_stats(self, fileobj, downloads, compression=None):
+ if isinstance(fileobj, str):
+ fileobj = self._get_file_obj(fileobj, 'w', compression)
+ file_created = True
+ else:
+ file_created = False
+
+ writer = csv.writer(fileobj)
filenames = downloads.keys()
filenames.sort()
for key in filenames:
@@ -107,9 +110,17 @@
yield {'packagename': line[0],
'filename': line[1],
'useragent': line[2],
- 'count': line[3]}
+ 'count': int(line[3])}
#reader.close()
+ def read_stats_dict(self, stats_file):
+ res = {}
+ for r in self.read_stats(stats_file):
+ key = (r['packagename'], r['filename'], r['useragent'])
+ value = r['count']
+ res[key] = value
+ return res
+
def build_local_stats(self, year, month, day, logfile, directory=None):
"""builds local stats with default values"""
filename = '%d-%.2d-%.2d.bz2' % (year, month, day)
@@ -119,6 +130,27 @@
self.build_daily_stats(year, month, day, logfile, filename,
compression='bz2')
+ def integrate_stats(self, targetdir, year, month, day, fd):
+ new = self.read_stats_dict(fd)
+ oldpath = "%s/days/%s-%.2s-%.2s.bz2" % (targetdir, year, month, day)
+ if os.path.exists(oldpath):
+ old = self.read_stats_dict(oldpath)
+ for k, v in new.items():
+ old[k] = old.get(k, 0) + v
+ else:
+ old = new
+ self._write_stats(oldpath, old, 'bz2')
+ monthpath = "%s/months/%s-%.2s.bz2" % (targetdir, year, month)
+ if os.path.exists(monthpath):
+ old = self.read_stats_dict(monthpath)
+ for k, v in new.items():
+ old[k] = old.get(k, 0) + v
+ else:
+ old = new
+ self._write_stats(monthpath, old, 'bz2')
+ return new
+
+
class ApacheLocalStats(LocalStats):
"""concrete class that uses the ApacheLogReader"""
def _get_logs(self, logfile, files_url):
Added: trunk/pypi/tools/integratestats
==============================================================================
--- (empty file)
+++ trunk/pypi/tools/integratestats Fri Jul 9 01:24:57 2010
@@ -0,0 +1,65 @@
+#!/usr/bin/python
+import sys, os, socket, psycopg2, urllib, re, bz2, cStringIO, ConfigParser
+sys.path.append(os.path.dirname(__file__)+"/..")
+import apache_stats
+
+statsdir = '/data/pypi/stats/'
+
+def integrate(config, data):
+ # Setup database connection
+ c = ConfigParser.ConfigParser({'user':'', 'password':''})
+ c.read(config)
+ dbname = c.get('database', 'name')
+ dbuser = c.get('database', 'user')
+ dbpass = c.get('database', 'password')
+ dbconn = psycopg2.connect(database=dbname, user=dbuser, password=dbpass)
+ cursor = dbconn.cursor()
+ for (package, filename, browser), count in data.items():
+ cursor.execute('update release_files set downloads=downloads+%s where filename=%s',
+ (count, filename))
+ dbconn.commit()
+ dbconn.close()
+
+def integrate_remote(config, host, dbupdate=True):
+ index = urllib.urlopen('http://%s.pypi.python.org/local-stats/days' % host).read()
+ files = set(re.findall('href=.(20..-..-..).bz2', index))
+ try:
+ integrated = open('/data/pypi/stats/integrated/'+host).readlines()
+ integrated = set([x.strip() for x in integrated])
+ except IOError:
+ integrated = set()
+ missing = files-integrated
+ stats = apache_stats.LocalStats()
+ for m in missing:
+ data = urllib.urlopen('http://%s.pypi.python.org/local-stats/days/%s.bz2' % (host, m)).read()
+ data = bz2.decompress(data)
+ data = cStringIO.StringIO(data)
+ year, month, day = m.split('-')
+ # index integration
+ delta = stats.integrate_stats(statsdir, year, month, day, data)
+ if dbupdate:
+ # database integration
+ integrate(config, delta)
+ integrated.add(m)
+ open('/data/pypi/stats/integrated/'+host, 'w').write('\n'.join(sorted(integrated)))
+
+def main():
+ lasts = socket.gethostbyname_ex('last.pypi.python.org')
+ # look for name X.pypi.python.org
+ lasts = [lasts[0]] + lasts[1]
+ for last in lasts:
+ if last[1:] == '.pypi.python.org':
+ break
+ else:
+ raise ValueError, "Could not properly resolve last mirror name"
+ last = last.split('.')[0]
+ integrate_remote(None, 'a', False)
+ host = 'b'
+ while True:
+ integrate_remote(sys.argv[1], host)
+ host = chr(ord(host)+1)
+ if host == last:
+ break
+
+main()
+
More information about the Pypi-checkins
mailing list