[Pypi-checkins] r866 - trunk/pypi/tools
martin.von.loewis
python-checkins at python.org
Mon Sep 20 10:46:00 CEST 2010
Author: martin.von.loewis
Date: Mon Sep 20 10:46:00 2010
New Revision: 866
Added:
trunk/pypi/tools/mirrorlib.py (contents, props changed)
Log:
Create library to deal with PyPI mirrors.
Added: trunk/pypi/tools/mirrorlib.py
==============================================================================
--- (empty file)
+++ trunk/pypi/tools/mirrorlib.py Mon Sep 20 10:46:00 2010
@@ -0,0 +1,158 @@
+'''Library to support tools that access PyPI mirrors. The following
+functional areas are intended to be covered:
+- mirror selection (find_mirror)
+- mirror verification (not implemented in this revision)
+- key rollover (not implemented in this revision)
+'''
+
+################## Mirror Selection ##############################
+import socket, time, datetime, errno, select
+
+def _mirror_list(first):
+ '''Generator producing all mirror names'''
+ ord_a = ord('a')
+ last = socket.gethostbyname_ex('last.pypi.python.org')
+ cur_index = ord(first)-ord_a
+ cur = first+'.pypi.python.org'
+ while last[0] != cur:
+ yield cur, socket.gethostbyname(cur)
+ cur_index += 1
+ if cur_index < 26:
+ # a..z
+ cur = chr(ord_a+cur_index)
+ elif cur_index > 701:
+ raise ValueError, 'too many mirrors'
+ else:
+ # aa, ab, ... zz
+ cur = divmod(cur_index, 26)
+ cur = chr(ord_a-1+cur[0])+chr(ord_a+cur[1])
+ cur += '.pypi.python.org'
+ yield last[0], last[2][0]
+
+class _Mirror:
+ # status values:
+ # 0: wants to send
+ # 1: wants to recv
+ # 2: completed, ok
+ # 3: completed, failed
+ def __init__(self, name, ip):
+ self.name = name
+ self.ip = ip
+ self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.socket.setblocking(0)
+ self.started = time.time()
+ try:
+ self.socket.connect((name, 80))
+ except socket.error, e:
+ if e.errno != errno.EINPROGRESS:
+ raise
+ # now need to select for writing
+ self.status = 0
+
+ def write(self):
+ self.socket.send('GET /last-modified HTTP/1.0\r\n'
+ 'Host: %s\r\n'
+ '\r\n' % self.name)
+ self.status = 1
+
+ def read(self):
+ data = self.socket.recv(1200)
+ self.response_time = time.time()-self.started
+ # response should be much shorter
+ assert len(data) < 1200
+ self.socket.close()
+ data = data.splitlines()
+ if data[0].split()[1] == '200':
+ # ok
+ data = data[-1]
+ try:
+ self.last_modified = datetime.datetime.strptime(data, "%Y%m%dT%H:%M:%S")
+ self.status = 2 # complete
+ except ValueError:
+ self.status = 3 # failed
+ else:
+ self.status = 3
+
+ def failed(self):
+ self.socket.close()
+ self.status = failed()
+
+ def results(self):
+ return self.name, self.ip, self.response_time, self.last_modified
+
+def _select(mirrors):
+ # perform select call on mirrors dictionary
+ rlist = []
+ wlist = []
+ xlist = []
+ for m in mirrors.values():
+ if m.status == 0:
+ wlist.append(m.socket)
+ xlist.append(m.socket)
+ elif m.status == 1:
+ rlist.append(m.socket)
+ xlist.append(m.socket)
+ rlist, wlist, xlist = select.select(rlist, wlist, xlist, 0)
+ completed = []
+ for s in wlist:
+ mirrors[s].write()
+ for s in rlist:
+ m = mirrors[s]
+ del mirrors[s]
+ m.read()
+ if m.status == 2:
+ completed.append(m)
+ for s in xlist:
+ mirrors[s].failed()
+ del mirrors[s]
+ return completed
+
+def _close(mirrors):
+ for m in mirrors:
+ m.close()
+
+def _newest(mirrors):
+ if not mirrors:
+ raise ValueError, "no mirrors found"
+ mirrors.sort(key=lambda m:m.last_modified)
+ return mirrors[-1].results()
+
+def find_mirror(start_with='a',
+ good_response_time = 1,
+ good_age = 30*60,
+ max_wait = 5):
+ '''find_mirror(start_with, good_response_time, good_age, max_wait) -> name, IP, response_time, last_modified
+ Find a PyPI mirror matching given criteria.
+ start_with indicates the first mirror that should be considered (defaults to 'a').
+ good_response_time is the maximum response time which lets this algorithm look no further;
+ likewise, good_age is the maximum age acceptable to the caller.
+ If this procedure goes on for longer than max_wait (default 5s), return even if
+ not all mirrors have been responding.
+ If no matching mirror can be found, the newest one that did response is returned.'''
+ started = time.time()
+ good_mirrors = []
+ pending_mirrors = {} # socket:mirror
+ good_last_modified = datetime.datetime.utcnow()-datetime.timedelta(seconds=good_age)
+ for host, ip in _mirror_list(start_with):
+ m = _Mirror(host, ip)
+ pending_mirrors[m.socket] = m
+ for m in _select(pending_mirrors):
+ if m.response_time < good_response_time and m.last_modified > good_last_modified:
+ _close(pending_mirrors)
+ return m.results()
+ else:
+ good_mirrors.append(m)
+
+ while pending_mirrors:
+ if time.time() > started+max_wait and good_mirrors:
+ # if we have looked for 5s for a mirror, and we already have one
+ # return the newest one
+ _close(pending)
+ return _newest(good_mirrors)
+ for m in _select(pending_mirrors):
+ if m.response_time < good_response_time and m.last_modified > good_last_modified:
+ _close(pending_mirrors)
+ return m.results()
+ else:
+ good_mirrors.append(m)
+ return _newest(good_mirrors)
More information about the Pypi-checkins
mailing list