# Copyright (C) 1998,1999,2000 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """Portable (?) file locking with timeouts. This code should work reliably with all versions of NFS. Based on the original LockFile.py, of which the algorithm was supposedly suggested by the GNU/Linux open() man page, and Harald Meland's LockFile.py, of which the algorithm was based on Exim's locking style. The algorithm used here is originally taken from qmail. Make sure no malicious people have access to link() to the lock file. """ import socket, os, time import string import errno from stat import ST_NLINK, ST_INO # default intervals are both specified in seconds, and can be floating point # values. DEFAULT_HUNG_TIMEOUT specifies the default length of time that a # lock is expecting to hold the lock -- this can be set in the constructor, # or changed via a mutator. DEFAULT_SLEEP_INTERVAL is the average amount of # time to sleep before checking the lock's status, if we were not the # winning claimant the previous time around. The sleep interval is modified # based on the process-id to prevent unwanted stride effects. DEFAULT_LOCK_LIFETIME = 15 DEFAULT_SLEEP_INTERVAL = 0.75 from Mailman.Logging.StampedLogger import StampedLogger _logfile = None # exceptions which can be raised class LockError(Exception): """Base class for all exceptions in this module.""" pass class AlreadyLockedError(LockError): """Raised when a lock is attempted on an already locked object.""" pass class NotLockedError(LockError): """Raised when an unlock is attempted on an objec that isn't locked.""" pass class TimeOutError(LockError): """Raised when a lock was attempted, but did not succeed in the given amount of time. """ pass class StaleLockFileError(LockError): """Raised when a stale hardlink lock file was found.""" pass class LockFile: """A portable way to lock resources by way of the file system.""" def __init__(self, lockfile, lifetime=DEFAULT_LOCK_LIFETIME, sleep_interval=DEFAULT_SLEEP_INTERVAL, withlogging=0): """Creates a lock file using the specified file. lifetime is the maximum length of time expected to keep this lock. This value is written into the lock file so that other claimants on the lock know when it is safe to steal the lock, should the lock holder be wedged. sleep_interval is how often to wake up and check the lock file """ self.__lockfile = lockfile self.__lifetime = lifetime self.__sleep_interval = sleep_interval + (os.getpid()%32 - 16) / 100. self.__tmpfname = "%s.%s.%d" % (lockfile, socket.gethostname(), os.getpid()) self.__withlogging = withlogging self.__logprefix = os.path.split(self.__lockfile)[1] self.__writelog("sleep_interval = %f"%self.__sleep_interval) def set_lifetime(self, lifetime): """Reset the lifetime of the lock. Takes affect the next time the file is locked. """ self.__lifetime = lifetime def __writelog(self, msg): global _logfile if self.__withlogging: if not _logfile: _logfile = StampedLogger('locks', manual_reprime=0) _logfile.write('%s %s\n' % (self.__logprefix, msg)) def refresh(self, newlifetime=None): """Refresh the lock. This writes a new release time into the lock file. Use this if a process suddenly realizes it needs more time to do its work. With optional newlifetime, this resets the lock lifetime value too. NotLockedError is raised if we don't already own the lock. """ if not self.locked(): raise NotLockedError if newlifetime is not None: self.set_lifetime(newlifetime) self.__refresh() def __refresh(self): """Internal function to refresh the lock. Does not check for lock. Only call while holding the lock!""" tmpfname = self.__tmpfname + ".tmp" tmplock = self.__lockfile + ".tmp" self.__write(tmpfname) if os.path.exists(tmplock): os.unlink(tmplock) os.link(tmpfname, tmplock) os.rename(tmplock, self.__lockfile) os.rename(tmpfname, self.__tmpfname) self.__writelog('lock lifetime refreshed for %d seconds' % self.__lifetime) def __del__(self): if self.locked(): self.unlock() def __write(self, filename = None): # we expect to release our lock some time in the future. we want to # give other claimants some clue as to when we think we're going to be # done with it, so they don't try to steal it out from underneath us # unless we've actually been wedged. lockrelease = time.time() + self.__lifetime # make sure it's group writable oldmask = os.umask(002) if not filename: filename = self.__tmpfname try: fp = open(filename, 'w') fp.write('%d %s %f\n' % (os.getpid(), self.__tmpfname, lockrelease)) fp.close() finally: os.umask(oldmask) def __read(self): # can raise ValueError in two situations: # # either first element wasn't an integer (a valid pid), or we didn't # get a sequence of the right size from the string.split. Either way, # the data in the file is bogus, but this is caught and handled higher # up. # # NOTE: Reads the real lockfile now, not the symlink. try: fp = open(self.__lockfile, 'r') pid, winner, lockrelease = string.split(string.strip(fp.read())) except IOError, err: if err.errno == errno.ENOENT: raise ValueError # IOerror, but not 'ENOENT'. 'permission denied' ? raise return int(pid), winner, float(lockrelease) def __break(self): # We apparently have had enough of it, we wish to break the lock. try: winner = None try: pid, winner, lockrelease = self.__read() except ValueError: pass os.unlink(self.__lockfile) if winner: os.unlink(winner) except OSError, err: if err.errno <> errno.ENOENT: raise # Note that no one new can grab the lock once we've opened our tmpfile # until we close it, even if we don't have the lock. So checking the PID # and stealing the lock are guaranteed to be atomic. def lock(self, timeout=0): """Blocks until the lock can be obtained. Raises a TimeOutError exception if a positive timeout value is given and that time elapses before the lock is obtained. This can possibly steal the lock from some other claimant, if the lock lifetime that was written to the file has been exceeded. Note that for this to work across machines, the clocks must be sufficiently synchronized. """ if self.locked(): self.__writelog('already locked') raise AlreadyLockedError if timeout: timeout_time = time.time() + timeout last_pid = -1 # loopcount is used to determine wether we should check the lockfile, # after a lock fails. We start with -1, so the lockfile gets checked # the first time through the loop. loopcount = -1 self.__write() # Make sure my lockfile exists, and time is up to date while 1: loopcount = loopcount + 1 # create the hard link and test for exactly 2 links to the file try: os.link(self.__tmpfname, self.__lockfile) except OSError, err: if err.errno == errno.ENOENT: # Our own lockfile vanished ?! # Re-create it, and try again. Sleep anyway, so we dont # unintendedly hog the machine. self.__writelog("own lockfile missing.") self.__write() self.__sleep() continue elif err.errno == errno.EEXIST: # We failed to get the lock. # We want to test the contents of the lock, see if it's # bogus or timed out, but we dont want to do that each # time through. if loopcount%10: self.__writelog("lock failed; skipping checks.") self.__sleep() continue else: try: os.unlink(self.__tmpfname) except: pass raise OSError, err if os.stat(self.__tmpfname)[ST_NLINK] == 2: # we have the lock (since there are no other links to the lock # file), so we can rehydrate our marker self.__writelog("got the lock") self.__refresh() if not self.locked(): self.__writelog("false positive on lock") continue break # we didn't get the lock this time. let's see if we timed out if timeout and timeout_time < time.time(): os.unlink(self.__tmpfname) self.__writelog('timed out') raise TimeOutError # someone else must have gotten the lock. let's find out who it # is. if there is some bogosity in the lock file's data then we # will break the lock. try: pid, winner, lockrelease = self.__read() except ValueError: self.__writelog("breaking lock (fawlty lockfile)") self.__break() self.__sleep() continue # here's where we potentially break the lock. If the lock has # timed out (the time written in by the original locker has come # and gone) then we let __break() deal with it. if lockrelease < time.time(): self.__writelog("breaking lock (too old)") self.__break() self.__sleep() continue # okay, someone else has the lock, we didn't steal it, and our # claim hasn't timed out yet. So let's wait a while for the owner # of the lock to give it up. self.__writelog('waiting for claim') self.__sleep() def __sleep(self): time.sleep(self.__sleep_interval) # This could error if the lock is stolen. You must catch it. def unlock(self): """Unlock the lock. If we don't already own the lock (either because of unbalanced unlock calls, or because the lock was stolen out from under us), raise a NotLockedError. """ if not self.locked(): raise NotLockedError try: os.unlink(self.__tmpfname) os.unlink(self.__lockfile) except OSError, err: if err.errno <> errno.ENOENT: raise self.__writelog('unlocked') def locked(self): """Returns 1 if we own the lock, 0 if we do not.""" if not os.path.exists(self.__tmpfname): return 0 if not os.path.exists(self.__lockfile): return 0 if os.stat(self.__tmpfname)[ST_NLINK] < 2: return 0 # The above check for ST_NLINK ought to be enough for everyone. # Regardless, we're very paranoid; we check the lockfile data. # If you are optimising for speed, you can 'return 1' here try: pid, winner, lockrelease = self.__read() except ValueError: # Something strange is going on. Either our own lockfile is corrupt # or someone link()ed to our lockfile, making us believe we have # the lock. if os.stat(self.__tmpfname)[ST_INO] <> os.stat(self.__lockfile)[ST_INO]: # We do not really have the lock. unlink so we get a new inode os.unlink(self.__tmpfname) self.__writelog("linkcount was 2, but we dont have the lock!") return 0 else: # It really is our lock, but the contents are corrupted. self.__refresh() self.__writelog("linkcount was 2, but our lockfile data was odd!") return 1 if pid <> os.getpid(): self.__writelog("lock seemed ok, but data wasn't our data.") return 0 return 1 # use with caution!!! def steal(self): """Explicitly break the lock. USE WITH CAUTION!""" self.__writelog("stealing the lock...") # We assume someone else sucessfully has the lock, so we try and # steal it from them. There is a race-condition involved here: # multiple processes can be attempting to steal the lock. Hence the # sleep and check if we are actually locked. # XXX Should NotLockedError be raised instead of returning 0 ? try: pid, winner, timeout = self.__read() except ValueError: winner = None if winner and os.path.exists(winner): # Someone has the lock. We steal their private lockfile, both # to clean up after us, and to steal the lock as smoothlessly as # possible. Only another process attempting a steal() or break() # could interfere, which is why we sleep after stealing. os.rename(winner, self.__tmpfname) self.__refresh() else: # The file seemed unlocked, so we bluntly lock it. Not really # proper, as we do not check if someone else had just grabbed the # lock. self.__write() os.link(self.__tmpfname, self.__lockfile + ".tmp") os.rename(self.__lockfile + ".tmp", self.__lockfile) self.__sleep() if self.locked(): self.__writelog("succesfully stolen!") return 1 else: self.__writelog("failed to steal the lock.") return 0