Tar-like module?
Drew Csillag
drew_csillag at geocities.com
Fri Dec 21 09:20:01 EST 2001
On Thu, Dec 20, 2001 at 09:24:07PM -0500, Scott Fenton wrote:
> This may be an idiotic question, but is there a module
> to read tar files in python? I googled around and
> couldn't find anything, and the Python Std Library docs
> didn't have anything. Any help would be nice.
As far as I know there isn't, but I've written the following module
which I've been using.
This (or something like it) should probably be in the standard library as
this question comes up often enough. Guido...Hint... Hint...
Cheers,
Drew
"""Module for reading tar files"""
import string
import struct
def _checksum(ck, contents):
sum = 0L
for i in contents:
sum += 0xFFL & ord(i)
for i in contents[148:156]: #treat checksum bytes as spaces
sum -= 0xFFL & ord(i)
sum += ord(' ')
return sum == ck
def _cvtnulloctal(f, k=None):
zi = f.find('\0')
if zi > -1:
f = f[:zi]
try:
return string.atoi(f, 8)
except:
return 0
def _tarstr(f):
zi = f.find('\0')
if zi > -1:
f = f[:zi]
return f
def _tf(name, *args):
print name
# ustar header layout: name, mode, uid, gid, size, mtime, checksum, typeflag,
# linkname, magic (6 bytes), version (2 bytes), uname, gname, devmajor,
# devminor, prefix. The version field is two bytes wide; reading it as six
# would shift every later field by four bytes.
_HEADER_FORMAT = '100s8s8s8s12s12s8s1s100s6s2s32s32s8s8s155s'
_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)

# Maps the one-character typeflag to the description handed to tarEater.
# Built once here rather than once per header.
_TYPEFLAG_NAMES = {
    '': 'regular',
    '0': 'regular',
    '\0': 'regular',
    '1': 'link',
    '2': 'symbolic link',
    '3': 'character special',
    '4': 'block special',
    '5': 'directory',
    '6': 'fifo',
    '7': 'reserved',
}


def readTar(fileObj, tarEater=_tf):
    """Read through the contents of a tar archive.

    fileObj is a file (or file-like) object whose read() returns byte
    strings, i.e. one opened in binary mode.

    tarEater is a callable object which takes the following arguments:
        name, contents, size, mode, uid, gid, mtime,
        typeflag, linkname, uname, gname, devmaj, devmin
    It is called once per archive entry, in archive order. typeflag is a
    description such as 'regular' or 'directory', not the raw flag byte.

    Reading stops after two consecutive header blocks with an empty name.

    Raises:
        EOFError   -- the stream ends in the middle of a header or payload
        KeyError   -- an entry has a type flag this module does not know
        ValueError -- a header's checksum does not match its contents
    """
    # Must exist before the loop: the very first block may already be an
    # empty one (for example in an archive with no entries).
    empty_blocks = 0
    while True:
        header = fileObj.read(512)
        if len(header) != 512:
            raise EOFError('Unexpected end of tar stream')

        (name, mode, uid, gid, size, mtime, cksum, typeflag,
         linkname, _magic, _version, uname, gname, devmaj,
         devmin, prefix) = struct.unpack(_HEADER_FORMAT,
                                         header[:_HEADER_SIZE])
        name, linkname, uname, gname, prefix = map(_tarstr, (
            name, linkname, uname, gname, prefix))
        mode, uid, gid, size, mtime, cksum, devmaj, devmin = map(
            _cvtnulloctal, (mode, uid, gid, size, mtime, cksum, devmaj,
                            devmin))

        # Null name fields are normal in tar files, so have to check.
        if not name:
            empty_blocks += 1
            if empty_blocks == 2:
                break
            continue
        empty_blocks = 0

        if not isinstance(typeflag, str):
            # Python 3: struct yields bytes; the lookup table is text-keyed.
            typeflag = typeflag.decode('latin-1')
        try:
            typeflag = _TYPEFLAG_NAMES[typeflag]
        except KeyError:
            raise KeyError('unknown file type in tar %X <%s> '
                           % (ord(typeflag), name))

        # The payload is stored padded up to a whole number of 512-byte
        # blocks; consume the padding but hand on only `size` bytes.
        blocks_to_read = (size + 511) // 512
        contents = fileObj.read(blocks_to_read * 512)[:size]
        if len(contents) < size:
            raise EOFError('Unexpected end of tar stream')

        if prefix:
            # The prefix field holds the leading directory part of a long
            # path and is joined to the name with a slash.
            name = prefix + '/' + name

        if not _checksum(cksum, header):
            raise ValueError("checksum error -- %s" % name)
        tarEater(name, contents, size, mode, uid, gid, mtime,
                 typeflag, linkname, uname, gname, devmaj, devmin)
if __name__ == '__main__':
    # Command-line use: list the entry names of the archive given as the
    # only argument. A path ending in 'gz' is read through gzip.
    import sys
    if len(sys.argv) != 2:
        sys.exit('usage: %s archive.tar[.gz]' % sys.argv[0])
    path = sys.argv[1]
    if path[-2:] == 'gz':
        import gzip
        archive = gzip.GzipFile(path)
    else:
        # Binary mode: a tar archive is not text, and text mode would
        # translate or decode its bytes on some platforms.
        archive = open(path, 'rb')
    try:
        readTar(archive)
    finally:
        archive.close()
More information about the Python-list
mailing list