7z archive reader akin to zipfile?
Wanderer
wanderer at dialup4less.com
Sat Jan 6 11:20:44 EST 2018
On Wednesday, January 3, 2018 at 1:11:31 PM UTC-5, Skip Montanaro wrote:
> The zipfile module is kind of cool because you can access elements of
> the archive without explicitly uncompressing the entire archive and
> writing the structure to disk. I've got some 7z archives I'd like to
> treat the same way (read specific elements without first extracting
> the entire tree to disk). I see the pylzma module for compressing and
> uncompressing files, but nothing slightly higher level. Does something
> like that exist?
>
> Thx,
>
> Skip
I made this wrapper class for 7zip. It might be useful for you.
#python wrapper for 7zip
import os
import zlib
from subprocess import Popen, PIPE
"""
p = Popen(['program', 'arg1'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
output, err = p.communicate(b"input data that is passed to subprocess' stdin")
rc = p.returncode
"""
SEVEN_ZIP_PATH = "C:/Program Files/7-Zip/7z.exe"


class SevenZip(object):
    """Thin wrapper around the 7zip command line program.

    Every operation builds a subprocess-style argument list and runs it
    through call().  The instance remembers the last archive, file list
    and output directory it was given, so later calls may omit them.

    Attributes:
    ZipProgram  -- path of the 7zip executable
    archive     -- last archive path given to any method (None until set)
    outputDir   -- last output directory given to extract (None until set)
    fileList    -- last list of file paths given to add/delete/update
    archiveType -- value for the -t switch used by add/delete/update,
                   or None to omit the switch
    """

    # Number of bytes read per step when computing the archive CRC.
    _CRC_CHUNK_SIZE = 1 << 16

    def __new__(cls, ZipProgram=SEVEN_ZIP_PATH):
        """ Refuse to create the object when the 7zip program is missing.
        ZipProgram -- path of the 7zip executable
        Raises ValueError if ZipProgram is not an existing file.
        """
        if not os.path.isfile(ZipProgram):
            raise ValueError("7zip program not found in %s" % (ZipProgram,))
        return super(SevenZip, cls).__new__(cls)

    def __init__(self, ZipProgram=SEVEN_ZIP_PATH):
        """ Store the program path and reset the remembered state.
        ZipProgram -- path of the 7zip executable
        """
        self.ZipProgram = ZipProgram
        self.archive = None
        self.outputDir = None
        self.fileList = []
        self.archiveType = "zip"

    def _resolve_archive(self, archive, must_exist=True):
        """ Remember archive when one is given and decide whether to run.
        archive    -- the zip file, or None to reuse the remembered one
        must_exist -- when true the archive has to be an existing file
        Returns True when a command should run, False when no archive has
        been set at all (the public methods then do nothing).
        Raises ValueError if the archive must exist but is not a file.
        """
        if archive is not None:
            self.archive = archive
        if self.archive is None:
            return False
        if must_exist and not os.path.isfile(self.archive):
            raise ValueError("Archive not found in %s" % (self.archive,))
        return True

    def call(self, cmdList=None):
        """ Used by the other methods to call the 7zip command line.
        Can be used directly to run 7zip if the wrapper methods don't suffice.
        cmdList -- Subprocess style list of command line options with
        the first item in the list being self.ZipProgram
        Prints the captured output and return code (and stderr when it is
        not empty).
        Returns (return code, stdout, stderr) exactly as the subprocess
        produced them, or None when cmdList is None.
        """
        if cmdList is None:
            return None
        zip7 = Popen(cmdList, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        output, err = zip7.communicate()
        rc = zip7.returncode
        # Single-argument print calls give the same text on Python 2 and 3.
        print("output %s" % (output,))
        print("return code %s" % (rc,))
        if err:
            print("errors found %s" % (err,))
        return rc, output, err

    def modify(self, archive=None, fileList=None, cmd=None):
        """ Modify an archive (add, delete or update)
        [optional]
        archive -- the zip file
        fileList -- a list (or tuple) of file paths or a single filepath
        cmd -- 'a': add, 'd': delete or 'u': update
        Returns the result of call(), or None when no archive has been set.
        Raises ValueError for an unknown cmd, or when the archive does not
        exist and cmd is not 'a'.
        """
        if cmd not in ('a', 'u', 'd'):
            raise ValueError("Invalid command %s" % (cmd,))
        if fileList is not None:
            if isinstance(fileList, (list, tuple)):
                # Copy so later changes to the caller's sequence do not
                # alter the remembered file list.
                self.fileList = list(fileList)
            else:
                self.fileList = [fileList]
        # The file paths are not checked for existence here; they are
        # handed to 7zip as given.
        # Adding is allowed to create the archive, so it need not exist yet.
        if not self._resolve_archive(archive, must_exist=(cmd != 'a')):
            return None
        cmdList = [self.ZipProgram, cmd, '-y']
        if self.archiveType is not None:
            cmdList.append("-t" + self.archiveType)
        cmdList.append(self.archive)
        cmdList.extend(self.fileList)
        print(cmdList)
        return self.call(cmdList)

    def usage(self):
        """ Runs 7zip without arguments so that it reports its usage text.
        Returns the result of call(); the usage text is in its output.
        These options can be accessed directly with call.
        7-Zip [64] 9.20 Copyright (c) 1999-2010 Igor Pavlov 2010-11-18
        Usage: 7z <command> [<switches>...] <archive_name> [<file_names>...]
        [<@listfiles...>]
        <Commands>
        a: Add files to archive
        b: Benchmark
        d: Delete files from archive
        e: Extract files from archive (without using directory names)
        l: List contents of archive
        t: Test integrity of archive
        u: Update files to archive
        x: eXtract files with full paths
        <Switches>
        -ai[r[-|0]]{@listfile|!wildcard}: Include archives
        -ax[r[-|0]]{@listfile|!wildcard}: eXclude archives
        -bd: Disable percentage indicator
        -i[r[-|0]]{@listfile|!wildcard}: Include filenames
        -m{Parameters}: set compression Method
        -o{Directory}: set Output directory
        -p{Password}: set Password
        -r[-|0]: Recurse subdirectories
        -scs{UTF-8 | WIN | DOS}: set charset for list files
        -sfx[{name}]: Create SFX archive
        -si[{name}]: read data from stdin
        -slt: show technical information for l (List) command
        -so: write data to stdout
        -ssc[-]: set sensitive case mode
        -ssw: compress shared files
        -t{Type}: Set type of archive
        -u[-][p#][q#][r#][x#][y#][z#][!newArchiveName]: Update options
        -v{Size}[b|k|m|g]: Create volumes
        -w[{path}]: assign Work directory. Empty path means a temporary directory
        -x[r[-|0]]]{@listfile|!wildcard}: eXclude filenames
        -y: assume Yes on all queries
        """
        return self.call([self.ZipProgram])

    def add(self, archive=None, fileList=None):
        """ Add a file or list of files to an archive
        [optional]
        archive -- the zip file
        fileList -- a list of file paths or a single filepath
        Returns the result of modify().
        """
        return self.modify(archive, fileList, 'a')

    def benchmark(self, archive):
        """ Placeholder for the 7zip benchmark command; only prints a notice.
        archive -- unused
        """
        print("not implemented")

    def delete(self, archive=None, fileList=None):
        """ Delete a file or list of files from an archive
        [optional]
        archive -- the zip file
        fileList -- a list of file paths or a single filepath
        Returns the result of modify().
        """
        return self.modify(archive, fileList, 'd')

    def list(self, archive=None):
        """ List the contents of an archive
        [optional]
        archive -- the zip file
        Returns the result of call(), or None when no archive has been set.
        Raises ValueError if the archive does not exist.
        """
        if not self._resolve_archive(archive):
            return None
        return self.call([self.ZipProgram, "l", self.archive])

    def test(self, archive=None):
        """ Test an archive for errors
        [optional]
        archive -- the zip file
        Returns the result of call(), or None when no archive has been set.
        Raises ValueError if the archive does not exist.
        """
        if not self._resolve_archive(archive):
            return None
        return self.call([self.ZipProgram, "t", self.archive])

    def update(self, archive=None, fileList=None):
        """ Update a file or list of files to an archive
        only if the file does not exist or is newer than the existing file
        [optional]
        archive -- the zip file
        fileList -- a list of file paths or a single filepath
        Returns the result of modify().
        """
        return self.modify(archive, fileList, 'u')

    def extract(self, archive=None, fullpath=True, outputDir=None):
        """ Extract the contents of an archive
        [optional]
        archive -- the zip file
        fullpath -- extract with fullpaths ('x' command) instead of
                    flattening everything into one directory ('e' command)
        outputDir -- specify the output directory
        Returns the result of call(), or None when no archive has been set.
        Raises ValueError if the archive does not exist.
        """
        cmdList = [self.ZipProgram, 'x' if fullpath else 'e', '-y']
        # The output directory is remembered before the archive is checked,
        # so it is kept even when the archive turns out to be missing.
        if outputDir is not None:
            self.outputDir = outputDir
        if self.outputDir is not None:
            cmdList.append('-o' + self.outputDir)
        if not self._resolve_archive(archive):
            return None
        cmdList.append(self.archive)
        print(cmdList)
        return self.call(cmdList)

    def crc(self, archive=None):
        """ Return the checksum of the archive
        archive -- the zip file
        Returns the CRC-32 of the archive file's bytes as upper case hex
        digits without padding, or None when no archive has been set.
        Raises ValueError if the archive does not exist.
        """
        if not self._resolve_archive(archive):
            return None
        checksum = 0
        # Fixed-size chunks keep memory bounded; splitting binary data on
        # newlines could pull a whole archive into memory at once.
        with open(self.archive, "rb") as f:
            for chunk in iter(lambda: f.read(self._CRC_CHUNK_SIZE), b""):
                checksum = zlib.crc32(chunk, checksum)
        return "%X" % (checksum & 0xFFFFFFFF)
More information about the Python-list
mailing list