[Tutor] Adding support for password protection to zipfile (long)

Charlie Clark charlie@begeistert.org
Fri May 16 10:01:34 2003


Dear list,

I've started looking at the source to Unzip and I think I've found the 
appropriate part:

#if CRYPT
                /* GRR:  yes, this is highly insecure, but dozens of people
                 * have pestered us for this, so here we go... */
                case ('P'):
                    if (negative) {   /* negative not allowed with -P 
passwd */
                        Info(slide, 0x401, ((char *)slide,
                          LoadFarString(MustGivePasswd)));
                        return(PK_PARAM);  /* don't extract here by 
accident */
                    }
                    if (uO.pwdarg != (char *)NULL) {
/*
                        GRR:  eventually support multiple passwords?
                        Info(slide, 0x401, ((char *)slide,
                          LoadFarString(OnlyOnePasswd)));
                        return(PK_PARAM);
 */
                    } else {
                        /* first check for "-Ppasswd", then for "-P passwd" 
*/
                        uO.pwdarg = s;
                        if (*uO.pwdarg == '\0') {
                            if (argc > 1) {
                                --argc;
                                uO.pwdarg = *++argv;
                                if (*uO.pwdarg == '-') {
                                    Info(slide, 0x401, ((char *)slide,
                                      LoadFarString(MustGivePasswd)));
                                    return(PK_PARAM);
                                }
                                /* else pwdarg points at decryption 
password */
                            } else {
                                Info(slide, 0x401, ((char *)slide,
                                  LoadFarString(MustGivePasswd)));
                                return(PK_PARAM);
                            }
                        }
                        /* pwdarg now points at decryption password 
(-Ppasswd or
                         *  -P passwd); point s at end of passwd to avoid 
mis-
                         *  interpretation of passwd characters as more 
options
                         */
                        if (*s != 0)
                            while (*++s != 0)
                                ;
                    }
                    break;

Now, while I've never worked with C, this looks doable to me.
Here's my attempt to turn this into Python.

Assuming unzip is called like this:
unzip -P secret bilder.zip
the Python unzip.py has the same interface:
python unzip.py -P secret bilder.zip


if sys.argv[1] == "-P":
	if negative:		# what is negative?
	"""I think we check for a password here.
		This function is called often, let's give it a name
		call_info"""
		Info(slide, 0x401, ((char *)slide,		# what is slide?
                          LoadFarString(MustGivePasswd)) 
		return PK_PARAM 

	if sys.argv[2]:
		"""This is commented out but seems to refer to several password
			being supplied which is an error. It only uses
			call_info anyway"""
	
	if len(sys.argv[1]) > 2:
		"""check to see whether we have -P password or -Ppassword"""	
		password = sys.argv[1][2:]
		if not password:
			"""call_info is called again"""
	else:
		"""call_info is called again"""

As far as I can tell the work is done by the following:

#if CRYPT
# ifdef PASSWD_FROM_STDIN
        Info(slide, 0, ((char *)slide, LoadFarString(CompileOptFormat),
          LoadFarStringSmall(PasswdStdin)));
# endif
        Info(slide, 0, ((char *)slide, LoadFarString(Decryption),
          CR_MAJORVER, CR_MINORVER, CR_BETA_VER,
          LoadFarStringSmall(CryptDate)));
        ++numopts;
#endif /* CRYPT */

I think this is something like

if crypt:
	if password:
		"""check the format, assign the password"""
	"""check the encryption version"""

Hmm, apart from the fact that reading command line arguments seems to be a 
real pain in C compared to Python, I think I'm getting somewhere, although 
I've yet to find the part where the password is actually used for decryption.

Looking at the source of zipfile we find the ZipFile class. As you only 
need the password to extract individual files, we only need to override 
the read() or write() methods.

Rolling my own extension would be

class EncryptedZipfile(zipfile.ZipFile):
    def __init__(self, password=None):
		zipfile.ZipFile.__init__(self)
		self.password = password


This is the source for ZipFile.read():
    def read(self, name):
        """Return file bytes (as a string) for name."""
        if self.mode not in ("r", "a"):
            raise RuntimeError, 'read() requires mode "r" or "a"'
        if not self.fp:
            raise RuntimeError, \
                  "Attempt to read ZIP archive that was already closed"
		if self.password:
			# do the decryption thing
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        bytes = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError, \
                      "De-compression requires the (missing) zlib module"
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            bytes = dc.decompress(bytes)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                bytes = bytes + ex
        else:
            raise BadZipfile, \
                  "Unsupported compression method %d for file %s" % \
            (zinfo.compress_type, name)
        crc = binascii.crc32(bytes)
        if crc != zinfo.CRC:
            raise BadZipfile, "Bad CRC-32 for file %s" % name
        return bytes

I'm not sure whether I'll ever succeed with this but I'd like to try. Any 
comments on my analysis or assumptions are welcome; please cc: me as I'm 
a digest reader.

Thanx

Charlie