[pypy-svn] r56511 - in pypy/dist/pypy/lib: . test2

hruske at codespeak.net hruske at codespeak.net
Sun Jul 13 16:01:24 CEST 2008


Author: hruske
Date: Sun Jul 13 16:01:24 2008
New Revision: 56511

Added:
   pypy/dist/pypy/lib/_hashlib.py
   pypy/dist/pypy/lib/test2/test_hashlib.py
Modified:
   pypy/dist/pypy/lib/hashlib.py
Log:

Adding my _hashlib implementation based on _hashopenssl.c. It works on Linux;
on OS X it offers only limited functionality, since the OpenSSL library bundled
with OS X only provides MD5 and SHA1 hashes. Also worth noting is that MD5 and
SHA1 will always work. Also adding the unmodified test_hashlib.py from Python 2.5.


Added: pypy/dist/pypy/lib/_hashlib.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/lib/_hashlib.py	Sun Jul 13 16:01:24 2008
@@ -0,0 +1,160 @@
+from ctypes import *
+import ctypes.util
+
+# Note: OpenSSL on OS X only provides md5 and sha1
+libpath = ctypes.util.find_library('ssl')
+lib = CDLL(libpath) # Linux, OS X
+
+
+# FIXME do we really need this anywhere here?
+class ENV_MD(Structure):
+	# ctypes structure with three c_int fields: type, pkey_type and md_size.
+	# It is embedded as the first field ('digest') of _dummy_env_md.
+	# NOTE(review): the name and fields look like the beginning of OpenSSL's
+	# EVP_MD (struct env_md_st) from evp.h -- confirm against the header.
+	# XXX Should there be more to this object?.
+	_fields_ = [
+		('type', c_int),
+		('pkey_type', c_int),
+		('md_size', c_int),
+	]
+
+class _dummy_env_md(Structure):
+	# Buffer type created by _new_ENV_MD() and handed to OpenSSL as a digest
+	# context (EVP_DigestInit in new(), EVP_MD_CTX_copy in hash.copy()).
+	# XXX used for OS X, a bit hairy
+	# NOTE(review): the layout is one ENV_MD (3 c_ints) followed by 4 more
+	# c_ints, i.e. 7 c_ints in total. Nothing here checks that this is at
+	# least as large as the library's real EVP_MD_CTX -- confirm for every
+	# targeted platform, 64-bit builds in particular.
+	_fields_ = [
+		('digest', ENV_MD),
+		('two', c_int),
+		('three', c_int),
+		('four', c_int),
+		('five', c_int),
+		]
+
+def _new_ENV_MD():
+	return _dummy_env_md()
+
+# OpenSSL initialization
+lib.OpenSSL_add_all_digests()
+
+def _get_digest(ctx):
+	return ctx.digest
+
+# taken from evp.h, max size is 512 bit, 64 chars
+lib.EVP_MAX_MD_SIZE = 64
+
+class hash(object):
+	"""
+	A hash represents the object used to calculate a checksum of a
+	string of information.
+	
+	Methods:
+	
+	update() -- updates the current digest with an additional string
+	digest() -- return the current digest value
+	hexdigest() -- return the current digest as a string of hexadecimal digits
+	copy() -- return a copy of the current hash object
+	
+	Attributes:
+	
+	name -- the hash algorithm being used by this object
+	digest_size -- number of bytes in this hashes output
+	"""
+	def __init__(self, obj, name):
+		self.name = name # part of API
+		#print 'obj is ', obj
+		if isinstance(obj, _dummy_env_md):
+			self._obj = obj.digest
+		else:
+			self._obj = obj  # private
+	
+	def __repr__(self):
+		# format is not the same as in C module
+		return "<%s HASH object>" % (self.name)
+	
+	def copy(self):
+		"Return a copy of the hash object."
+		ctxnew = _new_ENV_MD()
+		lib.EVP_MD_CTX_copy(byref(ctxnew), byref(self._obj))
+		return hash(ctxnew, self.name)
+	
+	def hexdigest(self):
+		"Return the digest value as a string of hexadecimal digits."
+		dig = self.digest()
+		a = []
+		for x in dig:
+			a.append('%.2x' % ord(x))
+		#print '\n--- %r \n' % ''.join(a)
+		return ''.join(a)
+	
+	def digest(self):
+		"Return the digest value as a string of binary data."
+		tmpctx = self.copy()
+		digest_size = tmpctx.digest_size
+		dig = create_string_buffer(lib.EVP_MAX_MD_SIZE)
+		lib.EVP_DigestFinal(byref(tmpctx._obj), dig, None)
+		lib.EVP_MD_CTX_cleanup(byref(tmpctx._obj))
+		return dig.raw[:digest_size]
+	
+	def digest_size(self):
+		# XXX This isn't the nicest way, but the EVP_MD_size OpenSSL function
+		# XXX is defined as a C macro on OS X and would be significantly 
+		# XXX harder to implement in another way.
+		# Values are digest sizes in bytes
+		return {
+			'md5': 16,
+			'sha1': 20,
+			'sha224': 28,
+			'sha256': 32,
+			'sha384': 48,
+			'sha512': 64,
+			}.get(self.name, 0)
+	digest_size = property(digest_size, None, None) # PEP 247
+	digestsize = digest_size # deprecated, was once defined by sha module
+	
+	def block_size(self):
+		return lib.EVP_MD_CTX_block_size(byref(self._obj))
+	block_size = property(block_size, None, None)
+	
+	def update(self, string):
+		"Update this hash object's state with the provided string."
+		lib.EVP_DigestUpdate(byref(self._obj), c_char_p(string), c_uint(len(string)))
+
+def new(name, string=''):
+	"""
+	Return a new hash object using the named algorithm.
+	An optional string argument may be provided and will be
+	automatically hashed.
+	
+	The MD5 and SHA1 algorithms are always supported.
+	"""
+	digest = lib.EVP_get_digestbyname(c_char_p(name))
+	
+	if not isinstance(name, str):
+		raise TypeError("name must be a string")
+	if not digest:
+		raise ValueError("unknown hash function")
+	
+	ctx = _new_ENV_MD()
+	lib.EVP_DigestInit(pointer(ctx), digest)
+	
+	h = hash(_get_digest(ctx), name)
+	if string:
+		if not isinstance(string, str):
+			raise ValueError("hash content is not string")
+		h.update(string)
+	return hash(ctx, name)
+
+# shortcut functions
+def openssl_md5(string=''):
+	return new('md5', string)
+
+def openssl_sha1(string=''):
+	return new('sha1', string)
+
+def openssl_sha224(string=''):
+	return new('sha224', string)
+
+def openssl_sha256(string=''):
+	return new('sha256', string)
+
+def openssl_sha384(string=''):
+	return new('sha384', string)
+
+def openssl_sha512(string=''):
+	return new('sha512', string)
+

Modified: pypy/dist/pypy/lib/hashlib.py
==============================================================================
--- pypy/dist/pypy/lib/hashlib.py	(original)
+++ pypy/dist/pypy/lib/hashlib.py	Sun Jul 13 16:01:24 2008
@@ -114,6 +114,7 @@
     del funcName
 
 except ImportError:
+    raise # XXX Don't try to load nonexistent C modules on PyPy
     # We don't have the _hashlib OpenSSL module?
     # use the built in legacy interfaces via a wrapper function
     new = __py_new
@@ -121,7 +122,7 @@
     # lookup the C function to use directly for the named constructors
     md5 = __get_builtin_constructor('md5')
     sha1 = __get_builtin_constructor('sha1')
-    #sha224 = __get_builtin_constructor('sha224')
-    #sha256 = __get_builtin_constructor('sha256')
-    #sha384 = __get_builtin_constructor('sha384')
-    #sha512 = __get_builtin_constructor('sha512')
+    sha224 = __get_builtin_constructor('sha224')
+    sha256 = __get_builtin_constructor('sha256')
+    sha384 = __get_builtin_constructor('sha384')
+    sha512 = __get_builtin_constructor('sha512')

Added: pypy/dist/pypy/lib/test2/test_hashlib.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/lib/test2/test_hashlib.py	Sun Jul 13 16:01:24 2008
@@ -0,0 +1,191 @@
+# Test hashlib module
+#
+# $Id: test_hashlib.py 39316 2005-08-21 18:45:59Z greg $
+#
+#  Copyright (C) 2005   Gregory P. Smith (greg at electricrain.com)
+#  Licensed to PSF under a Contributor Agreement.
+#
+
+import hashlib
+import unittest
+from test import test_support
+
+
+def hexstr(s):
+    import string
+    h = string.hexdigits
+    r = ''
+    for c in s:
+        i = ord(c)
+        r = r + h[(i >> 4) & 0xF] + h[i & 0xF]
+    return r
+
+
+class HashLibTestCase(unittest.TestCase):
+    # Exercises hashlib.new() and the named constructors (hashlib.md5,
+    # hashlib.sha1, ...) against known hex digests for MD5, SHA-1, SHA-224,
+    # SHA-256, SHA-384 and SHA-512.
+
+    # Algorithm names passed to hashlib.new(); both lower- and upper-case
+    # spellings are listed.
+    supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1',
+                             'sha224', 'SHA224', 'sha256', 'SHA256',
+                             'sha384', 'SHA384', 'sha512', 'SHA512' )
+
+    def test_unknown_hash(self):
+        # hashlib.new() must raise ValueError for an unknown algorithm name.
+        try:
+            hashlib.new('spam spam spam spam spam')
+        except ValueError:
+            pass
+        else:
+            # 0 == <str> is always false, so reaching this line fails the test.
+            self.assert_(0 == "hashlib didn't reject bogus hash name")
+
+    def test_hexdigest(self):
+        # hexdigest() must equal the hex encoding of digest().
+        for name in self.supported_hash_names:
+            h = hashlib.new(name)
+            self.assert_(hexstr(h.digest()) == h.hexdigest())
+
+
+    def test_large_update(self):
+        # Feeding the data in three update() calls must give the same digest
+        # as feeding the concatenation in a single call.
+        aas = 'a' * 128
+        bees = 'b' * 127
+        cees = 'c' * 126
+
+        for name in self.supported_hash_names:
+            m1 = hashlib.new(name)
+            m1.update(aas)
+            m1.update(bees)
+            m1.update(cees)
+
+            m2 = hashlib.new(name)
+            m2.update(aas + bees + cees)
+            self.assertEqual(m1.digest(), m2.digest())
+
+
+    def check(self, name, data, digest):
+        # Helper: assert that both hashlib.<name>(data) and
+        # hashlib.new(name, data) produce the expected hex digest.
+        # test the direct constructors
+        computed = getattr(hashlib, name)(data).hexdigest()
+        self.assert_(computed == digest)
+        # test the general new() interface
+        computed = hashlib.new(name, data).hexdigest()
+        self.assert_(computed == digest)
+
+
+    def test_case_md5_0(self):
+        self.check('md5', '', 'd41d8cd98f00b204e9800998ecf8427e')
+
+    def test_case_md5_1(self):
+        self.check('md5', 'abc', '900150983cd24fb0d6963f7d28e17f72')
+
+    def test_case_md5_2(self):
+        self.check('md5', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
+                   'd174ab98d277d9f5a5611c2c9f419d9f')
+
+
+    # use the three examples from Federal Information Processing Standards
+    # Publication 180-1, Secure Hash Standard,  1995 April 17
+    # http://www.itl.nist.gov/div897/pubs/fip180-1.htm
+
+    def test_case_sha1_0(self):
+        self.check('sha1', "",
+                   "da39a3ee5e6b4b0d3255bfef95601890afd80709")
+
+    def test_case_sha1_1(self):
+        self.check('sha1', "abc",
+                   "a9993e364706816aba3e25717850c26c9cd0d89d")
+
+    def test_case_sha1_2(self):
+        self.check('sha1', "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+                   "84983e441c3bd26ebaae4aa1f95129e5e54670f1")
+
+    def test_case_sha1_3(self):
+        self.check('sha1', "a" * 1000000,
+                   "34aa973cd4c4daa4f61eeb2bdbad27316534016f")
+
+
+    # use the examples from Federal Information Processing Standards
+    # Publication 180-2, Secure Hash Standard,  2002 August 1
+    # http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
+
+    def test_case_sha224_0(self):
+        self.check('sha224', "",
+          "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f")
+
+    def test_case_sha224_1(self):
+        self.check('sha224', "abc",
+          "23097d223405d8228642a477bda255b32aadbce4bda0b3f7e36c9da7")
+
+    def test_case_sha224_2(self):
+        self.check('sha224',
+          "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+          "75388b16512776cc5dba5da1fd890150b0c6455cb4f58b1952522525")
+
+    def test_case_sha224_3(self):
+        self.check('sha224', "a" * 1000000,
+          "20794655980c91d8bbb4c1ea97618a4bf03f42581948b2ee4ee7ad67")
+
+
+    def test_case_sha256_0(self):
+        self.check('sha256', "",
+          "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
+
+    def test_case_sha256_1(self):
+        self.check('sha256', "abc",
+          "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad")
+
+    def test_case_sha256_2(self):
+        self.check('sha256',
+          "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+          "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1")
+
+    def test_case_sha256_3(self):
+        self.check('sha256', "a" * 1000000,
+          "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0")
+
+
+    def test_case_sha384_0(self):
+        self.check('sha384', "",
+          "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da"+
+          "274edebfe76f65fbd51ad2f14898b95b")
+
+    def test_case_sha384_1(self):
+        self.check('sha384', "abc",
+          "cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed"+
+          "8086072ba1e7cc2358baeca134c825a7")
+
+    def test_case_sha384_2(self):
+        self.check('sha384',
+                   "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"+
+                   "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu",
+          "09330c33f71147e83d192fc782cd1b4753111b173b3b05d22fa08086e3b0f712"+
+          "fcc7c71a557e2db966c3e9fa91746039")
+
+    def test_case_sha384_3(self):
+        self.check('sha384', "a" * 1000000,
+          "9d0e1809716474cb086e834e310a4a1ced149e9c00f248527972cec5704c2a5b"+
+          "07b8b3dc38ecc4ebae97ddd87f3d8985")
+
+
+    def test_case_sha512_0(self):
+        self.check('sha512', "",
+          "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce"+
+          "47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e")
+
+    def test_case_sha512_1(self):
+        self.check('sha512', "abc",
+          "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a"+
+          "2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f")
+
+    def test_case_sha512_2(self):
+        self.check('sha512',
+                   "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"+
+                   "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu",
+          "8e959b75dae313da8cf4f72814fc143f8f7779c6eb9f7fa17299aeadb6889018"+
+          "501d289e4900f7e4331b99dec4b5433ac7d329eeb6dd26545e96e55b874be909")
+
+    def test_case_sha512_3(self):
+        self.check('sha512', "a" * 1000000,
+          "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+
+          "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b")
+
+
+def test_main():
+    # Entry point: run HashLibTestCase through test_support.run_unittest.
+    test_support.run_unittest(HashLibTestCase)
+
+
+if __name__ == "__main__":
+    test_main()



More information about the Pypy-commit mailing list