[Python-checkins] [3.8] bpo-38153: Normalize hashlib algorithm names (GH-16083) (GH-16144)

Christian Heimes webhook-mailer at python.org
Mon Sep 16 08:08:59 EDT 2019


https://github.com/python/cpython/commit/e8d7fa2db8a6c641019b06943852492f24ac3e69
commit: e8d7fa2db8a6c641019b06943852492f24ac3e69
branch: 3.8
author: Christian Heimes <christian at python.org>
committer: GitHub <noreply at github.com>
date: 2019-09-16T14:08:55+02:00
summary:

[3.8] bpo-38153: Normalize hashlib algorithm names (GH-16083) (GH-16144)

Signed-off-by: Christian Heimes <christian at python.org>
(cherry picked from commit 995b5d38e7cc24cac3de8dfd516115f86b0bcf80)

Co-authored-by: Christian Heimes <christian at python.org>

files:
A Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst
M Lib/hashlib.py
M Lib/test/test_hashlib.py
M Modules/_hashopenssl.c

diff --git a/Lib/hashlib.py b/Lib/hashlib.py
index 4e783a86a34e..56873b7278b6 100644
--- a/Lib/hashlib.py
+++ b/Lib/hashlib.py
@@ -70,37 +70,44 @@
 
 __builtin_constructor_cache = {}
 
+__block_openssl_constructor = {
+    'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
+    'shake_128', 'shake_256',
+    'blake2b', 'blake2s',
+}
+
 def __get_builtin_constructor(name):
     cache = __builtin_constructor_cache
     constructor = cache.get(name)
     if constructor is not None:
         return constructor
     try:
-        if name in ('SHA1', 'sha1'):
+        if name in {'SHA1', 'sha1'}:
             import _sha1
             cache['SHA1'] = cache['sha1'] = _sha1.sha1
-        elif name in ('MD5', 'md5'):
+        elif name in {'MD5', 'md5'}:
             import _md5
             cache['MD5'] = cache['md5'] = _md5.md5
-        elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'):
+        elif name in {'SHA256', 'sha256', 'SHA224', 'sha224'}:
             import _sha256
             cache['SHA224'] = cache['sha224'] = _sha256.sha224
             cache['SHA256'] = cache['sha256'] = _sha256.sha256
-        elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'):
+        elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}:
             import _sha512
             cache['SHA384'] = cache['sha384'] = _sha512.sha384
             cache['SHA512'] = cache['sha512'] = _sha512.sha512
-        elif name in ('blake2b', 'blake2s'):
+        elif name in {'blake2b', 'blake2s'}:
             import _blake2
             cache['blake2b'] = _blake2.blake2b
             cache['blake2s'] = _blake2.blake2s
-        elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
-                      'shake_128', 'shake_256'}:
+        elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'}:
             import _sha3
             cache['sha3_224'] = _sha3.sha3_224
             cache['sha3_256'] = _sha3.sha3_256
             cache['sha3_384'] = _sha3.sha3_384
             cache['sha3_512'] = _sha3.sha3_512
+        elif name in {'shake_128', 'shake_256'}:
+            import _sha3
             cache['shake_128'] = _sha3.shake_128
             cache['shake_256'] = _sha3.shake_256
     except ImportError:
@@ -114,8 +121,8 @@ def __get_builtin_constructor(name):
 
 
 def __get_openssl_constructor(name):
-    if name in {'blake2b', 'blake2s'}:
-        # Prefer our blake2 implementation.
+    if name in __block_openssl_constructor:
+        # Prefer our blake2 and sha3 implementation.
         return __get_builtin_constructor(name)
     try:
         f = getattr(_hashlib, 'openssl_' + name)
@@ -140,8 +147,8 @@ def __hash_new(name, data=b'', **kwargs):
     """new(name, data=b'') - Return a new hashing object using the named algorithm;
     optionally initialized with data (which must be a bytes-like object).
     """
-    if name in {'blake2b', 'blake2s'}:
-        # Prefer our blake2 implementation.
+    if name in __block_openssl_constructor:
+        # Prefer our blake2 and sha3 implementation
         # OpenSSL 1.1.0 comes with a limited implementation of blake2b/s.
         # It does neither support keyed blake2 nor advanced features like
         # salt, personal, tree hashing or SSE.
diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py
index b7b04a37f0d0..e7cc6ccdc602 100644
--- a/Lib/test/test_hashlib.py
+++ b/Lib/test/test_hashlib.py
@@ -26,6 +26,11 @@
 c_hashlib = import_fresh_module('hashlib', fresh=['_hashlib'])
 py_hashlib = import_fresh_module('hashlib', blocked=['_hashlib'])
 
+try:
+    from _hashlib import HASH
+except ImportError:
+    HASH = None
+
 try:
     import _blake2
 except ImportError:
@@ -377,6 +382,9 @@ def check_sha3(self, name, capacity, rate, suffix):
         constructors = self.constructors_to_test[name]
         for hash_object_constructor in constructors:
             m = hash_object_constructor()
+            if HASH is not None and isinstance(m, HASH):
+                # _hashopenssl's variant does not have extra SHA3 attributes
+                continue
             self.assertEqual(capacity + rate, 1600)
             self.assertEqual(m._capacity_bits, capacity)
             self.assertEqual(m._rate_bits, rate)
@@ -976,6 +984,10 @@ def test_scrypt(self):
                 hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1,
                                dklen=dklen)
 
+    def test_normalized_name(self):
+        self.assertNotIn("blake2b512", hashlib.algorithms_available)
+        self.assertNotIn("sha3-512", hashlib.algorithms_available)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst b/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst
new file mode 100644
index 000000000000..8a483c760adc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst
@@ -0,0 +1,3 @@
+Names of hashing algorithms from OpenSSL are now normalized to follow
+Python's naming conventions. For example, OpenSSL uses sha3-512 instead of
+sha3_512 or blake2b512 instead of blake2b. 
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
index 88bf8da03b64..11a26ce8c0bb 100644
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -34,6 +34,14 @@
 
 #define MUNCH_SIZE INT_MAX
 
+#if defined(NID_sha3_224) && defined(EVP_MD_FLAG_XOF)
+#define PY_OPENSSL_HAS_SHA3 1
+#endif
+
+#ifdef NID_blake2b512
+#define PY_OPENSSL_HAS_BLAKE2 1
+#endif
+
 typedef struct {
     PyObject_HEAD
     EVP_MD_CTX          *ctx;   /* OpenSSL message digest context */
@@ -82,6 +90,135 @@ _setException(PyObject *exc)
 }
 /* LCOV_EXCL_STOP */
 
+static PyObject*
+py_digest_name(const EVP_MD *md)
+{
+    int nid = EVP_MD_nid(md);
+    const char *name = NULL;
+
+    /* Hard-coded names for well-known hashing algorithms.
+     * OpenSSL uses slightly different names for algorithms like SHA3.
+     */
+    switch (nid) {
+    case NID_md5:
+        name = "md5";
+        break;
+    case NID_sha1:
+        name = "sha1";
+        break;
+    case NID_sha224:
+        name ="sha224";
+        break;
+    case NID_sha256:
+        name ="sha256";
+        break;
+    case NID_sha384:
+        name ="sha384";
+        break;
+    case NID_sha512:
+        name ="sha512";
+        break;
+#ifdef NID_sha512_224
+    case NID_sha512_224:
+        name ="sha512_224";
+        break;
+    case NID_sha512_256:
+        name ="sha512_256";
+        break;
+#endif
+#ifdef PY_OPENSSL_HAS_SHA3
+    case NID_sha3_224:
+        name ="sha3_224";
+        break;
+    case NID_sha3_256:
+        name ="sha3_256";
+        break;
+    case NID_sha3_384:
+        name ="sha3_384";
+        break;
+    case NID_sha3_512:
+        name ="sha3_512";
+        break;
+    case NID_shake128:
+        name ="shake_128";
+        break;
+    case NID_shake256:
+        name ="shake_256";
+        break;
+#endif
+#ifdef PY_OPENSSL_HAS_BLAKE2
+    case NID_blake2s256:
+        name ="blake2s";
+        break;
+    case NID_blake2b512:
+        name ="blake2b";
+        break;
+#endif
+    default:
+        /* Ignore aliased names and only use long, lowercase name. The aliases
+         * pollute the list and OpenSSL appears to have its own definition of
+         * alias as the resulting list still contains duplicate and alternate
+         * names for several algorithms.
+         */
+        name = OBJ_nid2ln(nid);
+        if (name == NULL)
+            name = OBJ_nid2sn(nid);
+        break;
+    }
+
+    return PyUnicode_FromString(name);
+}
+
+static const EVP_MD*
+py_digest_by_name(const char *name)
+{
+    const EVP_MD *digest = EVP_get_digestbyname(name);
+
+    /* OpenSSL uses dash instead of underscore in names of some algorithms
+     * like SHA3 and SHAKE. Detect different spellings. */
+    if (digest == NULL) {
+#ifdef NID_sha512_224
+        if (!strcmp(name, "sha512_224") || !strcmp(name, "SHA512_224")) {
+            digest = EVP_sha512_224();
+        }
+        else if (!strcmp(name, "sha512_256") || !strcmp(name, "SHA512_256")) {
+            digest = EVP_sha512_256();
+        }
+#endif
+#ifdef PY_OPENSSL_HAS_SHA3
+        /* could be sha3_ or shake_, Python never defined upper case */
+        else if (!strcmp(name, "sha3_224")) {
+            digest = EVP_sha3_224();
+        }
+        else if (!strcmp(name, "sha3_256")) {
+            digest = EVP_sha3_256();
+        }
+        else if (!strcmp(name, "sha3_384")) {
+            digest = EVP_sha3_384();
+        }
+        else if (!strcmp(name, "sha3_512")) {
+            digest = EVP_sha3_512();
+        }
+        else if (!strcmp(name, "shake_128")) {
+            digest = EVP_shake128();
+        }
+        else if (!strcmp(name, "shake_256")) {
+            digest = EVP_shake256();
+        }
+#endif
+#ifdef PY_OPENSSL_HAS_BLAKE2
+        else if (!strcmp(name, "blake2s256")) {
+            digest = EVP_blake2s256();
+        }
+        else if (!strcmp(name, "blake2b512")) {
+            digest = EVP_blake2b512();
+        }
+#endif
+    }
+
+    return digest;
+}
+
 static EVPobject *
 newEVPobject(void)
 {
@@ -304,16 +441,7 @@ EVP_get_digest_size(EVPobject *self, void *closure)
 static PyObject *
 EVP_get_name(EVPobject *self, void *closure)
 {
-    const char *name = EVP_MD_name(EVP_MD_CTX_md(self->ctx));
-    PyObject *name_obj, *name_lower;
-
-    name_obj = PyUnicode_FromString(name);
-    if (!name_obj) {
-        return NULL;
-    }
-    name_lower = PyObject_CallMethod(name_obj, "lower", NULL);
-    Py_DECREF(name_obj);
-    return name_lower;
+    return py_digest_name(EVP_MD_CTX_md(self->ctx));
 }
 
 static PyGetSetDef EVP_getseters[] = {
@@ -337,7 +465,7 @@ static PyObject *
 EVP_repr(EVPobject *self)
 {
     PyObject *name_obj, *repr;
-    name_obj = EVP_get_name(self, NULL);
+    name_obj = py_digest_name(EVP_MD_CTX_md(self->ctx));
     if (!name_obj) {
         return NULL;
     }
@@ -403,6 +531,7 @@ static PyTypeObject EVPtype = {
     0,                  /* tp_dictoffset */
 };
 
+\
 static PyObject *
 EVPnew(const EVP_MD *digest,
        const unsigned char *cp, Py_ssize_t len)
@@ -475,7 +604,7 @@ EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj)
     if (data_obj)
         GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view);
 
-    digest = EVP_get_digestbyname(name);
+    digest = py_digest_by_name(name);
 
     ret_obj = EVPnew(digest, (unsigned char*)view.buf, view.len);
 
@@ -889,21 +1018,17 @@ typedef struct _internal_name_mapper_state {
 
 /* A callback function to pass to OpenSSL's OBJ_NAME_do_all(...) */
 static void
-_openssl_hash_name_mapper(const OBJ_NAME *openssl_obj_name, void *arg)
+_openssl_hash_name_mapper(const EVP_MD *md, const char *from,
+                          const char *to, void *arg)
 {
     _InternalNameMapperState *state = (_InternalNameMapperState *)arg;
     PyObject *py_name;
 
     assert(state != NULL);
-    if (openssl_obj_name == NULL)
-        return;
-    /* Ignore aliased names, they pollute the list and OpenSSL appears to
-     * have its own definition of alias as the resulting list still
-     * contains duplicate and alternate names for several algorithms.     */
-    if (openssl_obj_name->alias)
+    if (md == NULL)
         return;
 
-    py_name = PyUnicode_FromString(openssl_obj_name->name);
+    py_name = py_digest_name(md);
     if (py_name == NULL) {
         state->error = 1;
     } else {
@@ -925,7 +1050,7 @@ generate_hash_name_list(void)
         return NULL;
     state.error = 0;
 
-    OBJ_NAME_do_all(OBJ_NAME_TYPE_MD_METH, &_openssl_hash_name_mapper, &state);
+    EVP_MD_do_all(&_openssl_hash_name_mapper, &state);
 
     if (state.error) {
         Py_DECREF(state.set);



More information about the Python-checkins mailing list