[Python-checkins] bpo-32433: Optimized HMAC digest (#5023)

Christian Heimes webhook-mailer at python.org
Sat Jan 27 03:53:48 EST 2018


https://github.com/python/cpython/commit/2f050c7e1b36bf641e7023f7b28b451454c6b98a
commit: 2f050c7e1b36bf641e7023f7b28b451454c6b98a
branch: master
author: Christian Heimes <christian at python.org>
committer: GitHub <noreply at github.com>
date: 2018-01-27T09:53:43+01:00
summary:

bpo-32433: Optimized HMAC digest (#5023)

The hmac module now has hmac.digest(), which provides an optimized HMAC
digest for short messages. hmac.digest() is up to three times faster
than hmac.HMAC().digest().

Signed-off-by: Christian Heimes <christian at python.org>

files:
A Misc/NEWS.d/next/Library/2017-12-27-20-09-27.bpo-32433.vmxsVI.rst
M Doc/library/hmac.rst
M Doc/whatsnew/3.7.rst
M Lib/hmac.py
M Lib/test/test_hmac.py
M Modules/_hashopenssl.c
M Modules/clinic/_hashopenssl.c.h

diff --git a/Doc/library/hmac.rst b/Doc/library/hmac.rst
index adbf78a7b46..fcda86cf797 100644
--- a/Doc/library/hmac.rst
+++ b/Doc/library/hmac.rst
@@ -31,6 +31,21 @@ This module implements the HMAC algorithm as described by :rfc:`2104`.
       MD5 as implicit default digest for *digestmod* is deprecated.
 
 
+.. function:: digest(key, msg, digest)
+
+   Return digest of *msg* for given secret *key* and *digest*. The
+   function is equivalent to ``HMAC(key, msg, digest).digest()``, but
+   uses an optimized C or inline implementation, which is faster for messages
+   that fit into memory. The parameters *key*, *msg*, and *digest* have
+   the same meaning as in :func:`~hmac.new`.
+
+   CPython implementation detail: the optimized C implementation is only used
+   when *digest* is a string naming a digest algorithm that is supported by
+   OpenSSL.
+
+   .. versionadded:: 3.7
+
+
 An HMAC object has the following methods:
 
 .. method:: HMAC.update(msg)
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index 43fbd013856..133975a04dd 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -492,6 +492,13 @@ and the ``--directory`` to the command line of the module :mod:`~http.server`.
 With this parameter, the server serves the specified directory, by default it uses the current working directory.
 (Contributed by Stéphane Wirtel and Julien Palard in :issue:`28707`.)
 
+hmac
+----
+
+The hmac module now has an optimized one-shot :func:`~hmac.digest` function,
+which is up to three times faster than :func:`~hmac.HMAC`.
+(Contributed by Christian Heimes in :issue:`32433`.)
+
 importlib
 ---------
 
diff --git a/Lib/hmac.py b/Lib/hmac.py
index 121029aa670..93c084e4ae8 100644
--- a/Lib/hmac.py
+++ b/Lib/hmac.py
@@ -5,6 +5,13 @@
 
 import warnings as _warnings
 from _operator import _compare_digest as compare_digest
+try:
+    import _hashlib as _hashopenssl
+except ImportError:
+    _hashopenssl = None
+    _openssl_md_meths = None
+else:
+    _openssl_md_meths = frozenset(_hashopenssl.openssl_md_meth_names)
 import hashlib as _hashlib
 
 trans_5C = bytes((x ^ 0x5C) for x in range(256))
@@ -142,3 +149,38 @@ def new(key, msg = None, digestmod = None):
     method.
     """
     return HMAC(key, msg, digestmod)
+
+
+def digest(key, msg, digest):
+    """Fast inline implementation of HMAC
+
+    key:    key for the keyed hash object.
+    msg:    input message
+    digest: A hash name suitable for hashlib.new() for best performance. *OR*
+            A hashlib constructor returning a new hash object. *OR*
+            A module supporting PEP 247.
+
+    Note: key and msg must be bytes or bytearray objects.
+    """
+    if (_hashopenssl is not None and
+            isinstance(digest, str) and digest in _openssl_md_meths):
+        return _hashopenssl.hmac_digest(key, msg, digest)
+
+    if callable(digest):
+        digest_cons = digest
+    elif isinstance(digest, str):
+        digest_cons = lambda d=b'': _hashlib.new(digest, d)
+    else:
+        digest_cons = lambda d=b'': digest.new(d)
+
+    inner = digest_cons()
+    outer = digest_cons()
+    blocksize = getattr(inner, 'block_size', 64)
+    if len(key) > blocksize:
+        key = digest_cons(key).digest()
+    key = key + b'\x00' * (blocksize - len(key))
+    inner.update(key.translate(trans_36))
+    outer.update(key.translate(trans_5C))
+    inner.update(msg)
+    outer.update(inner.digest())
+    return outer.digest()
diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py
index 067e13f1079..4e4ef0ec0e8 100644
--- a/Lib/test/test_hmac.py
+++ b/Lib/test/test_hmac.py
@@ -1,7 +1,9 @@
+import binascii
 import functools
 import hmac
 import hashlib
 import unittest
+import unittest.mock
 import warnings
 
 
@@ -23,16 +25,27 @@ def test_md5_vectors(self):
         def md5test(key, data, digest):
             h = hmac.HMAC(key, data, digestmod=hashlib.md5)
             self.assertEqual(h.hexdigest().upper(), digest.upper())
+            self.assertEqual(h.digest(), binascii.unhexlify(digest))
             self.assertEqual(h.name, "hmac-md5")
             self.assertEqual(h.digest_size, 16)
             self.assertEqual(h.block_size, 64)
 
             h = hmac.HMAC(key, data, digestmod='md5')
             self.assertEqual(h.hexdigest().upper(), digest.upper())
+            self.assertEqual(h.digest(), binascii.unhexlify(digest))
             self.assertEqual(h.name, "hmac-md5")
             self.assertEqual(h.digest_size, 16)
             self.assertEqual(h.block_size, 64)
 
+            self.assertEqual(
+                hmac.digest(key, data, digest='md5'),
+                binascii.unhexlify(digest)
+            )
+            with unittest.mock.patch('hmac._openssl_md_meths', {}):
+                self.assertEqual(
+                    hmac.digest(key, data, digest='md5'),
+                    binascii.unhexlify(digest)
+                )
 
         md5test(b"\x0b" * 16,
                 b"Hi There",
@@ -67,16 +80,23 @@ def test_sha_vectors(self):
         def shatest(key, data, digest):
             h = hmac.HMAC(key, data, digestmod=hashlib.sha1)
             self.assertEqual(h.hexdigest().upper(), digest.upper())
+            self.assertEqual(h.digest(), binascii.unhexlify(digest))
             self.assertEqual(h.name, "hmac-sha1")
             self.assertEqual(h.digest_size, 20)
             self.assertEqual(h.block_size, 64)
 
             h = hmac.HMAC(key, data, digestmod='sha1')
             self.assertEqual(h.hexdigest().upper(), digest.upper())
+            self.assertEqual(h.digest(), binascii.unhexlify(digest))
             self.assertEqual(h.name, "hmac-sha1")
             self.assertEqual(h.digest_size, 20)
             self.assertEqual(h.block_size, 64)
 
+            self.assertEqual(
+                hmac.digest(key, data, digest='sha1'),
+                binascii.unhexlify(digest)
+            )
+
 
         shatest(b"\x0b" * 20,
                 b"Hi There",
@@ -122,6 +142,24 @@ def hmactest(key, data, hexdigests):
             self.assertEqual(h.digest_size, digest_size)
             self.assertEqual(h.block_size, block_size)
 
+            self.assertEqual(
+                hmac.digest(key, data, digest=hashfunc),
+                binascii.unhexlify(hexdigests[hashfunc])
+            )
+            self.assertEqual(
+                hmac.digest(key, data, digest=hash_name),
+                binascii.unhexlify(hexdigests[hashfunc])
+            )
+
+            with unittest.mock.patch('hmac._openssl_md_meths', {}):
+                self.assertEqual(
+                    hmac.digest(key, data, digest=hashfunc),
+                    binascii.unhexlify(hexdigests[hashfunc])
+                )
+                self.assertEqual(
+                    hmac.digest(key, data, digest=hash_name),
+                    binascii.unhexlify(hexdigests[hashfunc])
+                )
 
         # 4.2.  Test Case 1
         hmactest(key = b'\x0b'*20,
diff --git a/Misc/NEWS.d/next/Library/2017-12-27-20-09-27.bpo-32433.vmxsVI.rst b/Misc/NEWS.d/next/Library/2017-12-27-20-09-27.bpo-32433.vmxsVI.rst
new file mode 100644
index 00000000000..d9b326e128e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-12-27-20-09-27.bpo-32433.vmxsVI.rst
@@ -0,0 +1,2 @@
+The hmac module now has hmac.digest(), which provides an optimized HMAC
+digest.
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
index c8d175860b7..50fe9d5a10b 100644
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -21,6 +21,7 @@
 
 /* EVP is the preferred interface to hashing in OpenSSL */
 #include <openssl/evp.h>
+#include <openssl/hmac.h>
 /* We use the object interface to discover what hashes OpenSSL supports. */
 #include <openssl/objects.h>
 #include "openssl/err.h"
@@ -528,8 +529,6 @@ EVP_new(PyObject *self, PyObject *args, PyObject *kwdict)
     return ret_obj;
 }
 
-
-
 #if (OPENSSL_VERSION_NUMBER >= 0x10000000 && !defined(OPENSSL_NO_HMAC) \
      && !defined(OPENSSL_NO_SHA))
 
@@ -849,6 +848,61 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt,
 }
 #endif
 
+/* Fast HMAC for hmac.digest()
+ */
+
+/*[clinic input]
+_hashlib.hmac_digest
+
+    key: Py_buffer
+    msg: Py_buffer
+    digest: str
+
+Single-shot HMAC
+[clinic start generated code]*/
+
+static PyObject *
+_hashlib_hmac_digest_impl(PyObject *module, Py_buffer *key, Py_buffer *msg,
+                          const char *digest)
+/*[clinic end generated code: output=75630e684cdd8762 input=10e964917921e2f2]*/
+{
+    unsigned char md[EVP_MAX_MD_SIZE] = {0};
+    unsigned int md_len = 0;
+    unsigned char *result;
+    const EVP_MD *evp;
+
+    evp = EVP_get_digestbyname(digest);
+    if (evp == NULL) {
+        PyErr_SetString(PyExc_ValueError, "unsupported hash type");
+        return NULL;
+    }
+    if (key->len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "key is too long.");
+        return NULL;
+    }
+    if (msg->len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "msg is too long.");
+        return NULL;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    result = HMAC(
+        evp,
+        (const void*)key->buf, (int)key->len,
+        (const unsigned char*)msg->buf, (int)msg->len,
+        md, &md_len
+    );
+    Py_END_ALLOW_THREADS
+
+    if (result == NULL) {
+        _setException(PyExc_ValueError);
+        return NULL;
+    }
+    return PyBytes_FromStringAndSize((const char*)md, md_len);
+}
+
 /* State for our callback function so that it can accumulate a result. */
 typedef struct _internal_name_mapper_state {
     PyObject *set;
@@ -982,6 +1036,7 @@ static struct PyMethodDef EVP_functions[] = {
      pbkdf2_hmac__doc__},
 #endif
     _HASHLIB_SCRYPT_METHODDEF
+    _HASHLIB_HMAC_DIGEST_METHODDEF
     CONSTRUCTOR_METH_DEF(md5),
     CONSTRUCTOR_METH_DEF(sha1),
     CONSTRUCTOR_METH_DEF(sha224),
diff --git a/Modules/clinic/_hashopenssl.c.h b/Modules/clinic/_hashopenssl.c.h
index f08d7f3afd4..cbc8638c946 100644
--- a/Modules/clinic/_hashopenssl.c.h
+++ b/Modules/clinic/_hashopenssl.c.h
@@ -54,7 +54,49 @@ _hashlib_scrypt(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
 
 #endif /* (OPENSSL_VERSION_NUMBER > 0x10100000L && !defined(OPENSSL_NO_SCRYPT) && !defined(LIBRESSL_VERSION_NUMBER)) */
 
+PyDoc_STRVAR(_hashlib_hmac_digest__doc__,
+"hmac_digest($module, /, key, msg, digest)\n"
+"--\n"
+"\n"
+"Single-shot HMAC");
+
+#define _HASHLIB_HMAC_DIGEST_METHODDEF    \
+    {"hmac_digest", (PyCFunction)_hashlib_hmac_digest, METH_FASTCALL|METH_KEYWORDS, _hashlib_hmac_digest__doc__},
+
+static PyObject *
+_hashlib_hmac_digest_impl(PyObject *module, Py_buffer *key, Py_buffer *msg,
+                          const char *digest);
+
+static PyObject *
+_hashlib_hmac_digest(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"key", "msg", "digest", NULL};
+    static _PyArg_Parser _parser = {"y*y*s:hmac_digest", _keywords, 0};
+    Py_buffer key = {NULL, NULL};
+    Py_buffer msg = {NULL, NULL};
+    const char *digest;
+
+    if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
+        &key, &msg, &digest)) {
+        goto exit;
+    }
+    return_value = _hashlib_hmac_digest_impl(module, &key, &msg, digest);
+
+exit:
+    /* Cleanup for key */
+    if (key.obj) {
+       PyBuffer_Release(&key);
+    }
+    /* Cleanup for msg */
+    if (msg.obj) {
+       PyBuffer_Release(&msg);
+    }
+
+    return return_value;
+}
+
 #ifndef _HASHLIB_SCRYPT_METHODDEF
     #define _HASHLIB_SCRYPT_METHODDEF
 #endif /* !defined(_HASHLIB_SCRYPT_METHODDEF) */
-/*[clinic end generated code: output=1ea7d0397f38e2c2 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=b5b90821caf05391 input=a9049054013a1b77]*/



More information about the Python-checkins mailing list