[Python-checkins] bpo-32107 - Improve MAC address calculation and fix test_uuid.py (#4600)

Barry Warsaw webhook-mailer at python.org
Tue Nov 28 17:26:08 EST 2017


https://github.com/python/cpython/commit/23df2d1304ece169d7e0dfc843dfb8026b413d9f
commit: 23df2d1304ece169d7e0dfc843dfb8026b413d9f
branch: master
author: Barry Warsaw <barry at python.org>
committer: GitHub <noreply at github.com>
date: 2017-11-28T17:26:04-05:00
summary:

bpo-32107 - Improve MAC address calculation and fix test_uuid.py (#4600)

``uuid.getnode()`` now preferentially returns universally administered MAC addresses if available, over locally administered MAC addresses.  This makes a better guarantee for global uniqueness of UUIDs returned from ``uuid.uuid1()``.  If only locally administered MAC addresses are available, the first such one found is returned.

Also improve internal code style by being explicit about ``return None`` rather than falling off the end of the function.

Improve the test robustness.

files:
A Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst
M Doc/library/uuid.rst
M Lib/test/test_uuid.py
M Lib/uuid.py

diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst
index ea9ea7dc7d9..8ec75a79acf 100644
--- a/Doc/library/uuid.rst
+++ b/Doc/library/uuid.rst
@@ -156,10 +156,18 @@ The :mod:`uuid` module defines the following functions:
 
    Get the hardware address as a 48-bit positive integer.  The first time this
    runs, it may launch a separate program, which could be quite slow.  If all
-   attempts to obtain the hardware address fail, we choose a random 48-bit number
-   with its eighth bit set to 1 as recommended in RFC 4122.  "Hardware address"
-   means the MAC address of a network interface, and on a machine with multiple
-   network interfaces the MAC address of any one of them may be returned.
+   attempts to obtain the hardware address fail, we choose a random 48-bit
+   number with the multicast bit (least significant bit of the first octet)
+   set to 1 as recommended in RFC 4122.  "Hardware address" means the MAC
+   address of a network interface.  On a machine with multiple network
+   interfaces, universally administered MAC addresses (i.e. where the second
+   least significant bit of the first octet is *unset*) will be preferred over
+   locally administered MAC addresses, but with no other ordering guarantees.
+
+   .. versionchanged:: 3.7
+      Universally administered MAC addresses are preferred over locally
+      administered MAC addresses, since the former are guaranteed to be
+      globally unique, while the latter are not.
 
 .. index:: single: getnode
 
diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py
index 083c2aa8aab..f113c551209 100644
--- a/Lib/test/test_uuid.py
+++ b/Lib/test/test_uuid.py
@@ -512,59 +512,57 @@ def test_find_mac(self):
 
         self.assertEqual(mac, 0x1234567890ab)
 
-    def check_node(self, node, requires=None, network=False):
+    def check_node(self, node, requires=None):
         if requires and node is None:
             self.skipTest('requires ' + requires)
         hex = '%012x' % node
         if support.verbose >= 2:
             print(hex, end=' ')
-        if network:
-            # 47 bit will never be set in IEEE 802 addresses obtained
-            # from network cards.
-            self.assertFalse(node & 0x010000000000, hex)
         self.assertTrue(0 < node < (1 << 48),
                         "%s is not an RFC 4122 node ID" % hex)
 
     @unittest.skipUnless(os.name == 'posix', 'requires Posix')
     def test_ifconfig_getnode(self):
         node = self.uuid._ifconfig_getnode()
-        self.check_node(node, 'ifconfig', True)
+        self.check_node(node, 'ifconfig')
 
     @unittest.skipUnless(os.name == 'posix', 'requires Posix')
     def test_ip_getnode(self):
         node = self.uuid._ip_getnode()
-        self.check_node(node, 'ip', True)
+        self.check_node(node, 'ip')
 
     @unittest.skipUnless(os.name == 'posix', 'requires Posix')
     def test_arp_getnode(self):
         node = self.uuid._arp_getnode()
-        self.check_node(node, 'arp', True)
+        self.check_node(node, 'arp')
 
     @unittest.skipUnless(os.name == 'posix', 'requires Posix')
     def test_lanscan_getnode(self):
         node = self.uuid._lanscan_getnode()
-        self.check_node(node, 'lanscan', True)
+        self.check_node(node, 'lanscan')
 
     @unittest.skipUnless(os.name == 'posix', 'requires Posix')
     def test_netstat_getnode(self):
         node = self.uuid._netstat_getnode()
-        self.check_node(node, 'netstat', True)
+        self.check_node(node, 'netstat')
 
     @unittest.skipUnless(os.name == 'nt', 'requires Windows')
     def test_ipconfig_getnode(self):
         node = self.uuid._ipconfig_getnode()
-        self.check_node(node, 'ipconfig', True)
+        self.check_node(node, 'ipconfig')
 
     @unittest.skipUnless(importable('win32wnet'), 'requires win32wnet')
     @unittest.skipUnless(importable('netbios'), 'requires netbios')
     def test_netbios_getnode(self):
         node = self.uuid._netbios_getnode()
-        self.check_node(node, network=True)
+        self.check_node(node)
 
     def test_random_getnode(self):
         node = self.uuid._random_getnode()
-        # Least significant bit of first octet must be set.
-        self.assertTrue(node & 0x010000000000, '%012x' % node)
+        # The multicast bit, i.e. the least significant bit of first octet,
+        # must be set for randomly generated MAC addresses.  See RFC 4122,
+        # $4.1.6.
+        self.assertTrue(node & (1 << 40), '%012x' % node)
         self.check_node(node)
 
     @unittest.skipUnless(os.name == 'posix', 'requires Posix')
diff --git a/Lib/uuid.py b/Lib/uuid.py
index 020c6e73c86..cb2bc092bdf 100644
--- a/Lib/uuid.py
+++ b/Lib/uuid.py
@@ -342,11 +342,30 @@ def _popen(command, *args):
                             env=env)
     return proc
 
+# For MAC (a.k.a. IEEE 802, or EUI-48) addresses, the second least significant
+# bit of the first octet signifies whether the MAC address is universally (0)
+# or locally (1) administered.  Network cards from hardware manufacturers will
+# always be universally administered to guarantee global uniqueness of the MAC
+# address, but any particular machine may have other interfaces which are
+# locally administered.  An example of the latter is the bridge interface to
+# the Touch Bar on MacBook Pros.
+#
+# This bit works out to be the 42nd bit counting from 1 being the least
+# significant, or 1<<41.  We'll prefer universally administered MAC addresses
+# over locally administered ones since the former are globally unique, but
+# we'll return the first of the latter found if that's all the machine has.
+#
+# See https://en.wikipedia.org/wiki/MAC_address#Universal_vs._local
+
+def _is_universal(mac):
+    return not (mac & (1 << 41))
+
 def _find_mac(command, args, hw_identifiers, get_index):
+    first_local_mac = None
     try:
         proc = _popen(command, *args.split())
         if not proc:
-            return
+            return None
         with proc:
             for line in proc.stdout:
                 words = line.lower().rstrip().split()
@@ -355,8 +374,9 @@ def _find_mac(command, args, hw_identifiers, get_index):
                         try:
                             word = words[get_index(i)]
                             mac = int(word.replace(b':', b''), 16)
-                            if mac:
+                            if _is_universal(mac):
                                 return mac
+                            first_local_mac = first_local_mac or mac
                         except (ValueError, IndexError):
                             # Virtual interfaces, such as those provided by
                             # VPNs, do not have a colon-delimited MAC address
@@ -366,6 +386,7 @@ def _find_mac(command, args, hw_identifiers, get_index):
                             pass
     except OSError:
         pass
+    return first_local_mac or None
 
 def _ifconfig_getnode():
     """Get the hardware address on Unix by running ifconfig."""
@@ -375,6 +396,7 @@ def _ifconfig_getnode():
         mac = _find_mac('ifconfig', args, keywords, lambda i: i+1)
         if mac:
             return mac
+        return None
 
 def _ip_getnode():
     """Get the hardware address on Unix by running ip."""
@@ -382,6 +404,7 @@ def _ip_getnode():
     mac = _find_mac('ip', 'link list', [b'link/ether'], lambda i: i+1)
     if mac:
         return mac
+    return None
 
 def _arp_getnode():
     """Get the hardware address on Unix by running arp."""
@@ -404,8 +427,10 @@ def _arp_getnode():
     # This works on Linux, FreeBSD and NetBSD
     mac = _find_mac('arp', '-an', [os.fsencode('(%s)' % ip_addr)],
                     lambda i: i+2)
+    # Return None instead of 0.
     if mac:
         return mac
+    return None
 
 def _lanscan_getnode():
     """Get the hardware address on Unix by running lanscan."""
@@ -415,32 +440,36 @@ def _lanscan_getnode():
 def _netstat_getnode():
     """Get the hardware address on Unix by running netstat."""
     # This might work on AIX, Tru64 UNIX.
+    first_local_mac = None
     try:
         proc = _popen('netstat', '-ia')
         if not proc:
-            return
+            return None
         with proc:
             words = proc.stdout.readline().rstrip().split()
             try:
                 i = words.index(b'Address')
             except ValueError:
-                return
+                return None
             for line in proc.stdout:
                 try:
                     words = line.rstrip().split()
                     word = words[i]
                     if len(word) == 17 and word.count(b':') == 5:
                         mac = int(word.replace(b':', b''), 16)
-                        if mac:
+                        if _is_universal(mac):
                             return mac
+                        first_local_mac = first_local_mac or mac
                 except (ValueError, IndexError):
                     pass
     except OSError:
         pass
+    return first_local_mac or None
 
 def _ipconfig_getnode():
     """Get the hardware address on Windows by running ipconfig.exe."""
     import os, re
+    first_local_mac = None
     dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
     try:
         import ctypes
@@ -458,18 +487,23 @@ def _ipconfig_getnode():
             for line in pipe:
                 value = line.split(':')[-1].strip().lower()
                 if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value):
-                    return int(value.replace('-', ''), 16)
+                    mac = int(value.replace('-', ''), 16)
+                    if _is_universal(mac):
+                        return mac
+                    first_local_mac = first_local_mac or mac
+    return first_local_mac or None
 
 def _netbios_getnode():
     """Get the hardware address on Windows using NetBIOS calls.
     See http://support.microsoft.com/kb/118623 for details."""
     import win32wnet, netbios
+    first_local_mac = None
     ncb = netbios.NCB()
     ncb.Command = netbios.NCBENUM
     ncb.Buffer = adapters = netbios.LANA_ENUM()
     adapters._pack()
     if win32wnet.Netbios(ncb) != 0:
-        return
+        return None
     adapters._unpack()
     for i in range(adapters.length):
         ncb.Reset()
@@ -488,7 +522,11 @@ def _netbios_getnode():
         bytes = status.adapter_address[:6]
         if len(bytes) != 6:
             continue
-        return int.from_bytes(bytes, 'big')
+        mac = int.from_bytes(bytes, 'big')
+        if _is_universal(mac):
+            return mac
+        first_local_mac = first_local_mac or mac
+    return first_local_mac or None
 
 
 _generate_time_safe = _UuidCreate = None
@@ -601,9 +639,19 @@ def _windll_getnode():
         return UUID(bytes=bytes_(_buffer.raw)).node
 
 def _random_getnode():
-    """Get a random node ID, with eighth bit set as suggested by RFC 4122."""
+    """Get a random node ID."""
+    # RFC 4122, $4.1.6 says "For systems with no IEEE address, a randomly or
+    # pseudo-randomly generated value may be used; see Section 4.5.  The
+    # multicast bit must be set in such addresses, in order that they will
+    # never conflict with addresses obtained from network cards."
+    #
+    # The "multicast bit" of a MAC address is defined to be "the least
+    # significant bit of the first octet".  This works out to be the 41st bit
+    # counting from 1 being the least significant bit, or 1<<40.
+    #
+    # See https://en.wikipedia.org/wiki/MAC_address#Unicast_vs._multicast
     import random
-    return random.getrandbits(48) | 0x010000000000
+    return random.getrandbits(48) | (1 << 40)
 
 
 _node = None
@@ -626,13 +674,14 @@ def getnode():
         getters = [_unix_getnode, _ifconfig_getnode, _ip_getnode,
                    _arp_getnode, _lanscan_getnode, _netstat_getnode]
 
-    for getter in getters + [_random_getnode]:
+    for getter in getters:
         try:
             _node = getter()
         except:
             continue
         if _node is not None:
             return _node
+    return _random_getnode()
 
 
 _last_timestamp = None
diff --git a/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst b/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst
new file mode 100644
index 00000000000..b26daa7b1b3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst
@@ -0,0 +1,5 @@
+``uuid.getnode()`` now preferentially returns universally administered MAC
+addresses if available, over locally administered MAC addresses.  This makes a
+better guarantee for global uniqueness of UUIDs returned from
+``uuid.uuid1()``.  If only locally administered MAC addresses are available,
+the first such one found is returned.



More information about the Python-checkins mailing list