[Python-checkins] cpython (merge 3.4 -> default): Issue #23181: More "codepoint" -> "code point".

serhiy.storchaka python-checkins at python.org
Sun Jan 18 10:45:18 CET 2015


https://hg.python.org/cpython/rev/c79abee84a39
changeset:   94206:c79abee84a39
parent:      94204:fe0fddd6fd21
parent:      94205:0353c7e5e0c2
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sun Jan 18 11:33:31 2015 +0200
summary:
  Issue #23181: More "codepoint" -> "code point".

files:
  Doc/library/json.rst               |   2 +-
  Lib/codecs.py                      |   2 +-
  Lib/email/message.py               |   2 +-
  Lib/html/entities.py               |   4 ++--
  Lib/test/multibytecodec_support.py |   2 +-
  Lib/test/test_html.py              |   4 ++--
  Lib/test/test_multibytecodec.py    |   2 +-
  Lib/test/test_stringprep.py        |   2 +-
  Lib/test/test_unicode.py           |   4 ++--
  Modules/cjkcodecs/_codecs_cn.c     |   2 +-
  Modules/cjkcodecs/_codecs_hk.c     |   2 +-
  Modules/cjkcodecs/_codecs_kr.c     |   2 +-
  Modules/cjkcodecs/cjkcodecs.h      |   4 ++--
  Modules/unicodedata.c              |  14 +++++++-------
  Objects/unicodeobject.c            |   4 ++--
  Python/sysmodule.c                 |   2 +-
  Tools/unicode/gencodec.py          |   2 +-
  17 files changed, 28 insertions(+), 28 deletions(-)


diff --git a/Doc/library/json.rst b/Doc/library/json.rst
--- a/Doc/library/json.rst
+++ b/Doc/library/json.rst
@@ -514,7 +514,7 @@
 that don't correspond to valid Unicode characters (e.g. unpaired UTF-16
 surrogates), but it does note that they may cause interoperability problems.
 By default, this module accepts and outputs (when present in the original
-:class:`str`) codepoints for such sequences.
+:class:`str`) code points for such sequences.
 
 
 Infinite and NaN Number Values
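
For reference, the paragraph above describes behaviour that can be checked directly; a minimal sketch (not part of the patch, assuming the default ensure_ascii=True):

    import json

    # A lone surrogate survives a dumps/loads round trip by default.
    s = "\ud800"
    encoded = json.dumps(s)          # '"\\ud800"'
    assert json.loads(encoded) == s
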
diff --git a/Lib/codecs.py b/Lib/codecs.py
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -124,7 +124,7 @@
                     Python will use the official U+FFFD REPLACEMENT
                     CHARACTER for the builtin Unicode codecs on
                     decoding and '?' on encoding.
-         'surrogateescape' - replace with private codepoints U+DCnn.
+         'surrogateescape' - replace with private code points U+DCnn.
          'xmlcharrefreplace' - Replace with the appropriate XML
                                character reference (only for encoding).
          'backslashreplace'  - Replace with backslashed escape sequences
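
A minimal sketch of the 'surrogateescape' handler mentioned in that hunk (illustrative, not part of the patch):

    data = b"abc\xff"
    text = data.decode("ascii", errors="surrogateescape")
    assert text == "abc\udcff"    # 0xFF -> private code point U+DCFF
    assert text.encode("ascii", errors="surrogateescape") == data    # round-trips
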
diff --git a/Lib/email/message.py b/Lib/email/message.py
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -273,7 +273,7 @@
                     bpayload = payload.encode('ascii')
                 except UnicodeError:
                     # This won't happen for RFC compliant messages (messages
-                    # containing only ASCII codepoints in the unicode input).
+                    # containing only ASCII code points in the unicode input).
                     # If it does happen, turn the string into bytes in a way
                     # guaranteed not to fail.
                     bpayload = payload.encode('raw-unicode-escape')
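
The fallback in that hunk relies on raw-unicode-escape never failing; a small illustration with an assumed example string (not from the patch):

    payload = "snowman: \u2603"
    try:
        bpayload = payload.encode("ascii")
    except UnicodeError:
        # cannot fail: non-Latin-1 characters become \uXXXX escapes
        bpayload = payload.encode("raw-unicode-escape")
    assert bpayload == b"snowman: \\u2603"
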
diff --git a/Lib/html/entities.py b/Lib/html/entities.py
--- a/Lib/html/entities.py
+++ b/Lib/html/entities.py
@@ -3,7 +3,7 @@
 __all__ = ['html5', 'name2codepoint', 'codepoint2name', 'entitydefs']
 
 
-# maps the HTML entity name to the Unicode codepoint
+# maps the HTML entity name to the Unicode code point
 name2codepoint = {
     'AElig':    0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
     'Aacute':   0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1
@@ -2495,7 +2495,7 @@
     'zwnj;': '\u200c',
 }
 
-# maps the Unicode codepoint to the HTML entity name
+# maps the Unicode code point to the HTML entity name
 codepoint2name = {}
 
 # maps the HTML entity name to the character
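
The two mappings touched here are plain dicts keyed by entity name and by code point; for example (values taken from the hunk above):

    from html.entities import name2codepoint, codepoint2name, html5

    assert name2codepoint["AElig"] == 0x00C6
    assert codepoint2name[0x00C6] == "AElig"
    assert html5["zwnj;"] == "\u200c"
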
diff --git a/Lib/test/multibytecodec_support.py b/Lib/test/multibytecodec_support.py
--- a/Lib/test/multibytecodec_support.py
+++ b/Lib/test/multibytecodec_support.py
@@ -21,7 +21,7 @@
     roundtriptest   = 1    # set if roundtrip is possible with unicode
     has_iso10646    = 0    # set if this encoding contains whole iso10646 map
     xmlcharnametest = None # string to test xmlcharrefreplace
-    unmappedunicode = '\udeee' # a unicode codepoint that is not mapped.
+    unmappedunicode = '\udeee' # a unicode code point that is not mapped.
 
     def setUp(self):
         if self.codec is None:
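
That sentinel is a lone surrogate, which no multibyte codec can map, so it reliably triggers the error handler under test; for instance (codec chosen here only as an example):

    try:
        "\udeee".encode("gb2312")
    except UnicodeEncodeError:
        pass    # expected: the code point is not mapped
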
diff --git a/Lib/test/test_html.py b/Lib/test/test_html.py
--- a/Lib/test/test_html.py
+++ b/Lib/test/test_html.py
@@ -48,10 +48,10 @@
                 check(s % num, char)
                 for end in [' ', 'X']:
                     check((s+end) % num, char+end)
-        # check invalid codepoints
+        # check invalid code points
         for cp in [0xD800, 0xDB00, 0xDC00, 0xDFFF, 0x110000]:
             check_num(cp, '\uFFFD')
-        # check more invalid codepoints
+        # check more invalid code points
         for cp in [0x1, 0xb, 0xe, 0x7f, 0xfffe, 0xffff, 0x10fffe, 0x10ffff]:
             check_num(cp, '')
         # check invalid numbers
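
These checks mirror html.unescape(): surrogates and values above U+10FFFF become U+FFFD, while the other invalid code points are simply dropped. A short illustration (not part of the patch):

    import html

    assert html.unescape("&#xD800;") == "\uFFFD"    # surrogate -> REPLACEMENT CHARACTER
    assert html.unescape("&#xB;") == ""             # other invalid code point -> dropped
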
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -80,7 +80,7 @@
         self.assertEqual(encoder.reset(), None)
 
     def test_stateful(self):
-        # jisx0213 encoder is stateful for a few codepoints. eg)
+        # jisx0213 encoder is stateful for a few code points. eg)
         #   U+00E6 => A9DC
         #   U+00E6 U+0300 => ABC4
         #   U+0300 => ABDC
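
A sketch of that stateful behaviour, assuming the euc_jisx0213 codec (for which 'jisx0213' is an alias); the byte values are the ones listed in the comment:

    import codecs

    enc = codecs.getincrementalencoder("euc_jisx0213")()
    assert enc.encode("\u00e6") == b""                   # held back: U+0300 may follow
    assert enc.encode("\u0300") == b"\xab\xc4"           # U+00E6 U+0300 -> ABC4
    assert enc.encode("\u00e6", True) == b"\xa9\xdc"     # flushed alone -> A9DC
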
diff --git a/Lib/test/test_stringprep.py b/Lib/test/test_stringprep.py
--- a/Lib/test/test_stringprep.py
+++ b/Lib/test/test_stringprep.py
@@ -1,5 +1,5 @@
 # To fully test this module, we would need a copy of the stringprep tables.
-# Since we don't have them, this test checks only a few codepoints.
+# Since we don't have them, this test checks only a few code points.
 
 import unittest
 from test import support
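
The module exposes simple membership tests over those tables; a couple of examples (illustrative, not from the patch):

    import stringprep

    assert stringprep.in_table_a1("\u0221")     # unassigned in Unicode 3.2
    assert stringprep.in_table_c12("\u00a0")    # NO-BREAK SPACE: non-ASCII space
    assert not stringprep.in_table_c12("a")
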
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1470,9 +1470,9 @@
     def test_utf8_decode_invalid_sequences(self):
         # continuation bytes in a sequence of 2, 3, or 4 bytes
         continuation_bytes = [bytes([x]) for x in range(0x80, 0xC0)]
-        # start bytes of a 2-byte sequence equivalent to codepoints < 0x7F
+        # start bytes of a 2-byte sequence equivalent to code points < 0x7F
         invalid_2B_seq_start_bytes = [bytes([x]) for x in range(0xC0, 0xC2)]
-        # start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF
+        # start bytes of a 4-byte sequence equivalent to code points > 0x10FFFF
         invalid_4B_seq_start_bytes = [bytes([x]) for x in range(0xF5, 0xF8)]
         invalid_start_bytes = (
             continuation_bytes + invalid_2B_seq_start_bytes +
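
Both categories of invalid start bytes named in that hunk are rejected by the strict UTF-8 decoder; for instance:

    # 0xC0/0xC1 could only start over-long encodings of code points < 0x80.
    try:
        b"\xc0\xaf".decode("utf-8")
    except UnicodeDecodeError:
        pass
    # 0xF5..0xF7 would start sequences for code points > 0x10FFFF.
    try:
        b"\xf5\x80\x80\x80".decode("utf-8")
    except UnicodeDecodeError:
        pass
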
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -15,7 +15,7 @@
 #undef hz
 #endif
 
-/* GBK and GB2312 map differently in few codepoints that are listed below:
+/* GBK and GB2312 map differently in few code points that are listed below:
  *
  *              gb2312                          gbk
  * A1A4         U+30FB KATAKANA MIDDLE DOT      U+00B7 MIDDLE DOT
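
That difference is observable from Python; for the A1A4 row of the table (byte and code point values taken from the comment):

    assert b"\xa1\xa4".decode("gb2312") == "\u30fb"    # KATAKANA MIDDLE DOT
    assert b"\xa1\xa4".decode("gbk") == "\u00b7"       # MIDDLE DOT
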
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c
--- a/Modules/cjkcodecs/_codecs_hk.c
+++ b/Modules/cjkcodecs/_codecs_hk.c
@@ -171,7 +171,7 @@
         default: return 1;
         }
 
-        NEXT_IN(2); /* all decoded codepoints are pairs, above. */
+        NEXT_IN(2); /* all decoded code points are pairs, above. */
     }
 
     return 0;
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -69,7 +69,7 @@
             OUTBYTE1(EUCKR_JAMO_FIRSTBYTE);
             OUTBYTE2(EUCKR_JAMO_FILLER);
 
-            /* All codepoints in CP949 extension are in unicode
+            /* All code points in CP949 extension are in unicode
              * Hangul Syllable area. */
             assert(0xac00 <= c && c <= 0xd7a3);
             c -= 0xac00;
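
The assertion in that hunk relies on the Hangul Syllables block spanning U+AC00..U+D7A3; for reference:

    import unicodedata

    assert unicodedata.name("\uac00") == "HANGUL SYLLABLE GA"     # 0xAC00
    assert unicodedata.name("\ud7a3") == "HANGUL SYLLABLE HIH"    # 0xD7A3
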
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -12,10 +12,10 @@
 #include "multibytecodec.h"
 
 
-/* a unicode "undefined" codepoint */
+/* a unicode "undefined" code point */
 #define UNIINV  0xFFFE
 
-/* internal-use DBCS codepoints which aren't used by any charsets */
+/* internal-use DBCS code points which aren't used by any charsets */
 #define NOCHAR  0xFFFF
 #define MULTIC  0xFFFE
 #define DBCINV  0xFFFD
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -976,7 +976,7 @@
         (0x2B740 <= code && code <= 0x2B81D);   /* CJK Ideograph Extension D */
 }
 
-/* macros used to determine if the given codepoint is in the PUA range that
+/* macros used to determine if the given code point is in the PUA range that
  * we are using to store aliases and named sequences */
 #define IS_ALIAS(cp) ((cp >= aliases_start) && (cp < aliases_end))
 #define IS_NAMED_SEQ(cp) ((cp >= named_sequences_start) && \
@@ -986,7 +986,7 @@
 _getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
            int with_alias_and_seq)
 {
-    /* Find the name associated with the given codepoint.
+    /* Find the name associated with the given code point.
      * If with_alias_and_seq is 1, check for names in the Private Use Area 15
      * that we are using for aliases and named sequences. */
     int offset;
@@ -997,7 +997,7 @@
     if (code >= 0x110000)
         return 0;
 
-    /* XXX should we just skip all the codepoints in the PUAs here? */
+    /* XXX should we just skip all the code points in the PUAs here? */
     if (!with_alias_and_seq && (IS_ALIAS(code) || IS_NAMED_SEQ(code)))
         return 0;
 
@@ -1125,8 +1125,8 @@
     /* check if named sequences are allowed */
     if (!with_named_seq && IS_NAMED_SEQ(cp))
         return 0;
-    /* if the codepoint is in the PUA range that we use for aliases,
-     * convert it to obtain the right codepoint */
+    /* if the code point is in the PUA range that we use for aliases,
+     * convert it to obtain the right code point */
     if (IS_ALIAS(cp))
         *code = name_aliases[cp-aliases_start];
     else
@@ -1138,9 +1138,9 @@
 _getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code,
          int with_named_seq)
 {
-    /* Return the codepoint associated with the given name.
+    /* Return the code point associated with the given name.
      * Named aliases are resolved too (unless self != NULL (i.e. we are using
-     * 3.2.0)).  If with_named_seq is 1, returns the PUA codepoint that we are
+     * 3.2.0)).  If with_named_seq is 1, returns the PUA code point that we are
      * using for the named sequence, and the caller must then convert it. */
     unsigned int h, v;
     unsigned int mask = code_size-1;
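
The alias and named-sequence handling described in those comments backs unicodedata.lookup(); a brief illustration (examples chosen here, not from the patch):

    import unicodedata

    # A name alias resolves to the real code point ...
    assert unicodedata.lookup("BYTE ORDER MARK") == "\ufeff"
    # ... while name() still returns the formal character name.
    assert unicodedata.name("\ufeff") == "ZERO WIDTH NO-BREAK SPACE"
    # A named sequence resolves to a multi-character string.
    assert unicodedata.lookup("LATIN SMALL LETTER R WITH TILDE") == "r\u0303"
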
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5013,7 +5013,7 @@
         }
 
         if (Py_UNICODE_IS_SURROGATE(ch)) {
-            errmsg = "codepoint in surrogate code point range(0xd800, 0xe000)";
+            errmsg = "code point in surrogate code point range(0xd800, 0xe000)";
             startinpos = ((const char *)q) - starts;
             endinpos = startinpos + 4;
         }
@@ -5032,7 +5032,7 @@
                 q += 4;
                 continue;
             }
-            errmsg = "codepoint not in range(0x110000)";
+            errmsg = "code point not in range(0x110000)";
             startinpos = ((const char *)q) - starts;
             endinpos = startinpos + 4;
         }
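
With this change the UTF-32 decoder reports the reworded messages; for example (byte values chosen here):

    for raw in (b"\x00\xd8\x00\x00",     # U+D800: surrogate code point
                b"\x00\x00\x11\x00"):    # 0x110000: above the Unicode range
        try:
            raw.decode("utf-32-le")
        except UnicodeDecodeError as exc:
            print(exc.reason)
    # code point in surrogate code point range(0xd800, 0xe000)
    # code point not in range(0x110000)
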
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1379,7 +1379,7 @@
 implementation -- Python implementation information.\n\
 int_info -- a struct sequence with information about the int implementation.\n\
 maxsize -- the largest supported length of containers.\n\
-maxunicode -- the value of the largest Unicode codepoint\n\
+maxunicode -- the value of the largest Unicode code point\n\
 platform -- platform identifier\n\
 prefix -- prefix used to find the Python library\n\
 thread_info -- a struct sequence with information about the thread implementation.\n\
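
Since Python 3.3 this value is fixed at the top of the Unicode range:

    import sys

    assert sys.maxunicode == 0x10FFFF
    assert chr(sys.maxunicode) == "\U0010ffff"
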
diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py
--- a/Tools/unicode/gencodec.py
+++ b/Tools/unicode/gencodec.py
@@ -34,7 +34,7 @@
 # Standard undefined Unicode code point
 UNI_UNDEFINED = chr(0xFFFE)
 
-# Placeholder for a missing codepoint
+# Placeholder for a missing code point
 MISSING_CODE = -1
 
 mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'

-- 
Repository URL: https://hg.python.org/cpython

