[Python-checkins] bpo-44688: Remove ASCII limitation from `sqlite3` collation names (GH-27395)

encukou webhook-mailer at python.org
Thu Jul 29 03:48:16 EDT 2021


https://github.com/python/cpython/commit/5269c091458c5ea76eb625e4fabc9980b6309266
commit: 5269c091458c5ea76eb625e4fabc9980b6309266
branch: main
author: Erlend Egeberg Aasland <erlend.aasland at innova.no>
committer: encukou <encukou at gmail.com>
date: 2021-07-29T09:47:56+02:00
summary:

bpo-44688: Remove ASCII limitation from `sqlite3` collation names (GH-27395)

files:
A Misc/NEWS.d/next/Library/2021-07-20-23-28-26.bpo-44688.buFgz3.rst
M Doc/library/sqlite3.rst
M Doc/whatsnew/3.11.rst
M Lib/sqlite3/test/hooks.py
M Lib/sqlite3/test/regression.py
M Modules/_sqlite/clinic/connection.c.h
M Modules/_sqlite/connection.c

diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst
index 05064e4c17fd7..6399bed7ed52c 100644
--- a/Doc/library/sqlite3.rst
+++ b/Doc/library/sqlite3.rst
@@ -402,6 +402,10 @@ Connection Objects
 
          con.create_collation("reverse", None)
 
+      .. versionchanged:: 3.11
+         The collation name can contain any Unicode character.  Earlier, only
+         ASCII characters were allowed.
+
 
    .. method:: interrupt()
 
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index b29d0cbe1538d..e97162a17dbdc 100644
--- a/Doc/whatsnew/3.11.rst
+++ b/Doc/whatsnew/3.11.rst
@@ -213,6 +213,11 @@ sqlite3
   :meth:`~sqlite3.Connection.set_authorizer`.
   (Contributed by Erlend E. Aasland in :issue:`44491`.)
 
+* The collation name passed to :meth:`~sqlite3.Connection.create_collation`
+  can now contain any Unicode character.  Names with invalid characters now
+  raise :exc:`UnicodeEncodeError` instead of :exc:`sqlite3.ProgrammingError`.
+  (Contributed by Erlend E. Aasland in :issue:`44688`.)
+
 
 Removed
 =======
diff --git a/Lib/sqlite3/test/hooks.py b/Lib/sqlite3/test/hooks.py
index 520a5b9f11cd4..1be6d380abd20 100644
--- a/Lib/sqlite3/test/hooks.py
+++ b/Lib/sqlite3/test/hooks.py
@@ -40,8 +40,7 @@ def test_create_collation_not_callable(self):
 
     def test_create_collation_not_ascii(self):
         con = sqlite.connect(":memory:")
-        with self.assertRaises(sqlite.ProgrammingError):
-            con.create_collation("collä", lambda x, y: (x > y) - (x < y))
+        con.create_collation("collä", lambda x, y: (x > y) - (x < y))
 
     def test_create_collation_bad_upper(self):
         class BadUpperStr(str):
diff --git a/Lib/sqlite3/test/regression.py b/Lib/sqlite3/test/regression.py
index 417a53109c87c..6c093d7c2c36e 100644
--- a/Lib/sqlite3/test/regression.py
+++ b/Lib/sqlite3/test/regression.py
@@ -278,7 +278,7 @@ def test_connection_call(self):
     def test_collation(self):
         def collation_cb(a, b):
             return 1
-        self.assertRaises(sqlite.ProgrammingError, self.con.create_collation,
+        self.assertRaises(UnicodeEncodeError, self.con.create_collation,
             # Lone surrogate cannot be encoded to the default encoding (utf8)
             "\uDC80", collation_cb)
 
diff --git a/Misc/NEWS.d/next/Library/2021-07-20-23-28-26.bpo-44688.buFgz3.rst b/Misc/NEWS.d/next/Library/2021-07-20-23-28-26.bpo-44688.buFgz3.rst
new file mode 100644
index 0000000000000..15f6a521f2d4f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-07-20-23-28-26.bpo-44688.buFgz3.rst
@@ -0,0 +1,2 @@
+:meth:`sqlite3.Connection.create_collation` now accepts non-ASCII collation
+names. Patch by Erlend E. Aasland.
diff --git a/Modules/_sqlite/clinic/connection.c.h b/Modules/_sqlite/clinic/connection.c.h
index ec0a43a17fbe9..1626e1c59c06c 100644
--- a/Modules/_sqlite/clinic/connection.c.h
+++ b/Modules/_sqlite/clinic/connection.c.h
@@ -722,13 +722,14 @@ PyDoc_STRVAR(pysqlite_connection_create_collation__doc__,
 
 static PyObject *
 pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
-                                          PyObject *name, PyObject *callable);
+                                          const char *name,
+                                          PyObject *callable);
 
 static PyObject *
 pysqlite_connection_create_collation(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
-    PyObject *name;
+    const char *name;
     PyObject *callable;
 
     if (!_PyArg_CheckPositional("create_collation", nargs, 2, 2)) {
@@ -738,10 +739,15 @@ pysqlite_connection_create_collation(pysqlite_Connection *self, PyObject *const
         _PyArg_BadArgument("create_collation", "argument 1", "str", args[0]);
         goto exit;
     }
-    if (PyUnicode_READY(args[0]) == -1) {
+    Py_ssize_t name_length;
+    name = PyUnicode_AsUTF8AndSize(args[0], &name_length);
+    if (name == NULL) {
+        goto exit;
+    }
+    if (strlen(name) != (size_t)name_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
         goto exit;
     }
-    name = args[0];
     callable = args[1];
     return_value = pysqlite_connection_create_collation_impl(self, name, callable);
 
@@ -811,4 +817,4 @@ pysqlite_connection_exit(pysqlite_Connection *self, PyObject *const *args, Py_ss
 #ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
     #define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
 #endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */
-/*[clinic end generated code: output=30f11f2d8f09bdf0 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=a7a899c4e41381ac input=a9049054013a1b77]*/
diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c
index af093c3319c8b..85b666a41958f 100644
--- a/Modules/_sqlite/connection.c
+++ b/Modules/_sqlite/connection.c
@@ -1720,7 +1720,7 @@ pysqlite_connection_backup_impl(pysqlite_Connection *self,
 /*[clinic input]
 _sqlite3.Connection.create_collation as pysqlite_connection_create_collation
 
-    name: unicode
+    name: str
     callback as callable: object
     /
 
@@ -1729,61 +1729,26 @@ Creates a collation function. Non-standard.
 
 static PyObject *
 pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
-                                          PyObject *name, PyObject *callable)
-/*[clinic end generated code: output=0f63b8995565ae22 input=5c3898813a776cf2]*/
+                                          const char *name,
+                                          PyObject *callable)
+/*[clinic end generated code: output=a4ceaff957fdef9a input=301647aab0f2fb1d]*/
 {
-    PyObject* uppercase_name = 0;
-    Py_ssize_t i, len;
-    _Py_IDENTIFIER(upper);
-    const char *uppercase_name_str;
-    int rc;
-    unsigned int kind;
-    const void *data;
-
     if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
-        goto finally;
-    }
-
-    uppercase_name = _PyObject_CallMethodIdOneArg((PyObject *)&PyUnicode_Type,
-                                                  &PyId_upper, name);
-    if (!uppercase_name) {
-        goto finally;
-    }
-
-    if (PyUnicode_READY(uppercase_name))
-        goto finally;
-    len = PyUnicode_GET_LENGTH(uppercase_name);
-    kind = PyUnicode_KIND(uppercase_name);
-    data = PyUnicode_DATA(uppercase_name);
-    for (i=0; i<len; i++) {
-        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        if ((ch >= '0' && ch <= '9')
-         || (ch >= 'A' && ch <= 'Z')
-         || (ch == '_'))
-        {
-            continue;
-        } else {
-            PyErr_SetString(self->ProgrammingError,
-                            "invalid character in collation name");
-            goto finally;
-        }
+        return NULL;
     }
 
-    uppercase_name_str = PyUnicode_AsUTF8(uppercase_name);
-    if (!uppercase_name_str)
-        goto finally;
-
+    int rc;
     int flags = SQLITE_UTF8;
     if (callable == Py_None) {
-        rc = sqlite3_create_collation_v2(self->db, uppercase_name_str, flags,
+        rc = sqlite3_create_collation_v2(self->db, name, flags,
                                          NULL, NULL, NULL);
     }
     else {
         if (!PyCallable_Check(callable)) {
             PyErr_SetString(PyExc_TypeError, "parameter must be callable");
-            goto finally;
+            return NULL;
         }
-        rc = sqlite3_create_collation_v2(self->db, uppercase_name_str, flags,
+        rc = sqlite3_create_collation_v2(self->db, name, flags,
                                          Py_NewRef(callable),
                                          &pysqlite_collation_callback,
                                          &_destructor);
@@ -1798,16 +1763,10 @@ pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
             Py_DECREF(callable);
         }
         _pysqlite_seterror(self->db);
-        goto finally;
-    }
-
-finally:
-    Py_XDECREF(uppercase_name);
-
-    if (PyErr_Occurred()) {
         return NULL;
     }
-    return Py_NewRef(Py_None);
+
+    Py_RETURN_NONE;
 }
 
 /*[clinic input]



More information about the Python-checkins mailing list