[Python-checkins] gh-69093: Support basic incremental I/O to blobs in `sqlite3` (GH-30680)

JelleZijlstra webhook-mailer at python.org
Thu Apr 14 20:03:03 EDT 2022


https://github.com/python/cpython/commit/ee475430d431814cbb6eb5e8a6c0ae51943349d4
commit: ee475430d431814cbb6eb5e8a6c0ae51943349d4
branch: main
author: Erlend Egeberg Aasland <erlend.aasland at innova.no>
committer: JelleZijlstra <jelle.zijlstra at gmail.com>
date: 2022-04-14T17:02:56-07:00
summary:

gh-69093: Support basic incremental I/O to blobs in `sqlite3` (GH-30680)

Authored-by: Aviv Palivoda <palaviv at gmail.com>
Co-authored-by: Erlend E. Aasland <erlend.aasland at innova.no>
Co-authored-by: palaviv <palaviv at gmail.com>
Co-authored-by: Kumar Aditya <59607654+kumaraditya303 at users.noreply.github.com>
Co-authored-by: Jelle Zijlstra <jelle.zijlstra at gmail.com>

files:
A Doc/includes/sqlite3/blob.py
A Misc/NEWS.d/next/Library/2018-04-18-16-15-55.bpo-24905.jYqjYx.rst
A Modules/_sqlite/blob.c
A Modules/_sqlite/blob.h
A Modules/_sqlite/clinic/blob.c.h
M Doc/library/sqlite3.rst
M Doc/whatsnew/3.11.rst
M Lib/test/test_sqlite3/test_dbapi.py
M Modules/_sqlite/clinic/connection.c.h
M Modules/_sqlite/connection.c
M Modules/_sqlite/connection.h
M Modules/_sqlite/module.c
M Modules/_sqlite/module.h
M PCbuild/_sqlite3.vcxproj
M PCbuild/_sqlite3.vcxproj.filters
M setup.py

diff --git a/Doc/includes/sqlite3/blob.py b/Doc/includes/sqlite3/blob.py
new file mode 100644
index 0000000000000..61994fb82dd72
--- /dev/null
+++ b/Doc/includes/sqlite3/blob.py
@@ -0,0 +1,12 @@
+import sqlite3
+
+con = sqlite3.connect(":memory:")
+con.execute("create table test(blob_col blob)")
+con.execute("insert into test(blob_col) values (zeroblob(10))")
+
+blob = con.blobopen("test", "blob_col", 1)
+blob.write(b"Hello")
+blob.write(b"World")
+blob.seek(0)
+print(blob.read())  # will print b"HelloWorld"
+blob.close()
diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst
index 60dfbefd2e255..d0274fb79744d 100644
--- a/Doc/library/sqlite3.rst
+++ b/Doc/library/sqlite3.rst
@@ -394,6 +394,20 @@ Connection Objects
       supplied, this must be a callable returning an instance of :class:`Cursor`
       or its subclasses.
 
+   .. method:: blobopen(table, column, row, /, *, readonly=False, name="main")
+
+      Open a :class:`Blob` handle to the :abbr:`BLOB (Binary Large OBject)`
+      located in row *row*, column *column*, table *table* of database *name*.
+      When *readonly* is :const:`True` the blob is opened without write
+      permissions.
+
+      .. note::
+
+         The blob size cannot be changed using the :class:`Blob` class.
+         Use the SQL function ``zeroblob`` to create a blob with a fixed size.
+
+      .. versionadded:: 3.11
+
    .. method:: commit()
 
       This method commits the current transaction. If you don't call this method,
@@ -1088,6 +1102,58 @@ Exceptions
    transactions turned off.  It is a subclass of :exc:`DatabaseError`.
 
 
+.. _sqlite3-blob-objects:
+
+Blob Objects
+------------
+
+.. versionadded:: 3.11
+
+.. class:: Blob
+
+   A :class:`Blob` instance is a :term:`file-like object` that can read and write
+   data in an SQLite :abbr:`BLOB (Binary Large OBject)`.  Call ``len(blob)`` to
+   get the size (number of bytes) of the blob.
+
+   .. method:: close()
+
+      Close the blob.
+
+      The blob will be unusable from this point onward.  An
+      :class:`~sqlite3.Error` (or subclass) exception will be raised if any
+      further operation is attempted with the blob.
+
+   .. method:: read(length=-1, /)
+
+      Read *length* bytes of data from the blob at the current offset position.
+      If the end of the blob is reached, the data up to
+      :abbr:`EOF (End of File)` will be returned.  When *length* is not
+      specified, or is negative, :meth:`~Blob.read` will read until the end of
+      the blob.
+
+   .. method:: write(data, /)
+
+      Write *data* to the blob at the current offset.  This function cannot
+      change the blob length.  Writing beyond the end of the blob will raise
+      :exc:`ValueError`.
+
+   .. method:: tell()
+
+      Return the current access position of the blob.
+
+   .. method:: seek(offset, origin=os.SEEK_SET, /)
+
+      Set the current access position of the blob to *offset*.  The *origin*
+      argument defaults to :data:`os.SEEK_SET` (absolute blob positioning).
+      Other values for *origin* are :data:`os.SEEK_CUR` (seek relative to the
+      current position) and :data:`os.SEEK_END` (seek relative to the blob’s
+      end).
+
+   :class:`Blob` example:
+
+      .. literalinclude:: ../includes/sqlite3/blob.py
+
+
 .. _sqlite3-types:
 
 SQLite and Python types
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index f8e86f6ba349b..dba554cc834ec 100644
--- a/Doc/whatsnew/3.11.rst
+++ b/Doc/whatsnew/3.11.rst
@@ -393,6 +393,10 @@ sqlite3
   :class:`sqlite3.Connection` for creating aggregate window functions.
   (Contributed by Erlend E. Aasland in :issue:`34916`.)
 
+* Add :meth:`~sqlite3.Connection.blobopen` to :class:`sqlite3.Connection`.
+  :class:`sqlite3.Blob` allows incremental I/O operations on blobs.
+  (Contributed by Aviv Palivoda and Erlend E. Aasland in :issue:`24905`.)
+
 
 sys
 ---
diff --git a/Lib/test/test_sqlite3/test_dbapi.py b/Lib/test/test_sqlite3/test_dbapi.py
index 2d2e58a3d44f5..faaa3713cb510 100644
--- a/Lib/test/test_sqlite3/test_dbapi.py
+++ b/Lib/test/test_sqlite3/test_dbapi.py
@@ -33,6 +33,8 @@
     check_disallow_instantiation,
     threading_helper,
 )
+from _testcapi import INT_MAX
+from os import SEEK_SET, SEEK_CUR, SEEK_END
 from test.support.os_helper import TESTFN, unlink, temp_dir
 
 
@@ -1041,11 +1043,163 @@ def test_same_query_in_multiple_cursors(self):
             self.assertEqual(cu.fetchall(), [(1,)])
 
 
+class BlobTests(unittest.TestCase):
+    def setUp(self):
+        self.cx = sqlite.connect(":memory:")
+        self.cx.execute("create table test(b blob)")
+        self.data = b"this blob data string is exactly fifty bytes long!"
+        self.cx.execute("insert into test(b) values (?)", (self.data,))
+        self.blob = self.cx.blobopen("test", "b", 1)
+
+    def tearDown(self):
+        self.blob.close()
+        self.cx.close()
+
+    def test_blob_seek_and_tell(self):
+        self.blob.seek(10)
+        self.assertEqual(self.blob.tell(), 10)
+
+        self.blob.seek(10, SEEK_SET)
+        self.assertEqual(self.blob.tell(), 10)
+
+        self.blob.seek(10, SEEK_CUR)
+        self.assertEqual(self.blob.tell(), 20)
+
+        self.blob.seek(-10, SEEK_END)
+        self.assertEqual(self.blob.tell(), 40)
+
+    def test_blob_seek_error(self):
+        msg_oor = "offset out of blob range"
+        msg_orig = "'origin' should be os.SEEK_SET, os.SEEK_CUR, or os.SEEK_END"
+        msg_of = "seek offset results in overflow"
+
+        dataset = (
+            (ValueError, msg_oor, lambda: self.blob.seek(1000)),
+            (ValueError, msg_oor, lambda: self.blob.seek(-10)),
+            (ValueError, msg_orig, lambda: self.blob.seek(10, -1)),
+            (ValueError, msg_orig, lambda: self.blob.seek(10, 3)),
+        )
+        for exc, msg, fn in dataset:
+            with self.subTest(exc=exc, msg=msg, fn=fn):
+                self.assertRaisesRegex(exc, msg, fn)
+
+        # Force overflow errors
+        self.blob.seek(1, SEEK_SET)
+        with self.assertRaisesRegex(OverflowError, msg_of):
+            self.blob.seek(INT_MAX, SEEK_CUR)
+        with self.assertRaisesRegex(OverflowError, msg_of):
+            self.blob.seek(INT_MAX, SEEK_END)
+
+    def test_blob_read(self):
+        buf = self.blob.read()
+        self.assertEqual(buf, self.data)
+
+    def test_blob_read_oversized(self):
+        buf = self.blob.read(len(self.data) * 2)
+        self.assertEqual(buf, self.data)
+
+    def test_blob_read_advance_offset(self):
+        n = 10
+        buf = self.blob.read(n)
+        self.assertEqual(buf, self.data[:n])
+        self.assertEqual(self.blob.tell(), n)
+
+    def test_blob_read_at_offset(self):
+        self.blob.seek(10)
+        self.assertEqual(self.blob.read(10), self.data[10:20])
+
+    def test_blob_read_error_row_changed(self):
+        self.cx.execute("update test set b='aaaa' where rowid=1")
+        with self.assertRaises(sqlite.OperationalError):
+            self.blob.read()
+
+    def test_blob_write(self):
+        new_data = b"new data".ljust(50)
+        self.blob.write(new_data)
+        row = self.cx.execute("select b from test").fetchone()
+        self.assertEqual(row[0], new_data)
+
+    def test_blob_write_at_offset(self):
+        new_data = b"c" * 25
+        self.blob.seek(25)
+        self.blob.write(new_data)
+        row = self.cx.execute("select b from test").fetchone()
+        self.assertEqual(row[0], self.data[:25] + new_data)
+
+    def test_blob_write_advance_offset(self):
+        self.blob.write(b"d"*10)
+        self.assertEqual(self.blob.tell(), 10)
+
+    def test_blob_write_error_length(self):
+        with self.assertRaisesRegex(ValueError, "data longer than blob"):
+            self.blob.write(b"a" * 1000)
+
+    def test_blob_write_error_row_changed(self):
+        self.cx.execute("update test set b='aaaa' where rowid=1")
+        with self.assertRaises(sqlite.OperationalError):
+            self.blob.write(b"aaa")
+
+    def test_blob_write_error_readonly(self):
+        ro_blob = self.cx.blobopen("test", "b", 1, readonly=True)
+        with self.assertRaisesRegex(sqlite.OperationalError, "readonly"):
+            ro_blob.write(b"aaa")
+        ro_blob.close()
+
+    def test_blob_open_error(self):
+        dataset = (
+            (("test", "b", 1), {"name": "notexisting"}),
+            (("notexisting", "b", 1), {}),
+            (("test", "notexisting", 1), {}),
+            (("test", "b", 2), {}),
+        )
+        regex = "no such"
+        for args, kwds in dataset:
+            with self.subTest(args=args, kwds=kwds):
+                with self.assertRaisesRegex(sqlite.OperationalError, regex):
+                    self.cx.blobopen(*args, **kwds)
+
+    def test_blob_sequence_not_supported(self):
+        with self.assertRaises(TypeError):
+            self.blob + self.blob
+        with self.assertRaises(TypeError):
+            self.blob * 5
+        with self.assertRaises(TypeError):
+            b"a" in self.blob
+
+    def test_blob_closed(self):
+        with memory_database() as cx:
+            cx.execute("create table test(b blob)")
+            cx.execute("insert into test values (zeroblob(100))")
+            blob = cx.blobopen("test", "b", 1)
+            blob.close()
+
+            msg = "Cannot operate on a closed blob"
+            with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
+                blob.read()
+            with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
+                blob.write(b"")
+            with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
+                blob.seek(0)
+            with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
+                blob.tell()
+
+    def test_blob_closed_db_read(self):
+        with memory_database() as cx:
+            cx.execute("create table test(b blob)")
+            cx.execute("insert into test(b) values (zeroblob(100))")
+            blob = cx.blobopen("test", "b", 1)
+            cx.close()
+            self.assertRaisesRegex(sqlite.ProgrammingError,
+                                   "Cannot operate on a closed database",
+                                   blob.read)
+
+
 class ThreadTests(unittest.TestCase):
     def setUp(self):
         self.con = sqlite.connect(":memory:")
         self.cur = self.con.cursor()
-        self.cur.execute("create table test(name text)")
+        self.cur.execute("create table test(name text, b blob)")
+        self.cur.execute("insert into test values('blob', zeroblob(1))")
 
     def tearDown(self):
         self.cur.close()
@@ -1080,6 +1234,7 @@ def test_check_connection_thread(self):
             lambda: self.con.create_collation("foo", None),
             lambda: self.con.setlimit(sqlite.SQLITE_LIMIT_LENGTH, -1),
             lambda: self.con.getlimit(sqlite.SQLITE_LIMIT_LENGTH),
+            lambda: self.con.blobopen("test", "b", 1),
         ]
         if hasattr(sqlite.Connection, "serialize"):
             fns.append(lambda: self.con.serialize())
diff --git a/Misc/NEWS.d/next/Library/2018-04-18-16-15-55.bpo-24905.jYqjYx.rst b/Misc/NEWS.d/next/Library/2018-04-18-16-15-55.bpo-24905.jYqjYx.rst
new file mode 100644
index 0000000000000..0a57f90c12378
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-04-18-16-15-55.bpo-24905.jYqjYx.rst
@@ -0,0 +1,3 @@
+Add :meth:`~sqlite3.Connection.blobopen` to :class:`sqlite3.Connection`.
+:class:`sqlite3.Blob` allows incremental I/O operations on blobs.
+Patch by Aviv Palivoda and Erlend E. Aasland.
diff --git a/Modules/_sqlite/blob.c b/Modules/_sqlite/blob.c
new file mode 100644
index 0000000000000..821295cee813f
--- /dev/null
+++ b/Modules/_sqlite/blob.c
@@ -0,0 +1,351 @@
+#include "blob.h"
+#include "util.h"
+
+#define clinic_state() (pysqlite_get_state_by_type(Py_TYPE(self)))
+#include "clinic/blob.c.h"
+#undef clinic_state
+
+/*[clinic input]
+module _sqlite3
+class _sqlite3.Blob "pysqlite_Blob *" "clinic_state()->BlobType"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=908d3e16a45f8da7]*/
+
+// Close the underlying SQLite blob handle, if there is one.
+//
+// Calling this on an already closed blob is a no-op, because the handle is
+// reset to NULL the first time it is closed.
+static void
+close_blob(pysqlite_Blob *self)
+{
+    if (self->blob) {
+        // Detach the handle from the object before the GIL is released, so
+        // the object already reads as closed while SQLite closes the handle.
+        sqlite3_blob *blob = self->blob;
+        self->blob = NULL;
+
+        Py_BEGIN_ALLOW_THREADS
+        sqlite3_blob_close(blob);
+        Py_END_ALLOW_THREADS
+    }
+}
+
+// GC traversal: report the objects this blob keeps alive, namely its (heap)
+// type and the connection it belongs to.
+static int
+blob_traverse(pysqlite_Blob *self, visitproc visit, void *arg)
+{
+    Py_VISIT(Py_TYPE(self));
+    Py_VISIT(self->connection);
+    return 0;
+}
+
+// GC clear: drop the reference to the connection.  The SQLite handle itself
+// is not touched here; it is closed by close_blob().
+static int
+blob_clear(pysqlite_Blob *self)
+{
+    Py_CLEAR(self->connection);
+    return 0;
+}
+
+// Deallocator: close the SQLite handle, clear weak references and owned
+// references, then free the object.
+static void
+blob_dealloc(pysqlite_Blob *self)
+{
+    // Keep the type pointer; it is needed after the object has been freed.
+    PyTypeObject *tp = Py_TYPE(self);
+    PyObject_GC_UnTrack(self);
+
+    close_blob(self);
+
+    if (self->in_weakreflist != NULL) {
+        PyObject_ClearWeakRefs((PyObject*)self);
+    }
+    tp->tp_clear((PyObject *)self);
+    tp->tp_free(self);
+    // The type is created from a spec (heap type), so the instance owns a
+    // reference to it that must be released last.
+    Py_DECREF(tp);
+}
+
+// Return 1 if the blob object is usable, 0 if not.
+//
+// The connection and calling thread are validated first, through
+// pysqlite_check_connection() and pysqlite_check_thread() (defined elsewhere;
+// they are expected to set the exception on failure).  If the blob handle has
+// already been closed, ProgrammingError is set here.
+static int
+check_blob(pysqlite_Blob *self)
+{
+    if (!pysqlite_check_connection(self->connection) ||
+        !pysqlite_check_thread(self->connection)) {
+        return 0;
+    }
+    // A NULL handle means close_blob() has already run for this object.
+    if (self->blob == NULL) {
+        pysqlite_state *state = self->connection->state;
+        PyErr_SetString(state->ProgrammingError,
+                        "Cannot operate on a closed blob.");
+        return 0;
+    }
+    return 1;
+}
+
+
+/*[clinic input]
+_sqlite3.Blob.close as blob_close
+
+Close the blob.
+[clinic start generated code]*/
+
+static PyObject *
+blob_close_impl(pysqlite_Blob *self)
+/*[clinic end generated code: output=848accc20a138d1b input=7bc178a402a40bd8]*/
+{
+    // Only the connection and the calling thread are validated here.
+    // check_blob() is not used, so closing an already closed blob is a no-op
+    // (close_blob() ignores a NULL handle) rather than an error.
+    if (!pysqlite_check_connection(self->connection) ||
+        !pysqlite_check_thread(self->connection))
+    {
+        return NULL;
+    }
+    close_blob(self);
+    Py_RETURN_NONE;
+}
+
+// Close the SQLite handle of every blob of this connection that is still
+// alive.  self->blobs holds weak references, so entries whose blob object has
+// already been deallocated are skipped.
+void
+pysqlite_close_all_blobs(pysqlite_Connection *self)
+{
+    // Index with Py_ssize_t: that is the type PyList_GET_SIZE() returns and
+    // PyList_GET_ITEM() expects.
+    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(self->blobs); i++) {
+        PyObject *weakref = PyList_GET_ITEM(self->blobs, i);
+        // Borrowed reference; Py_None when the referent is gone.
+        PyObject *blob = PyWeakref_GetObject(weakref);
+        if (!Py_IsNone(blob)) {
+            close_blob((pysqlite_Blob *)blob);
+        }
+    }
+}
+
+// Set a Python exception for a failed blob operation.
+// 'rc' is the SQLite result code returned by the failing call.
+static void
+blob_seterror(pysqlite_Blob *self, int rc)
+{
+    assert(self->connection != NULL);
+#if SQLITE_VERSION_NUMBER < 3008008
+    // SQLite pre 3.8.8 does not set this blob error on the connection
+    if (rc == SQLITE_ABORT) {
+        PyErr_SetString(self->connection->OperationalError,
+                        "Cannot operate on an expired blob handle");
+        return;
+    }
+#endif
+    // In all other cases, let the shared helper raise from the database
+    // handle (helper defined elsewhere in the module).
+    _pysqlite_seterror(self->connection->state, self->connection->db);
+}
+
+// Read 'length' bytes from the blob, starting at 'offset'.
+//
+// Return a new bytes object on success, or NULL with an exception set on
+// failure.  self->offset is not modified; the caller advances it.
+static PyObject *
+inner_read(pysqlite_Blob *self, int length, int offset)
+{
+    // Allocate an uninitialised bytes object that SQLite fills in directly.
+    PyObject *buffer = PyBytes_FromStringAndSize(NULL, length);
+    if (buffer == NULL) {
+        return NULL;
+    }
+
+    char *raw_buffer = PyBytes_AS_STRING(buffer);
+    int rc;
+    Py_BEGIN_ALLOW_THREADS
+    rc = sqlite3_blob_read(self->blob, raw_buffer, length, offset);
+    Py_END_ALLOW_THREADS
+
+    if (rc != SQLITE_OK) {
+        // Discard the partially filled buffer before raising.
+        Py_DECREF(buffer);
+        blob_seterror(self, rc);
+        return NULL;
+    }
+    return buffer;
+}
+
+
+/*[clinic input]
+_sqlite3.Blob.read as blob_read
+
+    length: int = -1
+        Read length in bytes.
+    /
+
+Read data at the current offset position.
+
+If the end of the blob is reached, the data up to end of file will be returned.
+When length is not specified, or is negative, Blob.read() will read until the
+end of the blob.
+[clinic start generated code]*/
+
+static PyObject *
+blob_read_impl(pysqlite_Blob *self, int length)
+/*[clinic end generated code: output=1fc99b2541360dde input=f2e4aa4378837250]*/
+{
+    if (!check_blob(self)) {
+        return NULL;
+    }
+
+    /* Make sure we never read past "EOB". Also read the rest of the blob if a
+     * negative length is specified. */
+    int blob_len = sqlite3_blob_bytes(self->blob);
+    int max_read_len = blob_len - self->offset;
+    if (length < 0 || length > max_read_len) {
+        length = max_read_len;
+    }
+
+    PyObject *buffer = inner_read(self, length, self->offset);
+    if (buffer == NULL) {
+        return NULL;
+    }
+    // Advance the position only after the read has succeeded.
+    self->offset += length;
+    return buffer;
+}
+
+// Write 'len' bytes from 'buf' into the blob, starting at 'offset'.
+//
+// Return 0 on success.  Return -1 with an exception set if the data does not
+// fit between 'offset' and the end of the blob (ValueError), or if SQLite
+// reports an error.  self->offset is not modified; the caller advances it.
+static int
+inner_write(pysqlite_Blob *self, const void *buf, Py_ssize_t len, int offset)
+{
+    // Measure the remaining space from the requested 'offset' (not from
+    // self->offset), so the bounds check matches the write performed below.
+    int remaining_len = sqlite3_blob_bytes(self->blob) - offset;
+    if (len > remaining_len) {
+        PyErr_SetString(PyExc_ValueError, "data longer than blob length");
+        return -1;
+    }
+
+    // The narrowing cast of 'len' is safe: it was just checked against
+    // 'remaining_len', which is an int.
+    int rc;
+    Py_BEGIN_ALLOW_THREADS
+    rc = sqlite3_blob_write(self->blob, buf, (int)len, offset);
+    Py_END_ALLOW_THREADS
+
+    if (rc != SQLITE_OK) {
+        blob_seterror(self, rc);
+        return -1;
+    }
+    return 0;
+}
+
+
+/*[clinic input]
+_sqlite3.Blob.write as blob_write
+
+    data: Py_buffer
+    /
+
+Write data at the current offset.
+
+This function cannot change the blob length.  Writing beyond the end of the
+blob will result in an exception being raised.
+[clinic start generated code]*/
+
+static PyObject *
+blob_write_impl(pysqlite_Blob *self, Py_buffer *data)
+/*[clinic end generated code: output=b34cf22601b570b2 input=a84712f24a028e6d]*/
+{
+    if (!check_blob(self)) {
+        return NULL;
+    }
+
+    // Write at the current position; inner_write() rejects data that does
+    // not fit in the blob and sets the exception itself.
+    int rc = inner_write(self, data->buf, data->len, self->offset);
+    if (rc < 0) {
+        return NULL;
+    }
+    // Advance the position only after a successful write.  The length has
+    // been bounds-checked by inner_write(), so the cast cannot truncate.
+    self->offset += (int)data->len;
+    Py_RETURN_NONE;
+}
+
+
+/*[clinic input]
+_sqlite3.Blob.seek as blob_seek
+
+    offset: int
+    origin: int = 0
+    /
+
+Set the current access position to offset.
+
+The origin argument defaults to os.SEEK_SET (absolute blob positioning).
+Other values for origin are os.SEEK_CUR (seek relative to the current position)
+and os.SEEK_END (seek relative to the blob's end).
+[clinic start generated code]*/
+
+static PyObject *
+blob_seek_impl(pysqlite_Blob *self, int offset, int origin)
+/*[clinic end generated code: output=854c5a0e208547a5 input=5da9a07e55fe6bb6]*/
+{
+    if (!check_blob(self)) {
+        return NULL;
+    }
+
+    // Convert 'offset' to an absolute position according to 'origin'.
+    int blob_len = sqlite3_blob_bytes(self->blob);
+    switch (origin) {
+        case SEEK_SET:
+            // Already absolute.
+            break;
+        case SEEK_CUR:
+            // Guard the addition against exceeding INT_MAX; the range check
+            // below keeps self->offset non-negative, so only the upper bound
+            // can be crossed.
+            if (offset > INT_MAX - self->offset) {
+                goto overflow;
+            }
+            offset += self->offset;
+            break;
+        case SEEK_END:
+            // Same overflow guard, relative to the end of the blob.
+            if (offset > INT_MAX - blob_len) {
+                goto overflow;
+            }
+            offset += blob_len;
+            break;
+        default:
+            PyErr_SetString(PyExc_ValueError,
+                            "'origin' should be os.SEEK_SET, os.SEEK_CUR, or "
+                            "os.SEEK_END");
+            return NULL;
+    }
+
+    // Positions 0..blob_len are valid; blob_len itself is the end-of-blob
+    // position.
+    if (offset < 0 || offset > blob_len) {
+        PyErr_SetString(PyExc_ValueError, "offset out of blob range");
+        return NULL;
+    }
+
+    self->offset = offset;
+    Py_RETURN_NONE;
+
+overflow:
+    PyErr_SetString(PyExc_OverflowError, "seek offset results in overflow");
+    return NULL;
+}
+
+
+/*[clinic input]
+_sqlite3.Blob.tell as blob_tell
+
+Return the current access position for the blob.
+[clinic start generated code]*/
+
+static PyObject *
+blob_tell_impl(pysqlite_Blob *self)
+/*[clinic end generated code: output=3d3ba484a90b3a99 input=7e34057aa303612c]*/
+{
+    // Like the other accessors, tell() fails once the blob or its connection
+    // has been closed.
+    if (!check_blob(self)) {
+        return NULL;
+    }
+    return PyLong_FromLong(self->offset);
+}
+
+
+static PyMethodDef blob_methods[] = {
+    BLOB_CLOSE_METHODDEF
+    BLOB_READ_METHODDEF
+    BLOB_SEEK_METHODDEF
+    BLOB_TELL_METHODDEF
+    BLOB_WRITE_METHODDEF
+    {NULL, NULL}
+};
+
+static struct PyMemberDef blob_members[] = {
+    {"__weaklistoffset__", T_PYSSIZET, offsetof(pysqlite_Blob, in_weakreflist), READONLY},
+    {NULL},
+};
+
+static PyType_Slot blob_slots[] = {
+    {Py_tp_dealloc, blob_dealloc},
+    {Py_tp_traverse, blob_traverse},
+    {Py_tp_clear, blob_clear},
+    {Py_tp_methods, blob_methods},
+    {Py_tp_members, blob_members},
+    {0, NULL},
+};
+
+static PyType_Spec blob_spec = {
+    .name = MODULE_NAME ".Blob",
+    .basicsize = sizeof(pysqlite_Blob),
+    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+              Py_TPFLAGS_IMMUTABLETYPE),
+    .slots = blob_slots,
+};
+
+// Create the Blob type from blob_spec and store it in the module state.
+//
+// Return 0 on success, -1 with an exception set if type creation fails.
+int
+pysqlite_blob_setup_types(PyObject *mod)
+{
+    PyObject *type = PyType_FromModuleAndSpec(mod, &blob_spec, NULL);
+    if (type == NULL) {
+        return -1;
+    }
+    // The module state takes over the new reference to the type.
+    pysqlite_state *state = pysqlite_get_state(mod);
+    state->BlobType = (PyTypeObject *)type;
+    return 0;
+}
diff --git a/Modules/_sqlite/blob.h b/Modules/_sqlite/blob.h
new file mode 100644
index 0000000000000..418ca03bdb51d
--- /dev/null
+++ b/Modules/_sqlite/blob.h
@@ -0,0 +1,24 @@
+#ifndef PYSQLITE_BLOB_H
+#define PYSQLITE_BLOB_H
+
+#include "Python.h"
+#include "sqlite3.h"
+#include "connection.h"
+
+#define BLOB_SEEK_START 0
+#define BLOB_SEEK_CUR   1
+#define BLOB_SEEK_END   2
+
+// Python object wrapping an open SQLite incremental-I/O blob handle.
+typedef struct {
+    PyObject_HEAD
+    pysqlite_Connection *connection;  // owning connection (released in tp_clear)
+    sqlite3_blob *blob;               // SQLite handle; NULL once the blob is closed
+    int offset;                       // current read/write position in the blob
+
+    PyObject *in_weakreflist;         // weak reference list (see __weaklistoffset__)
+} pysqlite_Blob;
+
+int pysqlite_blob_setup_types(PyObject *mod);
+void pysqlite_close_all_blobs(pysqlite_Connection *self);
+
+#endif
diff --git a/Modules/_sqlite/clinic/blob.c.h b/Modules/_sqlite/clinic/blob.c.h
new file mode 100644
index 0000000000000..30b3e3c194739
--- /dev/null
+++ b/Modules/_sqlite/clinic/blob.c.h
@@ -0,0 +1,165 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+PyDoc_STRVAR(blob_close__doc__,
+"close($self, /)\n"
+"--\n"
+"\n"
+"Close the blob.");
+
+#define BLOB_CLOSE_METHODDEF    \
+    {"close", (PyCFunction)blob_close, METH_NOARGS, blob_close__doc__},
+
+static PyObject *
+blob_close_impl(pysqlite_Blob *self);
+
+static PyObject *
+blob_close(pysqlite_Blob *self, PyObject *Py_UNUSED(ignored))
+{
+    return blob_close_impl(self);
+}
+
+PyDoc_STRVAR(blob_read__doc__,
+"read($self, length=-1, /)\n"
+"--\n"
+"\n"
+"Read data at the current offset position.\n"
+"\n"
+"  length\n"
+"    Read length in bytes.\n"
+"\n"
+"If the end of the blob is reached, the data up to end of file will be returned.\n"
+"When length is not specified, or is negative, Blob.read() will read until the\n"
+"end of the blob.");
+
+#define BLOB_READ_METHODDEF    \
+    {"read", (PyCFunction)(void(*)(void))blob_read, METH_FASTCALL, blob_read__doc__},
+
+static PyObject *
+blob_read_impl(pysqlite_Blob *self, int length);
+
+static PyObject *
+blob_read(pysqlite_Blob *self, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    int length = -1;
+
+    if (!_PyArg_CheckPositional("read", nargs, 0, 1)) {
+        goto exit;
+    }
+    if (nargs < 1) {
+        goto skip_optional;
+    }
+    length = _PyLong_AsInt(args[0]);
+    if (length == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+skip_optional:
+    return_value = blob_read_impl(self, length);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(blob_write__doc__,
+"write($self, data, /)\n"
+"--\n"
+"\n"
+"Write data at the current offset.\n"
+"\n"
+"This function cannot change the blob length.  Writing beyond the end of the\n"
+"blob will result in an exception being raised.");
+
+#define BLOB_WRITE_METHODDEF    \
+    {"write", (PyCFunction)blob_write, METH_O, blob_write__doc__},
+
+static PyObject *
+blob_write_impl(pysqlite_Blob *self, Py_buffer *data);
+
+static PyObject *
+blob_write(pysqlite_Blob *self, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    Py_buffer data = {NULL, NULL};
+
+    if (PyObject_GetBuffer(arg, &data, PyBUF_SIMPLE) != 0) {
+        goto exit;
+    }
+    if (!PyBuffer_IsContiguous(&data, 'C')) {
+        _PyArg_BadArgument("write", "argument", "contiguous buffer", arg);
+        goto exit;
+    }
+    return_value = blob_write_impl(self, &data);
+
+exit:
+    /* Cleanup for data */
+    if (data.obj) {
+       PyBuffer_Release(&data);
+    }
+
+    return return_value;
+}
+
+PyDoc_STRVAR(blob_seek__doc__,
+"seek($self, offset, origin=0, /)\n"
+"--\n"
+"\n"
+"Set the current access position to offset.\n"
+"\n"
+"The origin argument defaults to os.SEEK_SET (absolute blob positioning).\n"
+"Other values for origin are os.SEEK_CUR (seek relative to the current position)\n"
+"and os.SEEK_END (seek relative to the blob\'s end).");
+
+#define BLOB_SEEK_METHODDEF    \
+    {"seek", (PyCFunction)(void(*)(void))blob_seek, METH_FASTCALL, blob_seek__doc__},
+
+static PyObject *
+blob_seek_impl(pysqlite_Blob *self, int offset, int origin);
+
+static PyObject *
+blob_seek(pysqlite_Blob *self, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    int offset;
+    int origin = 0;
+
+    if (!_PyArg_CheckPositional("seek", nargs, 1, 2)) {
+        goto exit;
+    }
+    offset = _PyLong_AsInt(args[0]);
+    if (offset == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    origin = _PyLong_AsInt(args[1]);
+    if (origin == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+skip_optional:
+    return_value = blob_seek_impl(self, offset, origin);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(blob_tell__doc__,
+"tell($self, /)\n"
+"--\n"
+"\n"
+"Return the current access position for the blob.");
+
+#define BLOB_TELL_METHODDEF    \
+    {"tell", (PyCFunction)blob_tell, METH_NOARGS, blob_tell__doc__},
+
+static PyObject *
+blob_tell_impl(pysqlite_Blob *self);
+
+static PyObject *
+blob_tell(pysqlite_Blob *self, PyObject *Py_UNUSED(ignored))
+{
+    return blob_tell_impl(self);
+}
+/*[clinic end generated code: output=d3a02b127f2cfa58 input=a9049054013a1b77]*/
diff --git a/Modules/_sqlite/clinic/connection.c.h b/Modules/_sqlite/clinic/connection.c.h
index 2b933f8522465..d4597086f4c9a 100644
--- a/Modules/_sqlite/clinic/connection.c.h
+++ b/Modules/_sqlite/clinic/connection.c.h
@@ -145,6 +145,110 @@ pysqlite_connection_cursor(pysqlite_Connection *self, PyObject *const *args, Py_
     return return_value;
 }
 
+PyDoc_STRVAR(blobopen__doc__,
+"blobopen($self, table, column, row, /, *, readonly=False, name=\'main\')\n"
+"--\n"
+"\n"
+"Open and return a BLOB object.\n"
+"\n"
+"  table\n"
+"    Table name.\n"
+"  column\n"
+"    Column name.\n"
+"  row\n"
+"    Row index.\n"
+"  readonly\n"
+"    Open the BLOB without write permissions.\n"
+"  name\n"
+"    Database name.");
+
+#define BLOBOPEN_METHODDEF    \
+    {"blobopen", (PyCFunction)(void(*)(void))blobopen, METH_FASTCALL|METH_KEYWORDS, blobopen__doc__},
+
+static PyObject *
+blobopen_impl(pysqlite_Connection *self, const char *table, const char *col,
+              int row, int readonly, const char *name);
+
+static PyObject *
+blobopen(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"", "", "", "readonly", "name", NULL};
+    static _PyArg_Parser _parser = {NULL, _keywords, "blobopen", 0};
+    PyObject *argsbuf[5];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 3;
+    const char *table;
+    const char *col;
+    int row;
+    int readonly = 0;
+    const char *name = "main";
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 3, 3, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("blobopen", "argument 1", "str", args[0]);
+        goto exit;
+    }
+    Py_ssize_t table_length;
+    table = PyUnicode_AsUTF8AndSize(args[0], &table_length);
+    if (table == NULL) {
+        goto exit;
+    }
+    if (strlen(table) != (size_t)table_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+    if (!PyUnicode_Check(args[1])) {
+        _PyArg_BadArgument("blobopen", "argument 2", "str", args[1]);
+        goto exit;
+    }
+    Py_ssize_t col_length;
+    col = PyUnicode_AsUTF8AndSize(args[1], &col_length);
+    if (col == NULL) {
+        goto exit;
+    }
+    if (strlen(col) != (size_t)col_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+    row = _PyLong_AsInt(args[2]);
+    if (row == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    if (args[3]) {
+        readonly = _PyLong_AsInt(args[3]);
+        if (readonly == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
+    }
+    if (!PyUnicode_Check(args[4])) {
+        _PyArg_BadArgument("blobopen", "argument 'name'", "str", args[4]);
+        goto exit;
+    }
+    Py_ssize_t name_length;
+    name = PyUnicode_AsUTF8AndSize(args[4], &name_length);
+    if (name == NULL) {
+        goto exit;
+    }
+    if (strlen(name) != (size_t)name_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+skip_optional_kwonly:
+    return_value = blobopen_impl(self, table, col, row, readonly, name);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(pysqlite_connection_close__doc__,
 "close($self, /)\n"
 "--\n"
@@ -1041,4 +1145,4 @@ getlimit(pysqlite_Connection *self, PyObject *arg)
 #ifndef DESERIALIZE_METHODDEF
     #define DESERIALIZE_METHODDEF
 #endif /* !defined(DESERIALIZE_METHODDEF) */
-/*[clinic end generated code: output=b9af1b52fda808bf input=a9049054013a1b77]*/
+/*[clinic end generated code: output=be2f526e78fa65b1 input=a9049054013a1b77]*/
diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c
index d7c0a9e46161c..85fb128fc7f1c 100644
--- a/Modules/_sqlite/connection.c
+++ b/Modules/_sqlite/connection.c
@@ -26,6 +26,7 @@
 #include "connection.h"
 #include "statement.h"
 #include "cursor.h"
+#include "blob.h"
 #include "prepare_protocol.h"
 #include "util.h"
 
@@ -234,10 +235,17 @@ pysqlite_connection_init_impl(pysqlite_Connection *self,
         return -1;
     }
 
-    // Create list of weak references to cursors.
+    /* Create lists of weak references to cursors and blobs */
     PyObject *cursors = PyList_New(0);
     if (cursors == NULL) {
-        Py_DECREF(statement_cache);
+        Py_XDECREF(statement_cache);
+        return -1;
+    }
+
+    PyObject *blobs = PyList_New(0);
+    if (blobs == NULL) {
+        Py_XDECREF(statement_cache);
+        Py_XDECREF(cursors);
         return -1;
     }
 
@@ -250,6 +258,7 @@ pysqlite_connection_init_impl(pysqlite_Connection *self,
     self->thread_ident = PyThread_get_thread_ident();
     self->statement_cache = statement_cache;
     self->cursors = cursors;
+    self->blobs = blobs;
     self->created_cursors = 0;
     self->row_factory = Py_NewRef(Py_None);
     self->text_factory = Py_NewRef(&PyUnicode_Type);
@@ -291,6 +300,7 @@ connection_traverse(pysqlite_Connection *self, visitproc visit, void *arg)
     Py_VISIT(Py_TYPE(self));
     Py_VISIT(self->statement_cache);
     Py_VISIT(self->cursors);
+    Py_VISIT(self->blobs);
     Py_VISIT(self->row_factory);
     Py_VISIT(self->text_factory);
     VISIT_CALLBACK_CONTEXT(self->trace_ctx);
@@ -314,6 +324,7 @@ connection_clear(pysqlite_Connection *self)
 {
     Py_CLEAR(self->statement_cache);
     Py_CLEAR(self->cursors);
+    Py_CLEAR(self->blobs);
     Py_CLEAR(self->row_factory);
     Py_CLEAR(self->text_factory);
     clear_callback_context(self->trace_ctx);
@@ -429,6 +440,85 @@ pysqlite_connection_cursor_impl(pysqlite_Connection *self, PyObject *factory)
     return cursor;
 }
 
+/*[clinic input]
+_sqlite3.Connection.blobopen as blobopen
+
+    table: str
+        Table name.
+    column as col: str
+        Column name.
+    row: int
+        Row index.
+    /
+    *
+    readonly: bool(accept={int}) = False
+        Open the BLOB without write permissions.
+    name: str = "main"
+        Database name.
+
+Open and return a BLOB object.
+[clinic start generated code]*/
+
+static PyObject *
+blobopen_impl(pysqlite_Connection *self, const char *table, const char *col,
+              int row, int readonly, const char *name)
+/*[clinic end generated code: output=0c8e2e58516d0b5c input=1e7052516acfc94d]*/
+{
+    /* Open a handle for incremental I/O on a single BLOB value and wrap it
+     * in a pysqlite_Blob.  A weak reference to the new object is appended
+     * to self->blobs.  Returns a new reference, or NULL with an exception
+     * set. */
+    if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
+        return NULL;
+    }
+
+    int rc;
+    sqlite3_blob *blob;
+
+    // sqlite3_blob_open() takes a "read-write" flag, hence the negation.
+    Py_BEGIN_ALLOW_THREADS
+    rc = sqlite3_blob_open(self->db, name, table, col, row, !readonly, &blob);
+    Py_END_ALLOW_THREADS
+
+    if (rc != SQLITE_OK) {
+        _pysqlite_seterror(self->state, self->db);
+        return NULL;
+    }
+
+    pysqlite_Blob *obj = PyObject_GC_New(pysqlite_Blob, self->state->BlobType);
+    if (obj == NULL) {
+        // No Python object owns the handle yet, so close it here; otherwise
+        // the open BLOB handle would be leaked.
+        sqlite3_blob_close(blob);
+        return NULL;
+    }
+
+    obj->connection = (pysqlite_Connection *)Py_NewRef(self);
+    obj->blob = blob;
+    obj->offset = 0;
+    obj->in_weakreflist = NULL;
+
+    PyObject_GC_Track(obj);
+
+    // Add our blob to connection blobs list
+    PyObject *weakref = PyWeakref_NewRef((PyObject *)obj, NULL);
+    if (weakref == NULL) {
+        goto error;
+    }
+    rc = PyList_Append(self->blobs, weakref);
+    Py_DECREF(weakref);
+    if (rc < 0) {
+        goto error;
+    }
+
+    return (PyObject *)obj;
+
+error:
+    // obj owns the BLOB handle by now; releasing obj is expected to close it.
+    Py_DECREF(obj);
+    return NULL;
+}
+
 /*[clinic input]
 _sqlite3.Connection.close as pysqlite_connection_close
 
@@ -451,6 +532,7 @@ pysqlite_connection_close_impl(pysqlite_Connection *self)
         return NULL;
     }
 
+    pysqlite_close_all_blobs(self);
     Py_CLEAR(self->statement_cache);
     connection_close(self);
 
@@ -2257,6 +2339,7 @@ static PyMethodDef connection_methods[] = {
     SERIALIZE_METHODDEF
     DESERIALIZE_METHODDEF
     CREATE_WINDOW_FUNCTION_METHODDEF
+    BLOBOPEN_METHODDEF
     {NULL, NULL}
 };
 
diff --git a/Modules/_sqlite/connection.h b/Modules/_sqlite/connection.h
index 84f1f095cb386..2b946ff3c7369 100644
--- a/Modules/_sqlite/connection.h
+++ b/Modules/_sqlite/connection.h
@@ -63,8 +63,9 @@ typedef struct
 
     PyObject *statement_cache;
 
-    /* Lists of weak references to statements and cursors used within this connection */
-    PyObject* cursors;
+    /* Lists of weak references to cursors and blobs used within this connection */
+    PyObject *cursors;
+    PyObject *blobs;
 
     /* Counters for how many cursors were created in the connection. May be
      * reset to 0 at certain intervals */
diff --git a/Modules/_sqlite/module.c b/Modules/_sqlite/module.c
index ffda836d7a3cc..d355c2be37a2a 100644
--- a/Modules/_sqlite/module.c
+++ b/Modules/_sqlite/module.c
@@ -27,6 +27,7 @@
 #include "prepare_protocol.h"
 #include "microprotocols.h"
 #include "row.h"
+#include "blob.h"
 
 #if SQLITE_VERSION_NUMBER < 3007015
 #error "SQLite 3.7.15 or higher required"
@@ -582,6 +583,7 @@ module_traverse(PyObject *module, visitproc visit, void *arg)
     Py_VISIT(state->Warning);
 
     // Types
+    Py_VISIT(state->BlobType);
     Py_VISIT(state->ConnectionType);
     Py_VISIT(state->CursorType);
     Py_VISIT(state->PrepareProtocolType);
@@ -614,6 +616,7 @@ module_clear(PyObject *module)
     Py_CLEAR(state->Warning);
 
     // Types
+    Py_CLEAR(state->BlobType);
     Py_CLEAR(state->ConnectionType);
     Py_CLEAR(state->CursorType);
     Py_CLEAR(state->PrepareProtocolType);
@@ -687,7 +690,8 @@ module_exec(PyObject *module)
         (pysqlite_cursor_setup_types(module) < 0) ||
         (pysqlite_connection_setup_types(module) < 0) ||
         (pysqlite_statement_setup_types(module) < 0) ||
-        (pysqlite_prepare_protocol_setup_types(module) < 0)
+        (pysqlite_prepare_protocol_setup_types(module) < 0) ||
+        (pysqlite_blob_setup_types(module) < 0)
        ) {
         goto error;
     }
diff --git a/Modules/_sqlite/module.h b/Modules/_sqlite/module.h
index fcea7096924ce..7deba22ffec1b 100644
--- a/Modules/_sqlite/module.h
+++ b/Modules/_sqlite/module.h
@@ -53,6 +53,7 @@ typedef struct {
     int BaseTypeAdapted;
     int enable_callback_tracebacks;
 
+    PyTypeObject *BlobType;
     PyTypeObject *ConnectionType;
     PyTypeObject *CursorType;
     PyTypeObject *PrepareProtocolType;
diff --git a/PCbuild/_sqlite3.vcxproj b/PCbuild/_sqlite3.vcxproj
index 9cff43f73e5be..804aa07367a02 100644
--- a/PCbuild/_sqlite3.vcxproj
+++ b/PCbuild/_sqlite3.vcxproj
@@ -106,6 +106,7 @@
     <ClInclude Include="..\Modules\_sqlite\row.h" />
     <ClInclude Include="..\Modules\_sqlite\statement.h" />
     <ClInclude Include="..\Modules\_sqlite\util.h" />
+    <ClInclude Include="..\Modules\_sqlite\blob.h" />
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\Modules\_sqlite\connection.c" />
@@ -116,6 +117,7 @@
     <ClCompile Include="..\Modules\_sqlite\row.c" />
     <ClCompile Include="..\Modules\_sqlite\statement.c" />
     <ClCompile Include="..\Modules\_sqlite\util.c" />
+    <ClCompile Include="..\Modules\_sqlite\blob.c" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="..\PC\python_nt.rc" />
diff --git a/PCbuild/_sqlite3.vcxproj.filters b/PCbuild/_sqlite3.vcxproj.filters
index 79fc17b53fb50..f4a265eba7dd8 100644
--- a/PCbuild/_sqlite3.vcxproj.filters
+++ b/PCbuild/_sqlite3.vcxproj.filters
@@ -36,6 +36,9 @@
     <ClInclude Include="..\Modules\_sqlite\util.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\Modules\_sqlite\blob.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\Modules\_sqlite\connection.c">
@@ -62,6 +65,9 @@
     <ClCompile Include="..\Modules\_sqlite\util.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\Modules\_sqlite\blob.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="..\PC\python_nt.rc">
diff --git a/setup.py b/setup.py
index 1e1627ec3927d..60a45cf260f6b 100644
--- a/setup.py
+++ b/setup.py
@@ -1256,6 +1256,7 @@ def detect_dbm_gdbm(self):
 
     def detect_sqlite(self):
         sources = [
+            "_sqlite/blob.c",
             "_sqlite/connection.c",
             "_sqlite/cursor.c",
             "_sqlite/microprotocols.c",



More information about the Python-checkins mailing list