[Python-checkins] bpo-46848: Use stringlib/fastsearch in mmap (GH-31625)
sweeneyde
webhook-mailer at python.org
Tue Mar 1 23:46:34 EST 2022
https://github.com/python/cpython/commit/6ddb09f35b922a3bbb59e408a3ca7636a6938468
commit: 6ddb09f35b922a3bbb59e408a3ca7636a6938468
branch: main
author: Dennis Sweeney <36520290+sweeneyde at users.noreply.github.com>
committer: sweeneyde <36520290+sweeneyde at users.noreply.github.com>
date: 2022-03-01T23:46:30-05:00
summary:
bpo-46848: Use stringlib/fastsearch in mmap (GH-31625)
Speed up mmap.find(). Add _PyBytes_Find() and _PyBytes_ReverseFind().
files:
A Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst
M Include/cpython/bytesobject.h
M Modules/mmapmodule.c
M Objects/bytesobject.c
diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h
index 6b3f55224fc55..38a0fe0af660f 100644
--- a/Include/cpython/bytesobject.h
+++ b/Include/cpython/bytesobject.h
@@ -116,3 +116,22 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
void *str,
const void *bytes,
Py_ssize_t size);
+
+/* Substring Search.
+
+ Returns the index of the first occurrence of
+ a substring ("needle") in a larger text ("haystack").
+ If the needle is not found, return -1.
+ If the needle is found, add offset to the index.
+*/
+
+PyAPI_FUNC(Py_ssize_t)
+_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
+ const char *needle, Py_ssize_t len_needle,
+ Py_ssize_t offset);
+
+/* Same as above, but search right-to-left */
+PyAPI_FUNC(Py_ssize_t)
+_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
+ const char *needle, Py_ssize_t len_needle,
+ Py_ssize_t offset);
diff --git a/Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst b/Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst
new file mode 100644
index 0000000000000..bd20a843ab6ce
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst
@@ -0,0 +1,3 @@
+For performance, use the optimized string-searching implementations
+from :meth:`~bytes.find` and :meth:`~bytes.rfind`
+for :meth:`~mmap.mmap.find` and :meth:`~mmap.mmap.rfind`.
diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c
index 26cedf1b9006d..6a038e72f93cf 100644
--- a/Modules/mmapmodule.c
+++ b/Modules/mmapmodule.c
@@ -315,12 +315,8 @@ mmap_gfind(mmap_object *self,
if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
&view, &start, &end)) {
return NULL;
- } else {
- const char *p, *start_p, *end_p;
- int sign = reverse ? -1 : 1;
- const char *needle = view.buf;
- Py_ssize_t len = view.len;
-
+ }
+ else {
if (start < 0)
start += self->size;
if (start < 0)
@@ -335,21 +331,19 @@ mmap_gfind(mmap_object *self,
else if (end > self->size)
end = self->size;
- start_p = self->data + start;
- end_p = self->data + end;
-
- for (p = (reverse ? end_p - len : start_p);
- (p >= start_p) && (p + len <= end_p); p += sign) {
- Py_ssize_t i;
- for (i = 0; i < len && needle[i] == p[i]; ++i)
- /* nothing */;
- if (i == len) {
- PyBuffer_Release(&view);
- return PyLong_FromSsize_t(p - self->data);
- }
+ Py_ssize_t res;
+ if (reverse) {
+ res = _PyBytes_ReverseFind(
+ self->data + start, end - start,
+ view.buf, view.len, start);
+ }
+ else {
+ res = _PyBytes_Find(
+ self->data + start, end - start,
+ view.buf, view.len, start);
}
PyBuffer_Release(&view);
- return PyLong_FromLong(-1);
+ return PyLong_FromSsize_t(res);
}
}
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 3d8a21696d1c8..4c67b8f7af213 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1247,6 +1247,24 @@ PyBytes_AsStringAndSize(PyObject *obj,
#undef STRINGLIB_GET_EMPTY
+Py_ssize_t
+_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
+ const char *needle, Py_ssize_t len_needle,
+ Py_ssize_t offset)
+{
+ return stringlib_find(haystack, len_haystack,
+ needle, len_needle, offset);
+}
+
+Py_ssize_t
+_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
+ const char *needle, Py_ssize_t len_needle,
+ Py_ssize_t offset)
+{
+ return stringlib_rfind(haystack, len_haystack,
+ needle, len_needle, offset);
+}
+
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
More information about the Python-checkins
mailing list