[Python-checkins] Ensure the str member of the tokenizer is always initialised (GH-29681)

pablogsal webhook-mailer at python.org
Sat Nov 20 21:06:44 EST 2021


https://github.com/python/cpython/commit/4f006a789a35f5d1a7ef142bd1304ce167392457
commit: 4f006a789a35f5d1a7ef142bd1304ce167392457
branch: main
author: Pablo Galindo Salgado <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2021-11-21T02:06:39Z
summary:

Ensure the str member of the tokenizer is always initialised (GH-29681)

files:
M Parser/pegen_errors.c
M Parser/tokenizer.c
M Parser/tokenizer.h

diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
index 6eeab0a97226f..694184a03b075 100644
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@@ -245,7 +245,7 @@ get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
      * (multi-line) statement are stored in p->tok->interactive_src_start.
      * If not, we're parsing from a string, which means that the whole source
      * is stored in p->tok->str. */
-    assert(p->tok->fp == NULL || p->tok->fp == stdin);
+    assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin);
 
     char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
     assert(cur_line != NULL);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 69d2c08b43926..6358cdf654e18 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -87,7 +87,7 @@ tok_new(void)
     tok->async_def_indent = 0;
     tok->async_def_nl = 0;
     tok->interactive_underflow = IUNDERFLOW_NORMAL;
-
+    tok->str = NULL;
     return tok;
 }
 
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 1d1cfd639d9d5..0cb665104b2b8 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -71,7 +71,7 @@ struct tok_state {
     PyObject *decoding_readline; /* open(...).readline */
     PyObject *decoding_buffer;
     const char* enc;        /* Encoding for the current str. */
-    char* str;
+    char* str;          /* Source string being tokenized (if tokenizing from a string)*/
     char* input;       /* Tokenizer's newline translated copy of the string. */
 
     int type_comments;      /* Whether to look for type comments */



More information about the Python-checkins mailing list