[Python-checkins] bpo-33189: pygettext.py now accepts only literal strings (GH-6364)
Miss Islington (bot)
webhook-mailer at python.org
Thu Apr 19 02:49:18 EDT 2018
https://github.com/python/cpython/commit/a4fb580f701df5bf07ce569a4f43abfb05c92759
commit: a4fb580f701df5bf07ce569a4f43abfb05c92759
branch: 3.7
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2018-04-18T23:49:15-07:00
summary:
bpo-33189: pygettext.py now accepts only literal strings (GH-6364)
as docstrings and translatable strings, and rejects
bytes literals and f-string expressions.
(cherry picked from commit 69524821a87251b7aee966f6e46b3810ff5aaa64)
Co-authored-by: Serhiy Storchaka <storchaka at gmail.com>
files:
A Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst
M Lib/test/test_tools/test_i18n.py
M Tools/i18n/pygettext.py
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 56a273429898..8b2b90d6142b 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -3,7 +3,7 @@
import os
import sys
import unittest
-import textwrap
+from textwrap import dedent
from test.support.script_helper import assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
@@ -109,9 +109,68 @@ def test_POT_Creation_Date(self):
# This will raise if the date format does not exactly match.
datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
+ def test_funcdocstring(self):
+ for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+ with self.subTest(doc):
+ msgids = self.extract_docstrings_from_str(dedent('''\
+ def foo(bar):
+ %s
+ ''' % doc))
+ self.assertIn('doc', msgids)
+
+ def test_funcdocstring_bytes(self):
+ msgids = self.extract_docstrings_from_str(dedent('''\
+ def foo(bar):
+ b"""doc"""
+ '''))
+ self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+ def test_funcdocstring_fstring(self):
+ msgids = self.extract_docstrings_from_str(dedent('''\
+ def foo(bar):
+ f"""doc"""
+ '''))
+ self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+ def test_classdocstring(self):
+ for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+ with self.subTest(doc):
+ msgids = self.extract_docstrings_from_str(dedent('''\
+ class C:
+ %s
+ ''' % doc))
+ self.assertIn('doc', msgids)
+
+ def test_classdocstring_bytes(self):
+ msgids = self.extract_docstrings_from_str(dedent('''\
+ class C:
+ b"""doc"""
+ '''))
+ self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+ def test_classdocstring_fstring(self):
+ msgids = self.extract_docstrings_from_str(dedent('''\
+ class C:
+ f"""doc"""
+ '''))
+ self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+ def test_msgid(self):
+ msgids = self.extract_docstrings_from_str(
+ '''_("""doc""" r'str' u"ing")''')
+ self.assertIn('docstring', msgids)
+
+ def test_msgid_bytes(self):
+ msgids = self.extract_docstrings_from_str('_(b"""doc""")')
+ self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+ def test_msgid_fstring(self):
+ msgids = self.extract_docstrings_from_str('_(f"""doc""")')
+ self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
def test_funcdocstring_annotated_args(self):
""" Test docstrings for functions with annotated args """
- msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+ msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar: str):
"""doc"""
'''))
@@ -119,7 +178,7 @@ def foo(bar: str):
def test_funcdocstring_annotated_return(self):
""" Test docstrings for functions with annotated return type """
- msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+ msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar) -> str:
"""doc"""
'''))
@@ -127,7 +186,7 @@ def foo(bar) -> str:
def test_funcdocstring_defvalue_args(self):
""" Test docstring for functions with default arg values """
- msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+ msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar=()):
"""doc"""
'''))
@@ -137,7 +196,7 @@ def test_funcdocstring_multiple_funcs(self):
""" Test docstring extraction for multiple functions combining
annotated args, annotated return types and default arg values
"""
- msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+ msgids = self.extract_docstrings_from_str(dedent('''\
def foo1(bar: tuple=()) -> str:
"""doc1"""
@@ -155,7 +214,7 @@ def test_classdocstring_early_colon(self):
""" Test docstring extraction for a class with colons occuring within
the parentheses.
"""
- msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+ msgids = self.extract_docstrings_from_str(dedent('''\
class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
"""doc"""
'''))
diff --git a/Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst b/Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst
new file mode 100644
index 000000000000..4d4137240e61
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst
@@ -0,0 +1,2 @@
+:program:`pygettext.py` now recognizes only literal strings as docstrings
+and translatable strings, and rejects bytes literals and f-string expressions.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 13d7a649aec4..b46dd339736f 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
return ''.join(escapes[b] for b in s.encode(encoding))
+def is_literal_string(s):
+ return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+
+
def safe_eval(s):
# unwrap quotes, safely
return eval(s, {'__builtins__':{}}, {})
@@ -317,8 +321,8 @@ def __init__(self, options):
def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
## import token
-## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-## 'tstring:', tstring
+## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
+## file=sys.stderr)
self.__state(ttype, tstring, stup[0])
def __waiting(self, ttype, tstring, lineno):
@@ -327,7 +331,7 @@ def __waiting(self, ttype, tstring, lineno):
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
# module docstring?
if self.__freshmodule:
- if ttype == tokenize.STRING:
+ if ttype == tokenize.STRING and is_literal_string(tstring):
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__freshmodule = 0
elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ def __suiteseen(self, ttype, tstring, lineno):
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
- if ttype == tokenize.STRING:
+ if ttype == tokenize.STRING and is_literal_string(tstring):
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ def __openseen(self, ttype, tstring, lineno):
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
- elif ttype == tokenize.STRING:
+ elif ttype == tokenize.STRING and is_literal_string(tstring):
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
More information about the Python-checkins
mailing list