[Python-checkins] cpython: Issue #13748: Raw bytes literals can now be written with the `rb` prefix as well as `br`.
antoine.pitrou
python-checkins at python.org
Thu Jan 12 22:47:54 CET 2012
http://hg.python.org/cpython/rev/bbed36370b08
changeset: 74354:bbed36370b08
user: Antoine Pitrou <solipsis at pitrou.net>
date: Thu Jan 12 22:46:19 2012 +0100
summary:
Issue #13748: Raw bytes literals can now be written with the `rb` prefix as well as `br`.
files:
Doc/reference/lexical_analysis.rst | 6 ++++-
Lib/test/test_strlit.py | 21 ++++++++++++++---
Lib/test/tokenize_tests.txt | 8 ++++++
Misc/NEWS | 3 ++
Parser/tokenizer.c | 16 +++++++------
Python/ast.c | 19 ++++++++++------
6 files changed, 54 insertions(+), 19 deletions(-)
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -412,7 +412,7 @@
.. productionlist::
bytesliteral: `bytesprefix`(`shortbytes` | `longbytes`)
- bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR"
+ bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB"
shortbytes: "'" `shortbytesitem`* "'" | '"' `shortbytesitem`* '"'
longbytes: "'''" `longbytesitem`* "'''" | '"""' `longbytesitem`* '"""'
shortbytesitem: `shortbyteschar` | `bytesescapeseq`
@@ -446,6 +446,10 @@
literal characters. As a result, in string literals, ``'\U'`` and ``'\u'``
escapes in raw strings are not treated specially.
+ .. versionadded:: 3.3
+ The ``'rb'`` prefix of raw bytes literals has been added as a synonym
+ of ``'br'``.
+
In triple-quoted strings, unescaped newlines and quotes are allowed (and are
retained), except that three unescaped quotes in a row terminate the string. (A
"quote" is the character used to open the string, i.e. either ``'`` or ``"``.)
diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py
--- a/Lib/test/test_strlit.py
+++ b/Lib/test/test_strlit.py
@@ -2,10 +2,10 @@
There are four types of string literals:
- 'abc' -- normal str
- r'abc' -- raw str
- b'xyz' -- normal bytes
- br'xyz' -- raw bytes
+ 'abc' -- normal str
+ r'abc' -- raw str
+ b'xyz' -- normal bytes
+ br'xyz' | rb'xyz' -- raw bytes
The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
@@ -103,12 +103,25 @@
def test_eval_bytes_raw(self):
self.assertEqual(eval(""" br'x' """), b'x')
+ self.assertEqual(eval(""" rb'x' """), b'x')
self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
+ self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
self.assertEqual(eval(""" br'\x01' """), byte(1))
+ self.assertEqual(eval(""" rb'\x01' """), byte(1))
self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
+ self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
self.assertRaises(SyntaxError, eval, """ br'\x81' """)
+ self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
+ self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
+ self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
+ self.assertRaises(SyntaxError, eval, """ bb'' """)
+ self.assertRaises(SyntaxError, eval, """ rr'' """)
+ self.assertRaises(SyntaxError, eval, """ brr'' """)
+ self.assertRaises(SyntaxError, eval, """ bbr'' """)
+ self.assertRaises(SyntaxError, eval, """ rrb'' """)
+ self.assertRaises(SyntaxError, eval, """ rbb'' """)
def check_encoding(self, encoding, extra=""):
modname = "xx_" + encoding.replace("-", "_")
diff --git a/Lib/test/tokenize_tests.txt b/Lib/test/tokenize_tests.txt
--- a/Lib/test/tokenize_tests.txt
+++ b/Lib/test/tokenize_tests.txt
@@ -114,8 +114,12 @@
y = b"abc" + B"ABC"
x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC'
y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC"
+x = rb'abc' + rB'ABC' + Rb'ABC' + RB'ABC'
+y = rb"abc" + rB"ABC" + Rb"ABC" + RB"ABC"
x = br'\\' + BR'\\'
+x = rb'\\' + RB'\\'
x = br'\'' + ''
+x = rb'\'' + ''
y = br'''
foo bar \\
baz''' + BR'''
@@ -124,6 +128,10 @@
bar \\ baz
""" + bR'''spam
'''
+y = rB"""foo
+bar \\ baz
+""" + Rb'''spam
+'''
# Indentation
if 1:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
Core and Builtins
-----------------
+- Issue #13748: Raw bytes literals can now be written with the ``rb`` prefix
+ as well as ``br``.
+
- Issue #12736: Use full unicode case mappings for upper, lower, and title case.
- Issue #12760: Add a create mode to open(). Patch by David Townshend.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1412,13 +1412,15 @@
/* Identifier (most frequent token!) */
nonascii = 0;
if (is_potential_identifier_start(c)) {
- /* Process b"", r"" and br"" */
- if (c == 'b' || c == 'B') {
- c = tok_nextc(tok);
- if (c == '"' || c == '\'')
- goto letter_quote;
- }
- if (c == 'r' || c == 'R') {
+ /* Process b"", r"", br"" and rb"" */
+ int saw_b = 0, saw_r = 0;
+ while (1) {
+ if (!saw_b && (c == 'b' || c == 'B'))
+ saw_b = 1;
+ else if (!saw_r && (c == 'r' || c == 'R'))
+ saw_r = 1;
+ else
+ break;
c = tok_nextc(tok);
if (c == '"' || c == '\'')
goto letter_quote;
diff --git a/Python/ast.c b/Python/ast.c
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3744,13 +3744,18 @@
int rawmode = 0;
int need_encoding;
if (isalpha(quote)) {
- if (quote == 'b' || quote == 'B') {
- quote = *++s;
- *bytesmode = 1;
- }
- if (quote == 'r' || quote == 'R') {
- quote = *++s;
- rawmode = 1;
+ while (!*bytesmode || !rawmode) {
+ if (quote == 'b' || quote == 'B') {
+ quote = *++s;
+ *bytesmode = 1;
+ }
+ else if (quote == 'r' || quote == 'R') {
+ quote = *++s;
+ rawmode = 1;
+ }
+ else {
+ break;
+ }
}
}
if (quote != '\'' && quote != '\"') {
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list