[pypy-commit] pypy py3k: Implement str.isidentifier()

amauryfa noreply at buildbot.pypy.org
Sat Jan 14 21:48:26 CET 2012


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3k
Changeset: r51318:c863145453cb
Date: 2011-12-22 10:44 +0100
http://bitbucket.org/pypy/pypy/changeset/c863145453cb/

Log:	Implement str.isidentifier()

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -162,6 +162,13 @@
         assert "!Brown Fox".istitle() == True
         assert "Brow&&&&N Fox".istitle() == True
         assert "!Brow&&&&n Fox".istitle() == False
+
+    def test_isidentifier(self):  # checks str.isidentifier() returns real bools
+        assert "".isidentifier() is False  # empty string is not an identifier
+        assert "a4".isidentifier() is True  # digit accepted after the first character
+        assert "_var".isidentifier() is True  # leading underscore accepted
+        assert "_!var".isidentifier() is False  # '!' rejected as a continuation character
+        assert "3abc".isidentifier() is False  # leading digit rejected
         
     def test_capitalize(self):
         assert "brown fox".capitalize() == "Brown fox"
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -279,6 +279,27 @@
             previous_is_cased = False
     return space.newbool(cased)
 
+def unicode_isidentifier__Unicode(space, w_unicode):  # str.isidentifier(): returns space.w_True or space.w_False
+    v = w_unicode._value
+    if len(v) == 0:  # the empty string is never an identifier
+        return space.w_False
+
+    # PEP 3131 says that the first character must be in XID_Start and
+    # subsequent characters in XID_Continue, and for the ASCII range,
+    # the 2.x rules apply (i.e. start with letters and underscore,
+    # continue with letters, digits, underscore). However, given the
+    # current definition of XID_Start and XID_Continue, it is
+    # sufficient to check just for these, except that _ must be
+    # allowed as starting an identifier.
+    first = v[0]
+    if not (unicodedb.isxidstart(ord(first)) or first == u'_'):  # '_' is special-cased for the first character
+        return space.w_False
+
+    for i in range(1, len(v)):  # every character after the first must be XID_Continue
+        if not unicodedb.isxidcontinue(ord(v[i])):
+            return space.w_False
+    return space.w_True
+
 def _strip(space, w_self, w_chars, left, right):
     "internal function called by str_xstrip methods"
     u_self = w_self._value
diff --git a/pypy/objspace/std/unicodetype.py b/pypy/objspace/std/unicodetype.py
--- a/pypy/objspace/std/unicodetype.py
+++ b/pypy/objspace/std/unicodetype.py
@@ -107,6 +107,10 @@
                              ' characters in S are uppercase and there is\nat'
                              ' least one cased character in S, False'
                              ' otherwise.')
+unicode_isidentifier = SMM('isidentifier', 1,
+                         doc='S.isidentifier() -> bool\n\nReturn True if S is'
+                             ' a valid identifier according\nto the language'
+                             ' definition.')
 unicode_join       = SMM('join', 2,
                          doc='S.join(sequence) -> unicode\n\nReturn a string'
                              ' which is the concatenation of the strings in'


More information about the pypy-commit mailing list