[PATCH] A compromise on case

Nick Mathewson nickm at mit.edu
Tue May 23 13:18:25 EDT 2000


"Everybody's always talking about the weather, but nobody seems to do
 anything about it."

So I wanted to see how hard it would be to make Python's error
messages friendlier to case-insensitive newbies, without breaking
existing programs that rely on 'Guido' and 'GUIdo' being different
names.  I decided the best way to do this was to patch the code that
raises NameError and AttributeError.

My rationale was this: Guido has said that he's considering
case-insensitivity because beginners often run into problems.  ***My
hypothesis is that their problems aren't due to the language, but due
to the unhelpful error messages it gives.*** After all, who could
learn much from an error message like this:

         Traceback (innermost last):
           File "<stdin>", line 1, in ?
         NameError: X

So I tried to make a quick-and-dirty patch to the Python interpreter to
do something more reasonable.  Here's a sample interaction:
[tracebacks omitted, long lines wrapped]

    >>> def square(x):       
    ...     return X*X

    >>> print MAP(square, [1,2,3])
    NameError: No such name as 'MAP'.  Perhaps you meant 'map'?
    >>> print Square(3)
    NameError: No such name as 'Square'.  Perhaps you meant 'square'?
    >>> print square(1)
    NameError: No such name as 'X'.  Perhaps you meant 'x'?

    >>> import string, re
    >>> print string.JOIN(['a', 'b', 'c'])
    AttributeError: JOIN.  Perhaps you meant 'join'?
    >>> x = re.compile('[Py]thon')
    >>> print x.CODE
    AttributeError: 'RegexObject' instance has no attribute 'CODE'.  
    Perhaps you meant 'code'?
    >>> print x.Match('Python')
    AttributeError: 'RegexObject' instance has no attribute 'Match'.  
    Perhaps you meant 'match'?
    
With this kind of error message, newbies can learn to code better, but
without breaking existing programs that depend on case-sensitive names.

Of course, our powers are not what they could be:
    >>> print string.combine(['a', 'b', 'c'])
    AttributeError: combine

=====

Naturally, this patch has some problems.  After all, it's only the
result of a few hours' hacking, by a guy who doesn't know Python's
internals too well.  Here are its biggest problems:

      * The code isn't particularly smart or clean.  It's probably
        possible to do something simpler with a more generic
        framework.

      * It probably has a few reference leaks.  If it does, they would
        only occur when an error is actually raised.

      * It's a drop in the bucket.  Python's error messages need more
        help than this.

      * It only handles case errors, not other kinds of misspellings.

      * It requires that your system libraries have the function
        'strcasecmp'.  This should be true on most Unix systems IIRC,
        but I have no idea whether it holds true for Windows or Mac.

      * It's against Python 1.6a2.  If you want stability, of course, 
        you shouldn't be applying this at all.

Does Guido read this newsgroup, or just the mailing list?  I'm
curious to know what he would think of this strategy (ignoring my 
wretched code).

=====Begin unified diff
--- Python-1.6a2/Python/ceval.c	Mon Apr 10 08:45:10 2000
+++ casepy/Python/ceval.c	Tue May 23 12:50:21 2000
@@ -94,6 +94,8 @@
 				PyObject *, PyObject *, PyObject *));
 static void reset_exc_info Py_PROTO((PyThreadState *));
 
+static void set_name_error Py_PROTO((PyFrameObject *, PyObject *));
+static void set_attr_error Py_PROTO((PyObject *o, PyObject *attr));
 
 /* Dynamic execution profile */
 #ifdef DYNAMIC_EXECUTION_PROFILE
@@ -1194,8 +1196,9 @@
 						"no locals");
 				break;
 			}
-			if ((err = PyDict_DelItem(x, w)) != 0)
-				PyErr_SetObject(PyExc_NameError, w);
+			if ((err = PyDict_DelItem(x, w)) != 0) {
+				set_name_error(f,w);
+			}
 			break;
 
 #ifdef CASE_TOO_BIG
@@ -1262,6 +1265,8 @@
 			v = POP();
 			err = PyObject_SetAttr(v, w, (PyObject *)NULL);
 							/* del v.w */
+			if (err)
+				set_attr_error(v, w);
 			Py_DECREF(v);
 			break;
 		
@@ -1274,8 +1279,9 @@
 		
 		case DELETE_GLOBAL:
 			w = GETNAMEV(oparg);
-			if ((err = PyDict_DelItem(f->f_globals, w)) != 0)
-				PyErr_SetObject(PyExc_NameError, w);
+			if ((err = PyDict_DelItem(f->f_globals, w)) != 0) {
+				set_name_error(f,w);
+			} 
 			break;
 		
 		case LOAD_CONST:
@@ -1297,8 +1303,7 @@
 				if (x == NULL) {
 					x = PyDict_GetItem(f->f_builtins, w);
 					if (x == NULL) {
-						PyErr_SetObject(
-							PyExc_NameError, w);
+						set_name_error(f,w);
 						break;
 					}
 				}
@@ -1313,7 +1318,7 @@
 			if (x == NULL) {
 				x = PyDict_GetItem(f->f_builtins, w);
 				if (x == NULL) {
-					PyErr_SetObject(PyExc_NameError, w);
+					set_name_error(f,w);
 					break;
 				}
 			}
@@ -1384,6 +1389,8 @@
 			w = GETNAMEV(oparg);
 			v = POP();
 			x = PyObject_GetAttr(v, w);
+			if (x == NULL)
+				set_attr_error(v, w);
 			Py_DECREF(v);
 			PUSH(x);
 			if (x != NULL) continue;
@@ -2925,6 +2932,214 @@
 	return list;
 }
 
+static int	/* nonzero iff n1 and n2 are both non-NULL and equal ignoring case */
+name_is_similar(n1, n2)
+     char *n1;
+     char *n2;
+{
+#ifdef HAVE_STRCASECMP
+	return n1 && n2 && strcasecmp(n1,n2) == 0;
+#else 	/* without strcasecmp() this always reports "no match" */
+	return 0;
+#endif
+}
+
+static int	/* nonzero iff s is a string object equal to n1 ignoring case */
+name_is_similar_to_str(n1, s)
+     char *n1; PyObject *s;
+{
+#ifdef HAVE_STRCASECMP
+	/* Type-check first: PyString_AsString() sets TypeError on a non-string. */
+	char *n2 = (s && PyString_Check(s)) ? PyString_AS_STRING(s) : NULL;
+	return n1 && n2 && strcasecmp(n1,n2) == 0;
+#else
+	return 0;	/* without strcasecmp() never report a match */
+#endif
+}
+
+/* New reference to the first string key of d equal to s ignoring case, else NULL. */
+static PyObject*
+dict_ClosestName(d, s)
+	PyObject *d;
+	char *s;
+{
+	int pos = 0;
+	PyObject *key, *value;
+
+	if (d == NULL)
+		return NULL;
+	while (PyDict_Next(d, &pos, &key, &value)) {
+		/* PyString_AS_STRING is unchecked, so skip non-string keys. */
+		if (PyString_Check(key) &&
+		    name_is_similar(PyString_AS_STRING(key), s)) {
+			Py_INCREF(key);	/* PyDict_Next only lends the key */
+			return key;
+		}
+	}
+	return NULL;
+}
+
+static PyObject* 	/* first item of seq matching s ignoring case, or NULL */
+sequence_ClosestName(seq, s)
+        PyObject *seq;
+        char* s;
+{
+	PyObject *item;
+	int i;
+
+	if (!seq)
+		return NULL;
+
+	for (i = 0; ; i++) {	/* no length query: index until GetItem fails */
+		item = PySequence_GetItem(seq,i);
+		if (item == NULL) {
+			PyErr_Clear();	/* discard whatever error ended the scan */
+			break;
+		}
+		if (name_is_similar_to_str(s, item))
+			return item;	/* caller receives the reference GetItem returned */
+		Py_DECREF(item);
+	}
+	return NULL;
+}
+
+
+/* Return a new reference to an attribute name of o that equals s ignoring
+   case, or NULL if o or s is NULL or no such name is found. */
+static PyObject*
+object_ClosestAttr(o, s)
+        PyObject *o;
+	char *s;
+{
+	static char *attrlist[] = {"__members__", "__methods__", NULL};
+	PyObject *d, *cls, *lst;
+	PyObject *item;
+	char **p;
+
+	if (!o || !s)
+		return NULL;
+
+	/* Look for name in o.__dict__ */
+	d = PyObject_GetAttrString(o, "__dict__");
+	if (d == NULL)
+		PyErr_Clear();
+	else {
+		item = dict_ClosestName(d,s);
+		Py_DECREF(d);
+		if (item)
+			return item;
+	}
+	/* Look for name in o.__class__.__dict__ */
+	cls = PyObject_GetAttrString(o, "__class__");
+	if (cls == NULL)
+		PyErr_Clear();
+	else {
+		d = PyObject_GetAttrString(cls, "__dict__");
+		Py_DECREF(cls);
+
+		if (d == NULL)
+			PyErr_Clear();
+		else {
+			item = dict_ClosestName(d,s);
+			Py_DECREF(d);
+			if (item)
+				return item;
+		}
+	}
+
+	/* Look in o.__members__, o.__methods__ */
+	for (p = attrlist; *p != NULL; p++) {
+		lst = PyObject_GetAttrString(o, *p);
+		if (lst == NULL) {
+			PyErr_Clear();
+			continue;	/* a missing list must not hide the next one */
+		}
+		item = sequence_ClosestName(lst, s);
+		Py_DECREF(lst);
+		if (item)
+			return item;
+	}
+	return NULL;
+}
+
+
+/* Return a new reference to a name reachable from frame f (its locals, else its
+   code's variable names; then globals; then builtins) matching s, or NULL. */
+static PyObject*
+frame_ClosestName(f, s)
+        PyFrameObject* f;
+	PyObject* s;
+{
+	PyObject* closest;
+	char* str = PyString_AS_STRING(s);
+
+	if (f->f_locals) {
+		closest = dict_ClosestName(f->f_locals, str);
+		if (closest != NULL)
+			return closest;
+	} else if (f->f_code && f->f_code->co_varnames) {
+		/* K&R definitions get no argument checking: str must be passed. */
+		closest = sequence_ClosestName(f->f_code->co_varnames, str);
+		if (closest != NULL)
+			return closest;
+	}
+
+	closest = dict_ClosestName(f->f_globals, str);
+	if (closest != NULL)
+		return closest;
+	return dict_ClosestName(f->f_builtins, str);
+}
+
+static void	/* raise NameError for name, suggesting a case variant seen from f */
+set_name_error(f, name)
+        PyFrameObject *f;
+        PyObject *name;
+{
+	PyObject *closest = frame_ClosestName(f,name);
+	/* Bound each %s: PyErr_Format of this era fills a fixed-size buffer (verify). */
+	if (closest) {
+		PyErr_Format(PyExc_NameError,
+			     "No such name as '%.200s'.  Perhaps you meant '%.200s'?",
+			     PyString_AsString(name),
+			     PyString_AsString(closest));
+		Py_DECREF(closest);
+	} else {
+		PyErr_Format(PyExc_NameError,
+			     "No such name as '%.200s'.",
+			     PyString_AsString(name));
+	}
+}
+
+/* Re-raise the pending exception, appending a "Perhaps you meant" hint when o
+   has an attribute whose name equals attr ignoring case. No-op if none pending. */
+static void
+set_attr_error(o, attr)
+        PyObject *o;
+        PyObject *attr;
+{
+	PyObject *type, *value, *traceback;
+	PyObject *closest, *result;
+
+	if (! PyErr_Occurred())
+		return;
+	PyErr_Fetch(&type, &value, &traceback);
+	closest = object_ClosestAttr(o, PyString_AsString(attr));
+	/* A non-string value cannot be concatenated; leave it untouched. */
+	if (closest && type && value && PyString_Check(value)) {
+		result = PyString_FromString("");
+		PyString_ConcatAndDel(&result, value);
+		PyString_ConcatAndDel(&result,
+			 PyString_FromString(".  Perhaps you meant '"));
+		PyString_ConcatAndDel(&result,closest);
+		PyString_ConcatAndDel(&result,
+			 PyString_FromString("'?"));
+		PyErr_Restore(type, result, traceback);
+	} else {
+		Py_XDECREF(closest);	/* release the suggestion we are not using */
+		PyErr_Restore(type, value, traceback);
+	}
+}
+     
 
 #ifdef DYNAMIC_EXECUTION_PROFILE
 
--- Python-1.6a2/config.h.in	Fri Mar 10 17:30:29 2000
+++ casepy/config.h.in	Tue May 23 12:18:19 2000
@@ -414,6 +414,9 @@
 /* Define if you have the strdup function.  */
 #undef HAVE_STRDUP
 
+/* Define if you have the strcasecmp function.  */
+#undef HAVE_STRCASECMP
+
 /* Define if you have the strerror function.  */
 #undef HAVE_STRERROR
 
--- Python-1.6a2/configure.in	Fri Mar 31 10:15:17 2000
+++ casepy/configure.in	Mon May 22 21:44:40 2000
@@ -760,7 +760,7 @@
  select setgid setlocale setuid setsid setpgid setpgrp setvbuf \
  sigaction siginterrupt sigrelse strftime strptime symlink sysconf \
  tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
- truncate uname waitpid)
+ truncate uname waitpid strcasecmp)
 
 # check for long file support functions
 AC_CHECK_FUNCS(fseek64 fseeko fstatvfs ftell64 ftello statvfs)
--- Python-1.6a2/configure	Fri Mar 31 10:15:16 2000
+++ casepy/configure	Mon May 22 21:56:37 2000
@@ -3484,7 +3484,7 @@
  select setgid setlocale setuid setsid setpgid setpgrp setvbuf \
  sigaction siginterrupt sigrelse strftime strptime symlink sysconf \
  tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
- truncate uname waitpid
+ truncate uname waitpid strcasecmp
 do
 echo $ac_n "checking for $ac_func""... $ac_c" 1>&6
 echo "configure:3491: checking for $ac_func" >&5

=====End unified diff

Now-that-that's-done-lets-figure-out-the-type-class-unification-ly y'rs,

-- 
Nick Mathewson     <nickm at mit.edu>     http://www.mit.edu/~nickm/



More information about the Python-list mailing list