python, perl, lisp, tcl / got a decision to make, can you give me a view point please ? and a joke for python users :)
Marcin 'Qrczak' Kowalczyk
qrczak at knm.org.pl
Sat Mar 24 10:21:48 EST 2001
Ok, it seems that you can't stand Perl's success and everybody here
must from time to time say that Perl is evil :-)
So to make you laugh, here is my favorite Perl silliness
(perl-5.6.0/toke.c). Enjoy the algorithm of disambiguation of the
last case!
/* S_intuit_more
* Returns TRUE if there's more to the expression (e.g., a subscript),
* FALSE otherwise.
*
* It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
*
* ->[ and ->{ return TRUE
* { and [ outside a pattern are always subscripts, so return TRUE
* if we're outside a pattern and it's not { or [, then return FALSE
* if we're in a pattern and the first char is a {
* {4,5} (any digits around the comma) returns FALSE
* if we're in a pattern and the first char is a [
* [] returns FALSE
* [SOMETHING] has a funky algorithm to decide whether it's a
* character class or not. It has to deal with things like
* /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
* anything else returns TRUE
*/
/* This is the one truly awful dwimmer necessary to conflate C and sed. */
STATIC int
S_intuit_more(pTHX_ register char *s)
{
if (PL_lex_brackets)
return TRUE;
if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
return TRUE;
if (*s != '{' && *s != '[')
return FALSE;
if (!PL_lex_inpat)
return TRUE;
/* In a pattern, so maybe we have {n,m}. */
if (*s == '{') {
s++;
if (!isDIGIT(*s))
return TRUE;
while (isDIGIT(*s))
s++;
if (*s == ',')
s++;
while (isDIGIT(*s))
s++;
if (*s == '}')
return FALSE;
return TRUE;
}
/* On the other hand, maybe we have a character class */
s++;
if (*s == ']' || *s == '^')
return FALSE;
else {
/* this is terrifying, and it works */
int weight = 2; /* let's weigh the evidence */
char seen[256];
unsigned char un_char = 255, last_un_char;
char *send = strchr(s,']');
char tmpbuf[sizeof PL_tokenbuf * 4];
if (!send) /* has to be an expression */
return TRUE;
Zero(seen,256,char);
if (*s == '$')
weight -= 3;
else if (isDIGIT(*s)) {
if (s[1] != ']') {
if (isDIGIT(s[1]) && s[2] == ']')
weight -= 10;
}
else
weight -= 100;
}
for (; s < send; s++) {
last_un_char = un_char;
un_char = (unsigned char)*s;
switch (*s) {
case '@':
case '&':
case '$':
weight -= seen[un_char] * 10;
if (isALNUM_lazy_if(s+1,UTF)) {
scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
weight -= 100;
else
weight -= 10;
}
else if (*s == '$' && s[1] &&
strchr("[#!%*<>()-=",s[1])) {
if (/*{*/ strchr("])} =",s[2]))
weight -= 10;
else
weight -= 1;
}
break;
case '\\':
un_char = 254;
if (s[1]) {
if (strchr("wds]",s[1]))
weight += 100;
else if (seen['\''] || seen['"'])
weight += 1;
else if (strchr("rnftbxcav",s[1]))
weight += 40;
else if (isDIGIT(s[1])) {
weight += 40;
while (s[1] && isDIGIT(s[1]))
s++;
}
}
else
weight += 100;
break;
case '-':
if (s[1] == '\\')
weight += 50;
if (strchr("aA01! ",last_un_char))
weight += 30;
if (strchr("zZ79~",s[1]))
weight += 30;
if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
weight -= 5; /* cope with negative subscript */
break;
default:
if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
isALPHA(*s) && s[1] && isALPHA(s[1])) {
char *d = tmpbuf;
while (isALPHA(*s))
*d++ = *s++;
*d = '\0';
if (keyword(tmpbuf, d - tmpbuf))
weight -= 150;
}
if (un_char == last_un_char + 1)
weight += 5;
weight -= seen[un_char];
break;
}
seen[un_char]++;
}
if (weight >= 0) /* probably a character class */
return FALSE;
}
return TRUE;
}
--
__("< Marcin Kowalczyk * qrczak at knm.org.pl http://qrczak.ids.net.pl/
\__/
^^ SYGNATURA ZASTĘPCZA
QRCZAK
More information about the Python-list
mailing list