[Mailman-Developers] patch for using Bouncers/Catchall.py with Python 2.2.1

Wed, 8 May 2002 13:19:13 -0500

---------------------- multipart/mixed attachment
I just upgraded from Mailman v. 1.1 to v. 2.0.10, together with a
Python upgrade from v. 1.5.2 to v. 2.2.1.  Mostly things went OK, but I
hit a snag: the Mailman/Bouncers/Catchall.py file imports the old
regex and regsub modules, and my Python v. 2.2.1 build doesn't provide
them.  So, in case anyone is interested, the patch below converts
Mailman/Bouncers/Catchall.py to use the re module in place of regex
and regsub.  The patch has been lightly tested and seems to work.

Regards,

Roy Bixler
The University of Chicago Press
rcb@ucp.uchicago.edu

---------------------- multipart/mixed attachment

--- Catchall.py.org	Tue Apr 17 23:41:47 2001
+++ Catchall.py	Wed May  8 12:07:21 2002
@@ -23,16 +23,9 @@
 # annoying so if we can import the warnings module, we turn off warnings about
 # the import of regsub.  It's not worth changing the uses of regsub to use the
 # re module because Catchall.py is going away in Mailman 2.1.
-try:
-    import warnings
-    warnings.filterwarnings('ignore', module='regsub')
-except ImportError:
-    pass
 
 import re
 import string
-import regsub
-import regex
 from types import StringType
 
 from Mailman import mm_cfg
@@ -67,11 +60,10 @@
     mime_info = msg.getheader('content-type')
     boundry = None
     if mime_info:
-        mime_info_parts = regsub.splitx(
-            mime_info, '[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="[^"]+"')
+        mime_info_parts = re.split(
+            '([Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="[^"]+")', mime_info)
         if len(mime_info_parts) > 1:
-            boundry = regsub.splitx(mime_info_parts[1],
-                                    '"[^"]+"')[1][1:-1]
+            boundry = re.split('("[^"]+")', mime_info_parts[1])[1][1:-1]
 
     # snag out the message body
     msg.rewindbody()
@@ -85,14 +77,13 @@
             relevant_text = relevant_text[0]
     else:
         # This looks strange, but at least 2 are going to be no-ops.
-        relevant_text = regsub.split(msgbody,
-                                     '^.*Message header follows.*$')[0]
-        relevant_text = regsub.split(relevant_text,
-                                     '^The text you sent follows:.*$')[0]
-        relevant_text = regsub.split(
-            relevant_text, '^Additional Message Information:.*$')[0]
-        relevant_text = regsub.split(relevant_text,
-                                     '^-+Your original message-+.*$')[0]
+        relevant_text = re.split('^.*Message header follows.*$', msgbody)[0]
+        relevant_text = re.split('^The text you sent follows:.*$',
+                                 relevant_text)[0]
+        relevant_text = re.split('^Additional Message Information:.*$',
+            relevant_text)[0]
+        relevant_text = re.split('^-+Your original message-+.*$',
+                                 relevant_text)[0]
 
     BOUNCE = 1
     REMOVE = 2
@@ -100,30 +91,30 @@
     # Bounce patterns where it's simple to figure out the email addr.
     email_regexp = '<?\([^ \t@|<>]+@[^ \t@<>]+\.[^ \t<>.]+\)>?'
     simple_bounce_pats = (
-        (regex.compile('.*451 %s.*' % email_regexp), BOUNCE),
-        (regex.compile('.*554 %s.*' % email_regexp), BOUNCE),
-        (regex.compile('.*552 %s.*' % email_regexp), BOUNCE),
-        (regex.compile('.*501 %s.*' % email_regexp), BOUNCE),
-        (regex.compile('.*553 %s.*' % email_regexp), BOUNCE),
-        (regex.compile('.*550 %s.*' % email_regexp), BOUNCE),
-        (regex.compile('%s .bounced.*' % email_regexp), BOUNCE),
-        (regex.compile('.*%s\.\.\. Deferred.*' % email_regexp), BOUNCE),
-        (regex.compile('.*User %s not known.*' % email_regexp), REMOVE),
-        (regex.compile('.*%s: User unknown.*' % email_regexp), REMOVE),
-        (regex.compile('.*%s\.\.\. User unknown' % email_regexp), REMOVE))
+        (re.compile('.*451 %s.*' % email_regexp), BOUNCE),
+        (re.compile('.*554 %s.*' % email_regexp), BOUNCE),
+        (re.compile('.*552 %s.*' % email_regexp), BOUNCE),
+        (re.compile('.*501 %s.*' % email_regexp), BOUNCE),
+        (re.compile('.*553 %s.*' % email_regexp), BOUNCE),
+        (re.compile('.*550 %s.*' % email_regexp), BOUNCE),
+        (re.compile('%s .bounced.*' % email_regexp), BOUNCE),
+        (re.compile('.*%s\.\.\. Deferred.*' % email_regexp), BOUNCE),
+        (re.compile('.*User %s not known.*' % email_regexp), REMOVE),
+        (re.compile('.*%s: User unknown.*' % email_regexp), REMOVE),
+        (re.compile('.*%s\.\.\. User unknown' % email_regexp), REMOVE))
     # patterns we can't directly extract the email (special case these)
-    messy_pattern_1 = regex.compile('^Recipient .*$')
-    messy_pattern_2 = regex.compile('^Addressee: .*$')
-    messy_pattern_3 = regex.compile('^User .* not listed.*$')
-    messy_pattern_4 = regex.compile('^550 [^ ]+\.\.\. User unknown.*$')
-    messy_pattern_5 = regex.compile('^User [^ ]+ is not defined.*$')
-    messy_pattern_6 = regex.compile('^[ \t]*[^ ]+: User unknown.*$')
-    messy_pattern_7 = regex.compile('^[^ ]+ - User currently disabled.*$')
+    messy_pattern_1 = re.compile('^Recipient .*$')
+    messy_pattern_2 = re.compile('^Addressee: .*$')
+    messy_pattern_3 = re.compile('^User .* not listed.*$')
+    messy_pattern_4 = re.compile('^550 [^ ]+\.\.\. User unknown.*$')
+    messy_pattern_5 = re.compile('^User [^ ]+ is not defined.*$')
+    messy_pattern_6 = re.compile('^[ \t]*[^ ]+: User unknown.*$')
+    messy_pattern_7 = re.compile('^[^ ]+ - User currently disabled.*$')
 
     # Patterns for cases where email addr is separate from error cue.
     separate_cue_1 = re.compile(
         '^554 .+\.\.\. unknown mailer error.*$', re.I)
-    separate_addr_1 = regex.compile('expanded from: %s' % email_regexp)
+    separate_addr_1 = re.compile('expanded from: %s' % email_regexp)
 
     message_grokked = 0
     use_prospects = 0
@@ -131,34 +122,34 @@
 
     for line in string.split(relevant_text, '\n'):
         for pattern, action in simple_bounce_pats:
-            if pattern.match(line) <> -1:
+            if pattern.match(line) is not None:
                 email = extract(line)
                 candidates.append((string.split(email,',')[0], action))
                 message_grokked = 1
 
         # Now for the special case messages that are harder to parse...
-        if (messy_pattern_1.match(line) <> -1
-            or messy_pattern_2.match(line) <> -1):
+        if (messy_pattern_1.match(line) is not None
+            or messy_pattern_2.match(line) is not None):
             username = string.split(line)[1]
             candidates.append(('%s@%s' % (username, remote_host),
                                BOUNCE))
             message_grokked = 1
             continue
-        if (messy_pattern_3.match(line) <> -1
-            or messy_pattern_4.match(line) <> -1
-            or messy_pattern_5.match(line) <> -1):
+        if (messy_pattern_3.match(line) is not None
+            or messy_pattern_4.match(line) is not None
+            or messy_pattern_5.match(line) is not None):
             username = string.split(line)[1]
             candidates.append(('%s@%s' % (username, remote_host),
                                REMOVE))
             message_grokked = 1
             continue
-        if messy_pattern_6.match(line) <> -1:
+        if messy_pattern_6.match(line) is not None:
             username = string.split(string.strip(line))[0][:-1]
             candidates.append(('%s@%s' % (username, remote_host),
                                REMOVE))
             message_grokked = 1
             continue
-        if messy_pattern_7.match(line) <> -1:
+        if messy_pattern_7.match(line) is not None:
             username = string.split(string.strip(line))[0]
             candidates.append(('%s@%s' % (username, remote_host),
                                REMOVE))
@@ -169,11 +160,12 @@
             # Here's an error message that doesn't contain the addr.
             # Set a flag to use prospects found on separate lines.
             use_prospects = 1
-        if separate_addr_1.search(line) != -1:
+        separate_addr_1_match = separate_addr_1.search(line)
+        if separate_addr_1_match is not None:
             # Found an addr that *might* be part of an error message.
             # Register it on prospects, where it will only be used if a 
             # separate check identifies this message as an error message.
-            prospects.append((separate_addr_1.group(1), BOUNCE))
+            prospects.append((separate_addr_1_match.group(1), BOUNCE))
 
     if use_prospects and prospects:
         candidates = candidates + prospects
@@ -186,7 +178,7 @@
             who = who[:el]
         if len(who) > 1 and who[0] == '<':
             # Use stuff after open angle and before (optional) close:
-            who = regsub.splitx(who[1:], ">")[0]
+            who = re.split("(>)", who[1:])[0]
         if who not in did:
             did.append(who)
 ##    return message_grokked
@@ -195,8 +187,8 @@
 
 
 def extract(line):
-    email = regsub.splitx(line, '[^ \t@<>]+@[^ \t@<>]+\.[^ \t<>.]+')[1]
+    email = re.split('([^ \t@<>]+@[^ \t@<>]+\.[^ \t<>.]+)', line)[1]
     if email[0] == '<':
-        return regsub.splitx(email[1:], ">")[0]
+        return re.split("(>)", email[1:])[0]
     else:
         return email

---------------------- multipart/mixed attachment--