[Mailman-Users] attached script to pull out 'confirmed opt-in' and 'unconfirmed' mailman list members

Dan MacNeil dan at thecsl.org
Sun Jan 13 00:35:10 CET 2008


An organizations we (http://thecsl.org) serve is moving their 
newsletter to something like constant contact. This service has 
nice marketing features and the resources to wrestle with aol, 
hotmail, etc to prove they aren't spam.

This other serivce, requires their customers to have the list 
members re-confirm their membership if the list owner can't prove 
that the list member originally confirmed her membership

Below is a script that produces two files, one with "confirmed" 
list members and one "unconfirmed" members.

It may be useful to other people.

There may well be smarter people on this list, who can point out 
problems with the script or observe that their is an easier way 
to accomplish my goal.


#### script below this line (this line not included ######
#!/usr/bin/perl -wT

use strict;

my $LIST_NAME = 'utec-news';

# This script produces two files:

#   /tmp/$LIST_NAME_confirmed_opt_in.txt
#      the people who replied to email
#      subscribe invite or or clicked
#      confirm link in subscribe
#      invite email

#  /tmp/$LIST_NAME_unconfirmed_opt_in.txt
#      The people who were subscribed
#      rather than invited
#      or whose confirmation
#      is in logs that were
#      rotated or expunged

# requirements:
#     perl
#     sudo rights to run list_members
#     zgrep, cut, sudo commands
#     installed debian etch mailman package
#     The name of a mailman list (above)
#     Access to mailman logs

# modifying below should adapt script for
# non debian etch systems

my $RAW_CONFIRMED_FILE   = "/tmp/${LIST_NAME}_raw_confirmed.txt";
my $RAW_UNCONFIRMED_FILE = "/tmp/${LIST_NAME}_raw_unconformed.txt";

my $CONFIRMED_FILE   = "/tmp/${LIST_NAME}_confirmed.txt";
my $UNCONFIRMED_FILE = "/tmp/${LIST_NAME}_unconfirmed.txt";

my $SUBSCRIBE_LOGS = '/var/log/mailman/subscribe*';

my $CUT_CMD   = '/usr/bin/cut';
my $ZGREP_CMD = '/bin/zgrep';

my $SUDO_CMD         = '/usr/bin/sudo';
my $LIST_MEMBERS_CMD = '/usr/sbin/list_members';

$ENV{PATH} = '/bin/';

{    # main
     print "\tcreating $RAW_CONFIRMED_FILE\n";
     my $cmd = " $ZGREP_CMD -i ${LIST_NAME}:.*new.*.confirmation";
     $cmd .= " $SUBSCRIBE_LOGS ";
     $cmd .= " | $CUT_CMD  -d ':' -f5 | $CUT_CMD -d  ',' -f1 ";
     $cmd .= " | $CUT_CMD -d ' ' -f3-10 > $RAW_CONFIRMED_FILE";
     ( system($cmd) == 0 )
       or die "failed to create create: $RAW_CONFIRMED_FILE";

     print "\tcleaning $RAW_CONFIRMED_FILE\n";
     my @raw_opt_in_emails = 
pull_out_clean_emails($RAW_CONFIRMED_FILE);

     print
       "\tputting subscriber list for $LIST_NAME in 
$RAW_UNCONFIRMED_FILE\n";
     $cmd = "$SUDO_CMD $LIST_MEMBERS_CMD $LIST_NAME > 
$RAW_UNCONFIRMED_FILE";
     ( system($cmd) == 0 )
       or die "failed to create create: $RAW_CONFIRMED_FILE";

     print "\tcleaning $RAW_UNCONFIRMED_FILE\n";
     my @raw_subscriber_emails = 
pull_out_clean_emails($RAW_UNCONFIRMED_FILE);

     print "\tremoving confirmed opt-in emails from unconfirmed 
list\n";
     my @net_subscriber_emails =
       set_difference( \@raw_subscriber_emails, 
\@raw_opt_in_emails );

     print "\tremoving emails not now subscribed from  opt-in 
emails list\n";
     my @net_opt_in_emails =
       set_intersection( \@raw_subscriber_emails,
       \@raw_opt_in_emails );

     print "\tsaving results: $CONFIRMED_FILE and 
$UNCONFIRMED_FILE \n";
     output_list( $CONFIRMED_FILE,   \@net_opt_in_emails );
     output_list( $UNCONFIRMED_FILE, \@net_subscriber_emails );
}

sub pull_out_clean_emails {
     my $file = shift;
     my @emails;

     my $IN_FH;
     open $IN_FH, $file
       or die "$file: $!";

     while ( my $line = <$IN_FH> ) {
         $line =~ s/[\"<>]//g;
         chomp $line;

         $line =~ /([\w\._\-]+\@[\w\.\-_]+)/;
         $line = lc($1);
         next if $line !~ /\w+/;

         push @emails, $line;
     }
     return @emails;
}

# everything in set a that isn't in set b
sub set_difference {
     my ( $set_a_aref, $set_b_aref ) = @_;
     my @set_result;
     my $skip = 0;

   SET_A: foreach my $a (@$set_a_aref) {
         next if $a =~ /^\s$/;
       SET_B: foreach my $b (@$set_b_aref) {
             if ( $a eq $b ) {
                 $skip = 1;
                 last SET_B;
             }
         }
         push @set_result, $a if not $skip;
         $skip = 0;
     }
     return @set_result;
}

# everything that is both Set a and Set b
sub set_intersection {
     my ( $set_a_aref, $set_b_aref ) = @_;
     my @set_result;
     my $match = 0;

   SET_A: foreach my $a (@$set_a_aref) {
         next if $a =~ /^\s+$/;
       SET_B: foreach my $b (@$set_b_aref) {
             if ( $a eq $b ) {
                 $match = 1;
                 last SET_B;
             }
         }
         push @set_result, $a if $match;
         $match = 0;
     }
     return @set_result;
}

sub output_list {
     my ( $outfile, $list_aref ) = @_;
     my $OUT_FH;
     open $OUT_FH, '>', $outfile
       or die "bad open for write:$outfile reason: $!";

     foreach my $line (@$list_aref) {
         print $OUT_FH "$line\n"
           or die "bad write to $outfile: $!";
     }
}

#### script above this line (this line not included ######


More information about the Mailman-Users mailing list