#!/usr/bin/perl -Tw ############################################################################## # # SpamCop AutoReporter # Feed this script the emails from the SpamCop AutoResponder # # Written by Robert Jan Scheer in july 2001 # ############################################################################## # Create a safe environment BEGIN { for my $var (keys %ENV) { next if $var eq "LOGNAME" and $ENV{$var} =~ m!^\w+$!i; next if $var eq "HOME" and $ENV{$var} =~ m!^[/\w]+$!i; delete $ENV{$var}; } $ENV{SHELL} = '/bin/sh'; } # Load modules use strict; use Net::Domain qw(hostdomain); use URI::URL; use URI::Escape; use LWP::UserAgent; use HTML::Parser; use HTML::Entities; use Text::Wrap; ############################################################################## my($version) = q$Revision: 1.6 $ =~ /(\d+\.\d+)/; # RCS revision control my $useragent = "SpamCopReporter/$version"; my $sendmail = "/usr/sbin/sendmail -t"; my $maintainer = ''; # Against harvesters my $baseurl = "http://spamcop.net/"; # Should end on / to be safe my $delimiter = "-" x 75 . "\n"; ############################################################################## # The resulting information contains anything between # and tags, if not ignored by a pattern, and text that matches # the wanted patterns. # "needtext" signals that the next line is also wanted. # "done" signals that reporting was completed. my %ignorepatterns = ( "^Cookie option" => [], # Only useful with a browser "^No mail exchanger" => [], # No useful message "^Strict HTML" => [], # No useful message "^Nothing to do" => ["done"], # Obvious, so not useful ); my %wantedpatterns = ( "^Yum" => [], # Freshness (thanx JP :-) "^Right now" => [], # Info about SpamCop filters '^Report already sent' => # Old report with addresses ["done","neednext"], # on the next line '^Spam report id' => ["done"], # Successfully sent reports ); ############################################################################## # Collect user info my $user = $ENV{'LOGNAME'} || (getpwuid($<))[0]; my $home = $ENV{'HOME'} || (getpwuid($<))[7]; defined $user and defined $home or die "Who are you\n"; # Configurable user options my %opt = ( filedebug => 0, # Use local HTML files instead of remote HTML pages htmldebug => 0, # Show HTML parsing debug httpdebug => 0, # Show HTTP requests pagedebug => 0, # Print HTML page that is being parsed verbose => 0, # Give more info about what is happening checkall => 0, # Check all checkboxes in the HTML form background => 1,# Run program in background and mail results ); ############################################################################## # Read user configuration my $configfile = "$home/.spamcoprc"; if (open(CONFIG,"<$configfile")) { while () { s/#.*//; # Delete comments if (/^\s*(\w+)\s*=\s*([^\s,;]+)/i) { # Read until sep char $opt{$1} = $2; # Hash should be safe } } } # No filedebug unless you have a tty if ($opt{filedebug} and not -t) { print STDERR "Sorry, filedebug is only allowed with a terminal\n"; $opt{filedebug} = 0; exit; } # When debugging, disable background and enable verbose if ($opt{filedebug} or $opt{htmldebug} or $opt{httpdebug}) { print STDERR "Sorry, no background possible. " if $opt{background}; print STDERR "Running in debug mode.\n"; $opt{background} = 0; $opt{verbose} = 1 unless $opt{filedebug}; } # Determine email address $opt{email} = $opt{report_to} if defined $opt{report_to}; if ($opt{background} and not defined $opt{email}) { $opt{email} = $user . '@' . hostdomain() || die "Cannot determine your email address\n"; } # Reading input looking for SpamCop reporting URLs # If it is an email, save the message-id and recipients address my @urls; my $messageid; while () { chomp; push @urls, $1 if /^($baseurl\S+)/o; $messageid = $1 if not defined $messageid and /^Message-ID:\s+(.*)$/i; } die "No SpamCop URLs found!\n" unless @urls; # Fork and start sendmail when neccesary if ($opt{background}) { fork && exit; open(SENDMAIL, "|$sendmail") or die "$sendmail: $!\n"; select(SENDMAIL); print "From: SpamCop AutoReporter <$opt{email}>\n"; print "To: $opt{email}\n"; print "Subject: SpamCop Reporting Results\n"; print "References: $messageid\n" if defined $messageid; print "\n"; } # Create HTML Parser my $p = HTML::Parser->new( api_version => 3, start_h => [\&starttag, "self,tagname,attr,text"], end_h => [\&endtag, "tagname,text"], text_h => [\&text, "text,offset"], ); # Create user agent my $ua= new LWP::UserAgent; $ua->agent($useragent); # Legal HTTP methods my %call = ( GET => \&GET, POST => \&POST ); # Variables that are populated while parsing a HTML page my $refreshtime; # Meta refresh time, if present my $refreshurl; # Meta refresh url, if present my %input; # Form data fields my @output; # Collected output lines my $action; # Action field of encountered form my $method; # Method field of encountered form my %start; # Used by HTML start/end handler my $neednext; # Used by HTML text handler my $done; # Used when printing the result my $parsemode; # Set later to increase parsing speed print $delimiter; # Main loop for my $url (@urls) { # Handle each SpamCop URL undef $refreshtime; undef $refreshurl; undef %input; undef @output; undef $action; undef $method; undef %start; $neednext = 0; $done = 0; my $page; my $uri1 = URI::URL->new_abs($url, $baseurl); print "Getting ",$uri1->as_string,"\n" if $opt{verbose}; if ($page = GET($uri1)) { # Get first page (form) print "Parsing resulting page\n" if $opt{verbose}; print "\n$page\n" if $opt{pagedebug}; $parsemode = "page1"; $p->parse($page); # Parse first page (form) if (defined $refreshtime) { # Wait and refresh page print "Waiting $refreshtime second(s)" . " before refreshing\n" if $opt{verbose}; sleep $refreshtime; $url = $refreshurl if defined $refreshurl; redo; } } unless (defined $method) { # No form in first page unless (@output) { push @output, # Give debug info "No form found!", "Either there is some explanation in this webpage", "in a layout the maintainer of this script did not", "anticipate, or the layout of SpamCop has changed.", "Please forward this email to $maintainer.", "", "Contents of the page:", "", $page; } printresult($url, @output); # Print reason next; } unless (exists $call{$method}) { # Invalid form in first page push @output, # Give debug info "Unknown HTTP method $method found in form!", "Please forward this email to $maintainer.", "", "Contents of the page:", "", $page; printresult($url, @output); # Print reason next; } push @output, "" if @output; # Second page coming up my $uri2 = URI::URL->new_abs($action, $baseurl); print "Submitting form data to ",$uri2->as_string,"\n" if $opt{verbose}; # Get second page (result) if ($page = $call{$method}->($uri2, createquery(\%input))) { print "Parsing resulting page\n" if $opt{verbose}; print "\n$page\n" if $opt{pagedebug}; $parsemode = "page2"; # Parse second page (result) $p->parse($page); } printresult($url, @output); # Print result } if ($opt{background}) { # Send mail close(SENDMAIL) or die "$sendmail: $!\n"; } sub createquery { # Create HTTP query my($queryref) = @_; # from form data my @pairs; for my $key (keys %$queryref) { # Encode everything my $name = uri_escape($key); my $value = uri_escape($queryref->{$key}); $name =~ s/ /+/g; $value =~ s/ /+/g; push @pairs, "$name=$value"; } return join("&", @pairs); # Return HTTP query } sub GET { # Create HTTP GET request my($uri, $query) = @_; if (defined $query) { $query = $uri->query . "&$query" if $uri->query; $uri->query($query); } my $req = HTTP::Request->new("GET", $uri); return GETorPOST($req); } sub POST { # Create HTTP POST request my($uri, $query)= @_; my $req = HTTP::Request->new("POST", $uri); $req->content_type('application/x-www-form-urlencoded'); $req->content($query); return GETorPOST($req); } sub GETorPOST { my($req) = @_; if ($req->uri->scheme ne "http") { # Check HTTP request push @output, "Unsupported scheme in " . $req->uri->as_string; return undef; } warn $req->as_string if $opt{httpdebug}; if ($opt{filedebug}) { # When debugging.. my $file = shift @ARGV or die "Missing file\n"; print "Using $file instead\n" if $opt{verbose}; return getfile($file); # ..return a file } my $res = $ua->request($req); # Send HTTP request if ($res->is_success) { return $res->content; # Return page.. } else { push @output, $res->as_string; # ..or save error } return undef; } sub getfile { # Return contents of a file my $file = shift; # Only useful when debugging undef $/; open(FILE, $file) or die "$file: $!\n"; my $in = ; close(FILE); return $in; } sub printresult { my($url,@lines) = @_; print "Done " if $done; # Was reporting successful? print "$url\n"; # Print parsed URL print "\n"; if (@lines) { foreach (@lines) { print wrap("","",$_), "\n"; } } else { print "No results!\n"; # Should not be reached } print $delimiter; } sub match { # Match text from HTML page my($text, $patternref) = @_; if ($neednext) { $neednext--; return 1; # Return if text is wanted } for my $pattern (keys %$patternref) { # Walk text patterns if ($text =~ /$pattern/) { warn "Matching pattern is /$pattern/\n" if $opt{htmldebug}; my $varref = $patternref->{$pattern}; for my $var (@$varref) { $done++ if $var eq "done"; $neednext++ if $var eq "neednext"; } return 1; # Return if text is wanted } } return undef; } sub starttag { my($self, $tagname, $attr, $src) = @_; return if $parsemode eq "page2"; # Not useful in this mode $src =~ s/[\r\n]//g; # Delete CR/NL characters if ($tagname eq "meta") { # Find META tags my $httpequiv = exists $attr->{'http-equiv'} ? $attr->{'http-equiv'} : ""; if ($httpequiv =~ /^\s*refresh\s*$/i) { # Refresh warn "$src\n" if $opt{htmldebug}; my $content = exists $attr->{'content'} ? $attr->{'content'} : ""; if ($content =~ /^\s*(\d+)(.*)/) { $refreshtime = $1; # Time my $rest = $2; if ($rest =~ /^\s*;\s*url\s*\=s*(.*)/) { $refreshurl = $1; # URL } } } } if ($tagname eq "form") { # Find FROM tags warn "$src\n" if $opt{htmldebug}; $action = exists $attr->{'action'} ? $attr->{'action'} : "/sc"; $method = exists $attr->{'method'} ? $attr->{'method'} : "POST"; $method = uc $method; } if ($tagname eq "input") { # Find INPUT tags warn "$src\n" if $opt{htmldebug}; my $type = exists $attr->{'type'} ? $attr->{'type'} : ""; my $name = exists $attr->{'name'} ? $attr->{'name'} : ""; my $value = exists $attr->{'value'} ? $attr->{'value'} : ""; if ($name) { if ($type eq "hidden") { # hidden $input{$name} = $value; } elsif ($type eq "checkbox") { # checkbox if ($opt{checkall} or exists $attr->{'checked'}) { $input{$name} = "on"; } } } } if ($tagname eq "textarea") { # Find TEXTAREA tags my $name = exists $attr->{'name'} ? $attr->{'name'} : ""; if ($name) { # Save name of input field warn "$src\n" if $opt{htmldebug}; $start{'textarea'}{'name'} = $name; @{$start{$tagname}{'value'}} = (); } } if ($tagname eq "font") { # Find FONT tags my $color = exists $attr->{'color'} ? $attr->{'color'} : ""; if ($color eq "red") { # Useful messages are red warn "$src\n" if $opt{htmldebug}; $start{'font'}{'name'} = $color; undef @{$start{$tagname}{'value'}}; } } } sub endtag { my($tagname, $src) = @_; return if $parsemode eq "page2"; # Not useful in this mode $src =~ s/[\r\n]//g; # Delete CR/NL characters if (exists $start{$tagname}) { # Recall name of input field my $name = $start{$tagname}{'name'}; my $value = join(" ", @{$start{$tagname}{'value'}}); if ($value) { warn uc($tagname) . ": $value\n" if $opt{htmldebug}; if ($tagname eq "textarea") { $input{$name} = $value; } if ($tagname eq "font") { push @output, # Save text in red font $value; } } warn "$src\n" if $opt{htmldebug}; delete $start{$tagname}; # Forget name of input field } if ($tagname eq "form") { warn "$src\n" if $opt{htmldebug}; } } sub text { my($dtext,$offset) = @_; return if $dtext =~ /^\s*$/s; # No useful text return if $dtext =~ /^\s*