package Mail::Filterable;

##############################################################################
#
# Routines for parsing the e-mail being filtered and calculating various
# bits of data which will be used in the filtering process.
#

use strict;
use warnings;

use Carp;
use Data::Dumper;
use Digest::MD5 qw(md5_base64);
use Mail::Address;
use Mail::Field;
use Time::Local;

use Parp::Config qw(config);
use Parp::Folders qw(append_to_folder folder_substs);
use Parp::Friends qw(make_friend is_friend);
use Parp::IdCache;
use Parp::Options qw(opt delivery_mode);
use Parp::Utils qw(vprint diagnose check_file_dir month2i error fatal);
use Parp::Blacklist qw(blacklist_lookup);

use overload '""' => \&to_string;

sub new {
  my ($this, $mail, $props_hashref) = @_;
  my $class = ref($this) || $this;
  my $self = bless {}, $class;
  my $m = $self; # syntactic sugar

  my $header = $mail->head;
  $m->{content_type} = $header->get('Content-Type') || '';
  $m->{mail}   = $mail;
  $m->{header} = $header;
  $m->{body}   = $self->_get_body($mail->body);
  $m->{body_scalar} = join '', map { s/^>From/From/; $_ } @{$m->{body}};

  $self->_calc_common_headers;
  $self->_check_resent_headers;
  $self->_check_envelope;
  $self->_extract_addrs;
  
  $m->{recvds_array}    = [ $header->get('Received') ];
  $m->{recvds}          = join '', @{$m->{recvds_array}};

  $self->_check_auth_sender;

  # Remove all previously existing parp headers except X-Parp-Id
  my @parp_headers = grep /^X-Parp-(?!Id)/, $m->header->tags;
  foreach my $parp_header (sort @parp_headers) {
    $m->{old_parp_headers}{$parp_header} = $m->header->get($parp_header)
      if opt('regression_test');
    $m->header->delete($parp_header);
  }

  $self->_do_parp_headers;
  
  $m->{backup}   = 1;     # back up by default
  $m->{complain} = 1;     # allow complaining by default

  return $self;
}

my %headers = (
  parp_id      => 'X-Parp-Id',        from         => 'From',
  to           => 'To',               cc           => 'Cc',
  subject      => 'Subject',          return_path  => 'Return-Path',
  reply_to     => 'Reply-To',         list         => 'X-Mailing-List',
  in_reply_to  => 'In-Reply-To',      refs         => 'References',
  id           => 'Message-ID',       date         => 'Date',
  status       => 'Status',           a_to         => 'Apparently-To',
  precedence   => 'Precedence',       mailer       => 'X-Mailer',
);
my @methods = qw(mail_from env_from sender organisation);

# autogenerate read-only accessors
foreach my $method (keys(%headers), @methods) {
  no strict 'refs';
  *$method = sub { $_[0]->{cached_headers}{$method} };
}
foreach my $method (keys %headers) {
  no strict 'refs';
  my $full = 'full_' . $method;
  *$full = sub { $headers{$method} . ': ' . $_[0]->$method . "\n" };
}
sub full_mail_from { 'From ' . shift->mail_from . "\n" }

foreach my $method (qw/mail header/) {
  no strict 'refs';
  *$method = sub { $_[0]->{$method} };
}

sub _calc_common_headers {
  my $m = shift;

  my $header = $m->header;

  # It's easier if all the headers we deal with are single lines.
  # In particular, don't fold `From ' header, as we want that to match
  # a date regexp below.
  $header->unfold;

  # envelope from
  $header->mail_from('KEEP');
  if ($m->can('fromLine')) {
    # got a Mail::Box::Mbox::Message
    (my $mail_from = $m->mail->fromLine) =~ s/^From //;
    $m->_cache_header_val(mail_from => $mail_from);
  }
  else {
    $m->_cache_header(mail_from => 'From ', 'Mail-From');
  }

  while (my ($method, $hdr) = each %headers) {
    next if $method eq 'mailer';
    $m->_cache_header($method => $hdr);
  }

  $m->_cache_header(sender => qw/Sender X-Sender/);
  $m->_cache_header(mailer => qw/User-Agent X-Mailer/);
  $m->_cache_header(organisation => qw/Organisation Organization/);
}

sub _cache_header_val {
  my ($m, $header, $val) = @_;
  carp "$header already cached as `$m->{cached_headers}{$header}'\n"
    if $m->{cached_headers}{$header};
  chomp($m->{cached_headers}{$header} = $val);
}

sub _cache_header {
  my ($m, $header, @headers) = @_;
  carp "$header already cached as `$m->{cached_headers}{$header}'\n"
    if $m->{cached_headers}{$header};
  foreach my $h (@headers) {
    $m->{cached_headers}{$header} ||= $m->header->get($h);
    last if $m->{cached_headers}{$header};
  }
  $m->{cached_headers}{$header} ||= '';
  chomp $m->{cached_headers}{$header};
}

sub _do_parp_headers {
  my $m = shift;
  
  # Add a header for the process id to try to chase down obscure bugs.
  $m->header->add('X-Parp-pid', $$);
#  system("/bin/date >> ~/mail/.parp.pstree");
#  system("pstree >> ~/mail/.parp.pstree");

  $m->_calc_parp_id;
}

sub _calc_parp_id {
  my $m = shift;

  # Calculate a unique id which parp can always refer to.  We
  # calculate the MD5 digest of enough of the whole mail to ensure a
  # unique id, but without any bits which might change in some way
  # during the e-mail's life-span.  That guarantees that during any
  # subsequent reclassification of the e-mail as a false
  # positive/negative (in the spam detection sense) this unique id
  # will match with the original, so that the statistics calculation
  # program will work.
  
  my @immutable_headers = qw/mail_from from to cc subject
                             return_path reply_to in_reply_to
                             refs id date mailer/;
  my $immutable_header
    = join '', map { my $fhdr = "full_$_"; $m->$fhdr } @immutable_headers;
  $immutable_header .= $m->{recvds};

  my $immutable = $immutable_header .
                  "\n" .
                  $m->{body_scalar};
  $immutable =~ s/\n+$/\n/;

  my $parp_id = $m->{env_from_time} . "/" . md5_base64($immutable);

  # It was a PAIN to get construction of the immutable right.
  # Uncommenting this enables debugging of it if anything goes wrong.
#  $self->_write_immutable($immutable, $parp_id);

  if ($m->{parp_id}) {
    # This e-mail has already been run through parp, so it already has
    # an X-Parp-Id header.

    if ($m->{parp_id} ne $parp_id) {
      # Better check that the id we've just calculated is the same,
      # otherwise our calculation algorithm is in trouble.
      error("Message already had a parp id of: $m->{parp_id}\n" .
            "       but recalculation yielded: $parp_id",
            #"\%m:\n", Dumper(\%m),
           );
    }
  }
  else {
    # This e-mail hasn't been touched by parp before, so stamp it with
    # a parp id.
    $m->header->add('X-Parp-Id', $parp_id);
    $m->{parp_id} = $parp_id;
  }
}

sub to_string {
  my $self = shift;
  return sprintf "parp e-mail (id %s)", $self->{parp_id};
}

sub _write_immutable {
  my $self = shift;
  my ($immutable, $parp_id) = @_;

  my $immutables_dir = config->mail_dir . '.immutables';
  return unless -d $immutables_dir;
  
  my $id_file = $parp_id;
  $id_file =~ s!/!_!g;
  $id_file = "$immutables_dir/$id_file";
    
  while (-e $id_file) {
    # Generate a unique suffix
    $id_file =~ s/(?:\.(\d+))?$/"." . (($1 || 0) + 1)/e;
  }
    
  if (open(FOO, ">$id_file")) {
    print FOO $immutable;
    close(FOO);
  } else {
    error("Couldn't open $id_file for writing: $!");
  }
}  

sub _get_body {
  my ($m, $body) = @_;

  return $body unless $m->{content_type} =~ m!^multipart/.*boundary=(.*)\n!s;

  # Deal with MIME multipart messages without using a very slow
  # parser from CPAN ...
  my $boundary = $1;
  $boundary =~ s/^"(.*)"$/$1/;
  $boundary = quotemeta $boundary;
  diagnose qq{Message is multipart; splitting on boundary "$1".\n};
  
  my @parts = split /--$boundary(?:--)?\n?/m, join('', @$body);

  diagnose "Deleting non-text parts ... \n";
  my @body_lines = ();
  foreach my $part (@parts) {
    my @lines = split /(?<=\n)/, $part;
    my $part_mail = new Mail::Internet(\@lines);
    next unless @lines;
    my $content_type = $part_mail->get('Content-Type');
    if ($content_type) {
      chomp $content_type;
      diagnose "Content-Type: $content_type";
      push @{ $m->{content_types} }, $content_type;

      if ($content_type !~ m!^text/\b!) {
        diagnose "; skipping ...\n";
        next;
      } else {
        diagnose "\n";
      }
    } else {
      $content_type = '_unspecified_';
      diagnose "Warning: Content-Type was unspecified; assuming plain text.\n";
    }

    push @body_lines, @{ $part_mail->body };
  }

  return \@body_lines;
}

sub _check_resent_headers {
  my ($self) = @_;

  # From RFC822:
  #
  # --------- 8< --------- 8< --------- 8< --------- 8< --------- 8< ---------
  #    4.2.  FORWARDING
  #  
  #         Some systems permit mail recipients to  forward  a  message,
  #    retaining  the original headers, by adding some new fields.  This
  #    standard supports such a service, through the "Resent-" prefix to
  #    field names.
  #  
  #         Whenever the string "Resent-" begins a field name, the field
  #    has  the  same  semantics as a field whose name does not have the
  #    prefix.  However, the message is assumed to have  been  forwarded
  #    by  an original recipient who attached the "Resent-" field.  This
  #    new field is treated as being more recent  than  the  equivalent,
  #    original  field.   For  example, the "Resent-From", indicates the
  #    person that forwarded the message, whereas the "From" field indi-
  #    cates the original author.
  #  
  #         Use of such precedence  information  depends  upon  partici-
  #    pants'  communication needs.  For example, this standard does not
  #    dictate when a "Resent-From:" address should receive replies,  in
  #    lieu of sending them to the "From:" address.
  #  
  #    Note:  In general, the "Resent-" fields should be treated as con-
  #           taining  a  set  of information that is independent of the
  #           set of original fields.  Information for  one  set  should
  #           not  automatically be taken from the other.  The interpre-
  #           tation of multiple "Resent-" fields, of the same type,  is
  #           undefined.
  # --------- 8< --------- 8< --------- 8< --------- 8< --------- 8< ---------
  #
  # So we only take values from Resent- headers when we can't get them
  # any other way but we really would prefer to have them.

  my %resent_headers = ( id => 'Message-ID' );
  foreach my $header_key (keys %resent_headers) {
    my $header_name = $resent_headers{$header_key};
    $self->{$header_key} ||= $self->{header}->get("Resent-$header_name") || '';
  }
}

sub _check_envelope {
  my ($m) = @_;

  if (! $m->mail_from) {
    error('Envelope From header missing', <<QMAIL);
If you are using qmail as your MTA, make sure your .qmail setup passes
the mail through the preline filter before being passed to parp, e.g.

  | preline /path/to/parp -dr

Otherwise, parp may not be compatible with your MTA.  Does it deliver
in mbox format?
QMAIL
  }

  if ($m->mail_from =~
      /(.*?)\s*(\w{3}) (\w{3}) ([\d ]\d) (\d\d):(\d\d):(\d\d) (\d{4})$/) {
    $m->_cache_header_val(env_from => $1);
    my ($dow, $month, $mday, $hour, $min, $sec, $year) =
      ($2, $3, $4, $5, $6, $7, $8);

    $m->{env_from_time} =
      timelocal($sec, $min, $hour, $mday, month2i($month), $year);
  }
  else {
    $m->_cache_header_val(env_from => $m->mail_from);
    error("Envelope From header had weird date format", $m->mail_from);
  }
  
}

sub _extract_addrs {
  my ($m) = @_;

  # Certain headers should only have one address.
  my @singletons = qw/env_from from sender list reply_to return_path id/;

  my %addrs = ();
  foreach my $hdr (@singletons, qw/to cc/) {
    $addrs{$hdr} = [ map { $_->address } Mail::Address->parse($m->$hdr) ];
  }

  my @broken = ();
  foreach my $hdr (@singletons) {
    push @broken, $hdr if @{ $addrs{$hdr} } > 1;
    $addrs{$hdr} = $addrs{$hdr}[0];
  }
  error("Some headers had more than one address",
        map { my $full = 'full_' . $_; $m->$full } @broken)
    if @broken;

  $addrs{ftc}       = [ grep $_, @addrs{qw/env_from from list sender/},
                                 @{ $addrs{to} }, @{ $addrs{cc} }       ];
  $addrs{froms}     = [ grep $_, @addrs{qw/env_from from/}              ];
  $addrs{all_froms} = [ grep $_, @addrs{qw/env_from from reply_to
                                           return_path sender id/}      ];
  $m->{addrs} = \%addrs;
}

sub from_addr     { shift->{addrs}{from}     }
sub env_from_addr { shift->{addrs}{env_from} }
sub ftc_addrs     { shift->{addrs}{ftc}      }

sub _check_auth_sender {
  my ($self) = @_;

  my @comments        = $self->{header}->get('Comments');
  $self->{comments}        = \@comments;

  $self->{auth_sender}     = '';
  foreach my $comment (@comments) {
    if ($comment =~ /^Authenticated sender is (.*)/i) {
      $self->{auth_sender} = $1;
      last;
    }
  }
}


##############################################################################
#
# Routines for performing tests on the e-mail being filtered, and
# categorizing it accordingly.
#

sub matches {
  my $m = shift;
  my ($category, $re, $debug, $multi) = @_;

  my @matches = ();
  foreach my $addr (@{ $m->{addrs}{$category} }) {
    print "Testing $addr =~ /$re/\n" if $debug;
    my $matches = $addr =~ $re;
    if ($matches) {
      print "  -- matched!\n" if $debug;
      if ($multi) {
        push @matches, $addr;
      }
      else {
        my @substrs = ();
        for my $i (1 .. @+) { 
          # Perl should have a primitive for this.
          push @substrs, substr($addr, $-[$i], $+[$i]) if defined $-[$i];
        }
        return ($matches, @substrs);
      }
    }
  }

  return @matches;
}

sub ftc_matches { # TODO
  my $m = shift;
  my @matches = $m->matches('ftc', @_);
  return $matches[0];
}

sub extract_friends {
  my $m = shift;
  my ($folder) = @_;

  # Count which addresses in which headers are mine.
  my %addrs = $m->_classify_addresses;

  my @maybe_new_friends = ();

  if ($addrs{from}{me}  == 1 &&
      $addrs{to}{total} == 1 &&
      $addrs{cc}{total} == 0)
  {
    push @maybe_new_friends, { addr => $addrs{to}{addrs}[0],
                               header => 'to' };
    diagnose "Found friend in `To:' header.\n";
  }
  elsif ( # $addrs{to}{me} + $addrs{cc}{me} >= 1 && # could be on a list
         $addrs{from}{not_me} == 1)
  {
    push @maybe_new_friends, { addr => $addrs{from}{addrs}[0],
                               header => 'From' };
    push @maybe_new_friends, { addr => $addrs{reply_to}{addrs}[0],
                               header => 'Reply-To' }
      if $addrs{reply_to}{total} == 1;
    diagnose "Found friend in `From' and `Reply-To:' headers.\n";
  }
  else {
    diagnose "No friends found in From/To/Cc headers.\n";
  }

  my $folder_name = $folder ? qq.`$folder'. : 'unknown';
  return $m->_make_friends($folder_name, @maybe_new_friends);
}


sub _make_friends {
  my $m = shift;
  my ($folder_name, @new_friends) = @_;

  my $added = 0;
  foreach my $new_friend (@new_friends) {
    my $addr = $new_friend->{addr};
    vprint "Adding `$addr' to friends database ... \n";
    if (my $reason = is_friend($addr)) {
      vprint "$addr is already a friend ($reason)\n";
      next;
    }
    my $source = "friend extracted from `$new_friend->{header}' " .
                 "header of message";
    if ($m->{parp_id}) {
      $source .= " parp id $m->{parp_id}";
    }
    elsif ($m->id) {
      $source .= " id `" . $m->id . "'";
    }
    elsif ($m->date) {
      $source .= " dated " . $m->date;
    }
    $source .= " in $folder_name folder";
    make_friend($addr, $source);
    $added++;
  }
  
  return $added ? q[EXTRACTED_FRIEND] : q[DIDN'T_EXTRACT_FRIEND];
}

sub _classify_addresses {
  my $m = shift;
  
  my %addrs = (
#              env_from    => { descr => 'envelope From' },
               from        => { descr => 'From'          },
               to          => { descr => 'to'            },
               cc          => { descr => 'cc'            },
               reply_to    => { descr => 'Reply-To'      },
#              return_path => { descr => 'Return-Path'   },
              );

  foreach my $addr_type (keys %addrs) {
    $addrs{$addr_type}{$_} ||= 0 foreach qw/me not_me total/;
    next unless $m->{$addr_type};
    my @addrs = Mail::Address->parse($m->{$addr_type});
    foreach my $parsed (@addrs) {
      my $paddr = $parsed->address;
      if ($paddr =~ config->me || $paddr =~ config->old_me) {
        $addrs{$addr_type}{me}++;
      } else {
        $addrs{$addr_type}{not_me}++;
      }
      $addrs{$addr_type}{total}++;
      push @{$addrs{$addr_type}{addrs}}, $paddr;
    }
  }

# for my $type (qw/from to cc reply_to/) {
#   vprint "type $type: ";
#   for my $count (qw/me not_me total/) {
#     vprint "[$count $addrs{$type}{$count}]";
#   }
#   vprint "\n";
# }

  return %addrs;
}

sub is_duplicate {
  my $m = shift;

  return Parp::IdCache::is_duplicate($m->id);
}

sub add_id_to_cache {
  my $m = shift;
  return if opt('test_run') || ! opt('no_dups');
  Parp::IdCache::add_id($m->id);
  diagnose "Added id to duplicates cache.\n";
}

sub was_to_old_addresses {
  my $m = shift;

  my $found = 0;

  if ($m->to =~ config->old_me) {
    diagnose "*** Old address found:\n  ", $m->to, "\n";
    $found++;
  }

  if ($m->cc =~ config->old_me) {
    diagnose "*** Old address found:\n  ", $m->cc, "\n";
    $found++;
  }

  return $found;
}

sub is_passworded {
  my $m = shift;
  my $password = config->password;
  if ($m->subject =~ $password or
      ($m->header->get(config->password_header) || '') =~ $password) {
    $m->accept_mail('contains good password');
    make_friend($m->from_addr, 'gave password');
    return 1;
  }

  return 0;
}

sub is_from_good_domain {
  my $m = shift;

  if ($m->from     =~ config->good_domains &&
      ($m->env_from =~ config->good_domains ||
#       $m->id       =~ config->good_domains ||
       $m->sender   =~ config->good_domains))
  {
    my $good_domain = $1;
    $m->accept_mail('good domain', $good_domain);
    return 1;
  }

  return 0;
}

sub has_good_headers {
  my $m = shift;

  # Could cross-check In-Reply-To: with good domains, but
  # no spammers seem to be setting this header yet, which
  # makes it an even more powerful test.
  if ($m->in_reply_to) {
    $m->accept_mail('had In-Reply-To: header');
    return 1;
  }

  if ($m->refs =~ config->good_domains) {
    $m->accept_mail('References: had good domain', $1);
    return 1;
  }

  if ($m->subject =~ config->subject_buzzwords) {
    $m->accept_mail('subject had buzzword', $1);
    return 1;
  }

  if ($m->mailer =~ /(mutt)/i) {
    $m->accept_mail('good X-Mailer header', $1);
    return 1;
  }

  return 0;
}

sub is_from_good_person {
  my $m = shift;

  foreach my $addr (map { $m->$_() } qw/from_addr env_from_addr/) {
    next unless $addr;
    if (my $reason = is_friend($addr)) {
      $m->accept_mail('from friend', "`$addr' -- $reason");
      return 1;
    }
  }

  return 0;
}

sub has_spam_headers {
  my $m = shift;

  # Many thanks to Mark-Jason Dominus and to the authors of junkfilter
  # and the NAGS filter for some of the ideas contained herein.

  my $octet_RE = '([12]?\d\d|\d\d|\d)';
  my $ipv4_RE  = ("$octet_RE\\." x 3) . $octet_RE;
  my $foo_RE   = qr![\w.%\#\$+-/]+\*?!;
  if ($m->id !~ m/^<
                      ($foo_RE|"$foo_RE")
                      \@
                      (
                       [\w-]+ (\. [\w-]+){0,6}         |
                       \[ $ipv4_RE \]
                      )
                     >/x) {
    $m->reject_junk_mail('invalid Message-ID: header', "`" . $m->id . "'");
    return 1;
  }

  if (my @m = $m->matches('ftc', config->decoys)) {
    $m->{complain} = 0; # don't let them wise up to me subscribing to
                        # stuff using a dud address
    $m->reject_junk_mail('not sent to a proper address', $m[1] || undef);
    return 1;
  }

  foreach my $bad_header (qw/PMFLAGS Advertisement X-Advertisement X-Shock/) {
    if ($m->header->get($bad_header)) {
      $m->reject_junk_mail('found bad header', $bad_header);
      return 1;
    }
  }

  my $uidl = $m->header->get('X-UIDL') || '';
  chomp $uidl;
  if ($uidl and $uidl !~ /^([0-9a-f]{32}|.{20})$/i) {
    $m->reject_junk_mail('invalid X-UIDL: header', "`$uidl'");
    return 1;
  }

  if ($m->status =~ /MC/i) {
    $m->reject_junk_mail('MaxAnnon! mailer');
    return 1;
  }

  if (($m->header->get('X-Distribution') || '') =~ /mass/i) {
    $m->reject_junk_mail('bulk mail sent with Pegasus');
    return 1;
  }

  if ($m->from =~ /^(<(_?\@_)?>)$/) {
    $m->reject_junk_mail("bad From: header", "`$1'");
    return 1;
  }

  if ($m->return_path =~ /^(<(_?\@_)?>)$/) {
    $m->reject_junk_mail("bad Return-Path: header", "contained `$1'");
    return 1;
  }

  if ($m->from eq '') {
    $m->reject_junk_mail('From: header is blank or missing');
    return 1;
  }

  if (((my $subj = $m->subject) =~ tr/\x80-\xff//) > 3) {
    $m->reject_junk_mail('Subject: header had too many 8-bit characters');
    return 1;
  }

  if ($m->date =~ m![^\w:,()+/ \t-]!) {
    $m->reject_junk_mail('bad Date: header', "`" . $m->date . "'");
    return 1;
  }

  if ($m->{recvds} =~ /(-0600 \(EST\)|-0[57]00 \(EDT\))/) {
    $m->reject_junk_mail('bad Received: header date', "`$1'");
    return 1;
  }

  if ($m->mailer =~ config->bad_words) {
    $m->reject_junk_mail("bad X-Mailer: header", "contained `$1'");
    return 1;
  }

  if ($m->mailer =~ /[0-9a-f]{10}/i) {
    $m->reject_junk_mail("bad X-Mailer: header", "contained nonsense hex");
    return 1;
  }

  if ($m->{recvds} =~ config->bad_words) {
    $m->reject_junk_mail("bad Received: header", "contained `$1'");
    return 1;
  }

  if ($m->organisation =~ config->bad_words) {
    $m->reject_junk_mail("bad organisation header", "contained `$1'");
    return 1;
  }

  if ($m->precedence =~ /(junk)/i) {
    # Precedence: bulk sometimes set by list software :-(
    $m->reject_junk_mail("bad precedence header", "contained `$1'");
    return 1;
  }

  if (@{ $m->ftc_addrs } > config->max_recipients) {
    $m->reject_junk_mail('too_many_recipients');
    return 1;
  }

  if ($m->subject =~ /\s{8}[0-9]+\s*$/) { # 8 spaces seems to be about right
    $m->reject_junk_mail('Subject: header contained trailing spam id');
    return 1;
  }

  if (config->like_me) {
    my @recipients_like_me = $m->matches('ftc', config->like_me, 0, 1);
    if (@recipients_like_me > config->max_like_me) {
      $m->reject_junk_mail('spam sent to a big alphabetical list');
      return 1;
    }
  }

  if ($m->from_addr =~ /(\@{2,})/) {
    $m->reject_junk_mail('bad From: address', "contained `$1'");
    return 1;
  }

  return 1 if $m->has_spam_received_headers;
  
  return 0;
}

sub has_spam_received_headers {
  my $m = shift;
  $m->parse_received_headers;
  while (my ($recvd, $tree) = each %{ $m->{recvd_parse_trees} }) {
    return 1 if $m->received_header_is_spam($recvd, $tree);
  }
}

sub received_header_is_spam {
  my ($m, $recvd, $tree) = @_;

  my $HELO        = $tree->{from}{HELO}   || '';
  my $from_domain = $tree->{from}{domain} || '';

  if ($HELO =~ /tmpstr/i) {
    $m->reject_junk_mail('bad HELO', $HELO);
    return 1;
  }

# Some bona-fide MTAs give a single sub-domain as the domain.  Bah.
#   if ($HELO                  &&
#       $HELO !~ /\./          &&
#       $HELO ne 'localhost'   &&
#       $from_domain !~ /^$HELO/)
#   {
#     $m->reject_junk_mail('Received header had invalid from domain',
#                          $HELO);
#     return 1;
#   }

  if ($HELO =~ /hotmail\.com/ && $from_domain !~ /hotmail\.com$/) {
    my $whole = $tree->{from}{whole};
    chomp $whole;
    $m->reject_junk_mail('Received header faked as hotmail', $whole);
    return 1;
  }

  return 0;
}

sub has_suspicious_headers {
  my $m = shift;

  if ($m->to eq '' and $m->cc eq '') {
    $m->reject_junk_mail('To: and Cc: headers both blank or missing');
    return 1;
  }

  if ($m->to eq '') {
    $m->reject_junk_mail('To: header blank or missing');
    return 1;
  }

  if ($m->to =~ config->bad_to) {
    $m->reject_junk_mail("bad To: header", "contained `$1'");
    return 1;
  }

  if ($m->subject =~ config->bad_subjects) {
    $m->reject_junk_mail("bad Subject: header", "contained `$1'");
    return 1;
  }

  if (((my $subject = $m->subject) =~ tr/!/!/) >= 5 ||
      $m->subject =~ /!!!!/) {
    $m->reject_junk_mail("Subject: header contained too many exclamation marks");
    return 1;
  }

  if ((my @words = $m->subject =~ /\b[A-Z]+\b/g) >= 6) {
    $m->reject_junk_mail('Subject: header had too many all-caps words');
    return 1;
  }

# This one is a bit extreme ...
#  if ($m->subject eq '') {
#    $m->reject_junk_mail('Subject: header is blank or missing');
#    return 1;
#  }

# This one is a bit extreme too ...
#  if ($m->from =~ /^(\d+)\@/ ||
#      $m->from =~ /^(\d+)\@/)
#  {
#    $m->reject_junk_mail('username is all digits', "`$1'");
#    return 1;
#  }

  return 0;
}

sub has_spam_from_addresses {
  my $m = shift;

  if (my @m = $m->matches('all_froms', config->bad_from)) {
    $m->reject_junk_mail('bad from address', "contained `$m[1]'");
    return 1;
  }

  return 0;
}

sub has_spam_domains_anywhere {
  my $m = shift;

  if (my @m = $m->matches('all_froms', config->bad_origins)) {
    $m->reject_junk_mail('bad from/return address', "`$m[1]'");
    return 1;
  }

  if ($m->{recvds} =~ config->bad_origins) {
    $m->reject_junk_mail('bad Received: address', "`$1'");
    return 1;
  }

  $m->parse_received_headers;

  return 1 if opt('do_blacklist') && $m->has_blacklisted_IPs;

  return 0;
}

sub has_blacklisted_IPs {
  my $m = shift;
  
  my $debug = 0;
  my %ips = ();

  foreach my $recv (@{$m->{recvds_array}}) {
    # Avoid various false positives
    $recv =~ s/JetMail \d\.\d\.\d\.\d\b//g;
      
    my @ips = $recv =~ m@(?<!/)\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b(?!\.)@g;
    @ips = grep { ! exists $ips{$_} } @ips;
    vprint "blacklist check on $recv ...\n" if $debug && @ips;
    foreach my $ip (@ips) {
      $ips{$ip}++;
      vprint "  Checking IP $ip ... " if $debug;
      my $rbl = blacklist_lookup($ip);
      if ($rbl) {
        my $reason = $rbl eq '1' ? $ip : $rbl;
#       if ($reason =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) {
#         $reason = "http://ordb.org/lookup/?host=$1";
#       }
        vprint ''. ($rbl || 'blacklisted') . "\n" if $debug;
        my @reject = ('Blacklisted');
        push @reject, $reason if $reason;
        $m->reject_junk_mail(@reject);
        return 1;
      } else {
        vprint "not found\n" if $debug;
      }
    }
  }

  return 0;
}


sub parse_received_headers {
  my $m = shift;

  return if $m->{recvd_parses_done};

  my $failed_parses_output = '';

  foreach my $recv (@{$m->{recvds_array}}) {
    $recv =~ s/\s*\n\s*/ /gm;

    my $obj = Mail::Field->new('Received', $recv);
    $obj->debug(5);

    if (! $obj->parsed_ok) {
      # Output follows in order ...

      # First, preamble before parser errors
      $failed_parses_output .= <<EOF;
--
Error parsing Received: `$recv'

EOF

      # Then, a reminder of the message details
      $failed_parses_output .= $m->full_from . $m->full_to;
      $failed_parses_output .= $m->full_cc if $m->cc;
      $failed_parses_output .= $m->full_subject . $m->full_id;

      # Finally, the incomplete parse tree
      $failed_parses_output .= Dumper($obj->parse_tree) . "\n";

      $m->{recvd_parses_failed}++;

      $failed_parses_output .= $obj->diagnostics;
    }

    $m->{recvd_parse_trees}{$recv} = $obj->parse_tree;
#    vprint Dumper($obj->parse_tree), "\n";
  }

  $m->{recvd_parses_out} = $failed_parses_output;
  $m->{recvd_parses_done} = 1;
}

sub for_me {
  my $m = shift;
  if ($m->to =~ config->me or $m->cc =~ config->me) {
    return 1;
  }
  $m->reject_junk_mail('not addressed to me');
  return 0;
}

sub has_spam_content {
  my $m = shift;

  return 1 if $m->has_bad_content_type;
  return 1 if $m->too_many_forwards;

  # Strip blank and quoted lines to get original text.
  my @original = grep ! /^\s*$|^> /, @{$m->{body}};
  $m->{_original_body_text} = \@original;

  my $first_few = $m->_get_body_head(4);
  return 1 if $m->has_spam_intro($first_few);

  my $last_few = $m->_get_body_tail(12);
  return 1 if $m->has_spam_ending($last_few);

  if ($last_few =~ /group.mail/i) {
    $m->reject_junk_mail('body suggested that a group mailer was used');
    return 1;
  }

  my $original = join '', @original;

  if ($original =~
        /this\s+(e?.?mail|message)\s+is\s+(being\s+)?sent\s+in\s+compliance/i) {
    $m->reject_junk_mail('body pretended that spam is legal');
    return 1;
  }

  if ($original =~ config->very_bad_words) {
    $m->reject_junk_mail('body contained a very bad phrase', "`$1'");
    return 1;
  }

  if ($original =~ /\b ( \$\d{1,3}(,\d{3})+ | \${4}'s ) \b/x) {
    $m->reject_junk_mail('body mentioned dollars', "`$1'");
    return 1;
  }

  if ($original =~
        qr/(Hi!\s+How\s+are\s+you.+I\s+send\s+you\s+this\s+file\s+in\s+order\s+to\s+have\s+your\s+advice)/s) {
    $m->reject_junk_mail('body looked like virus', "`$1'");
    return 1;
  }

  if ($original =~ qr/(
                       # American phone numbers
                       \b (800|900|877|888) ([-\ ])
                       (
                         (?-i: [A-Z]{2,} (\2 [A-Z]+)* )          |
                         \d{3} \2 \d{4} \b
                       )                                         |
                       \b \(\d{3}\) \  \d{3}-\d{4} \b
                      )/) {
    $m->reject_junk_mail('body contained American phone #', "`$1'");
    return 1;
  }

  my $qbw = config->quite_bad_words;
  my @matches = ($original =~ /$qbw/g);
  my %uniques = map { lc $_ => $_ } @matches;
  diagnose "Quite bad words found in body: ",
              scalar(@matches), " (", scalar(keys %uniques), " unique)\n"
    if @matches;

  if (@matches > config->max_quite_bad_words &&
      scalar(keys %uniques) > config->max_unique_quite_bad_words) {
    $m->reject_junk_mail('body contained too many bad words',
                         join ', ', map { "`$_'" } values %uniques);
    return 1;
  }

  return 0;
}

sub has_bad_content_type {
  my ($m) = @_;
  foreach my $type ($m->{content_type}, @{ $m->{content_types} || [] }) {
    if ($type =~ config->bad_content_type) {
      $m->reject_junk_mail('bad Content-Type', $type);
      return 1;
    }
  }
  return 0;
}

sub _get_body_head {
  my ($m, $how_many) = @_;

  my @original = @{ $m->{_original_body_text} };
  
  my ($start, $end) = (0, $how_many);
  $end = $#original if $#original < $how_many;
  return join '', @original[$start .. $end];
}

sub _get_body_tail {
  my ($m, $how_many) = @_;

  my @original = @{ $m->{_original_body_text} };
  
  my ($start, $end) = (-$how_many, -1);
  if (@original < $how_many) {
    ($start, $end) = (0, $#original);
  }
  return join '', @original[$start .. $end];
}

sub too_many_forwards {
  my ($m) = @_;
  
  my $max = config->max_forwards;
  my @matches = ($m->{body_scalar} =~ /^\s*(>\s*){$max,}/mg);
#  diagnose "Lines exceeding max_forwards: ", scalar(@matches), "\n";
  if (@matches > config->max_forwards_lines) {
    $m->reject_junk_mail("forwarded more than "
                         . config->max_forwards
                         . " times");
    return 1;
  }
  return 0;
}

sub has_spam_intro {
  my ($m, $first_few) = @_;

  my $me = config->me;
  if ($first_few =~ /^\s*
                      (Dear\ (
                        friend \b       |
                        .* surfer       |
                        $me
                      ))
                    /imx)
  {
    $m->reject_junk_mail('Suspicious method of address', "`$1'");
    return 1;
  }
  return 0;
}

sub has_spam_ending {
  my ($m, $last_few) = @_;

#   warn "remov: [$last_few] : " . ($last_few =~ /\bremoved?\b/i);
#   warn "2: " . $last_few =~ /respond|notify|reply|send|forward|click|software|
#                     mailto|type|return/ix;
#   warn "3: " . $last_few =~ /subjec?t|process|automatically/i;
  
  if ($last_few =~ /\bremoved?\b/i        &&
      $last_few =~ /respond|notify|reply|send|forward|click|software|
                    mailto|type|return|please/ix &&
      $last_few =~ /subjec?t|header|process|automatically/i) {
    $m->reject_junk_mail('body confessed it was junk');
    return 1;
  }
  return 0;
}


##############################################################################
#
# Routines providing actions to taken on the e-mail being filtered.
#

sub ditch_mail {
  my $m = shift;
  diagnose "Delivered to /dev/null",
              @_ ? " (@_)" : '',
              "\n";
}

sub deliver_to_main {
  my $m = shift;
  $m->deliver_to(config->main_folder, @_);
}

sub deliver_to_inbox {
  my $m = shift;
  my ($inbox) = @_;
  $m->deliver_to(config->inbox($inbox), @_);
}

sub maybe_backup {
  my $m = shift;

  $m->deliver_to(config->backup_folder) if $m->{backup};
}

sub deliver_to {
  return if opt('wrong_class');

  my ($m, $folder) = @_;

  my $file = ($folder =~ m!^/!) ? $folder : config->mail_dir . "/$folder";
  $file = folder_substs($file);

  if (delivery_mode()) {
    append_to_folder($file, $m);
    diagnose "Delivered to $file\n";
  }
  else {
    diagnose "Would deliver to $file\n";
  }
}

sub accept_mail {
  return if opt('wrong_class');

  my $m = shift;
  my ($reason_ident, @details) = @_;

  $m->{accepted} = [ $reason_ident, @details ];

  my $text = "$reason_ident" .
             (@details ? " (@details)" : '');

  $m->header->add('X-Parp-Accepted', $text);
  diagnose "Accepted: $text\n";
}

sub reject_junk_mail {
  return if opt('wrong_class');
  my $m = shift;
  $m->reject_mail(@_);
#  $m->{backup} = 0;
  $m->deliver_to_inbox('junk-mail');
}

sub reject_mail {
  return if opt('wrong_class');

  my $m = shift;
  my ($reason_ident, @details) = @_;

  $m->{rejected} = [ $reason_ident, @details ];

  my $text = $reason_ident .
             (@details ? " (@details)" : '') .
             "\n";

  $m->header->add('X-Parp-Rejected', $text);
  diagnose "REJECTED: $text";
}

sub pipe_forward {
  return if opt('wrong_class');

  my $m = shift;
  my ($pipe_command) = @_;

  if (opt('dry_pipes') || opt('test_run')) {
    diagnose "Would pipe | $pipe_command\n";
  }
  else {
    diagnose "Piping | $pipe_command ... ";
    if (! open(PIPE, "| $pipe_command")) {
      error("Couldn't open pipe command $pipe_command: $!");
    }
    else {
      print PIPE $m->{mail}->as_mbox_string;
      close(PIPE) or error("close(| $pipe_command) failed: $!\n");
      diagnose "done.\n";
    }
  }
}

1;
