package Parp::Filter;

=head1 NAME

Parp::Filter - base class for Parp filter logic

=head1 SYNOPSIS

Do not use directly!  Subclass as follows:

  package Parp::Filter::User;

  use strict;
  use warnings;

  use base 'Parp::Filter';

  # Override documented methods with your own filter logic, e.g.
  sub categorize {
    # TODO
  }

  1;

=head1 DESCRIPTION

This class provides the main routines which take filtering action on
one mail, and a sensible, all-purpose default filtering logic.  The
documented methods in this class have been designed specifically with
the expectation that they will be overridden by the user.  In this way
it should be possible for the user to get parp to filter in precisely
the desired fashion.

By default, parp tries to load user code from a file called
F<Filter.pm> in the F<.parp> subdirectory of your home directory.  It
must provide the overridden methods within the C<Parp::Filter::User>
package.  If you wish to place F<Filter.pm> in a different directory,
set the environment variable C<PARP_USERDIR> to that directory before
you invoke parp.

Note that it is not necessary for a user-specific F<Filter.pm> to
exist anywhere; in that case, the settings are taken entirely from
C<Parp::Config>.

=cut

use strict;
use warnings;

use Parp::Config  qw(config);
use Parp::Options qw(opt);
use Parp::Utils   qw(vprint diagnose log_header log_rule fatal);

use base 'Exporter';
our @EXPORT_OK = qw(filter);

=head2 categorize($mail)

This subroutine is the heart of the filtering strategy.  It places
the mail currently being filtered into one of the following
categories:

  IS_SPAM    -- spam
  TO_MAIN    -- good mail destined for main inbox
  TO_AUX     -- good mail destined for auxiliary inboxes
  SPECIAL    -- leave subroutines to do their own thing

by returning a scalar containing the name of the category.
Gets passed a Parp::Mail object as the only parameter.

=cut

sub categorize {
  my ($self, $m) = @_;

  if ($m->is_to_old_address) {
    $m->reject_mail('was to old address');
    $m->deliver_to_inbox('old_addresses');
    return 'TO_AUX';
  }

  # Ignore real lusers.
  return 'IS_SPAM' if $m->has_spam_from_addresses;

  # Allow config file to deal with special cases in its own way.
  my $rv = $self->is_special_case($m);
  return $rv if $rv;

  return 'TO_MAIN' if $self->is_from_daemon($m);

  # Any good signs which indicate that the mail should definitely
  # NOT be treated as junk?
  my $grace = $m->is_passworded        ||
              $m->is_from_good_person  ||
              $m->is_from_good_domain  ||
              $m->has_good_headers;

  return 'IS_SPAM'
    if ! $grace &&
       ($m->has_spam_headers          ||
        $m->has_spam_domains_anywhere ||
        $m->has_spam_content);

  if ($self->is_list_mail($m)) {
    $m->{complain} = 0;
    return 'TO_AUX';
  }

  return 'TO_MAIN' if $grace;

  # We put this one after the check for list mail, because on average,
  # mail from lists tends to be lower grade than personal mail.
  return 'IS_SPAM' if $m->has_suspicious_headers;

  return 'IS_SPAM' if ! $m->for_me;

  $m->accept_mail('passed all tests');
  return 'TO_MAIN';
}

=head2 is_special_case

This method acts as a hook which can be overridden by the user
allow complete control over how certain special cases of e-mail
are handled.  It should return true if a special case has been
detected.  By default it always returns false.

The hook gets called early on in C<categorize>.

=cut

sub is_special_case { 0 }

=head2 is_list_mail

This method should detect whether the mail is from a mailing list,
and if so, return true.  By default it always returns false.

The hook gets called early on in C<categorize>.

=cut

sub is_list_mail { 0 }

=head2 is_from_daemon

This method detects whether the mail is from a mailer daemon, and if
so, returns true.  It will not catch all cases; given knowledge of the
behaviour of your local daemons, you should be able to improve it by
overriding it.  See the sample F<Filter.pm> provided in the
distribution for an example of this.

The hook gets called fairly early on in C<categorize>.

=cut

sub is_from_daemon {
  my $self = shift;
  my $m = shift;

  # standard daemon errors
  if (
      ($m->from =~ /
                    Mail Delivery (Subsystem|System)    |
                    MAILER-DAEMON                       |
                    MESSAGE_ERROR_HANDLER               |
                    postmaster\@
                   /ix or
       $m->env_from_addr =~ /MAILER-DAEMON/
      )
      and
      $m->subject =~ /(
                       Undeliverable                                    |
                       Returned\ mail:\ User unknown|subscri(be|ption)  |
                       delivery\ fail(ed|ure)                           |
                       failure\ notice                                  |
                       returning\ (mail|message)
                      )/ix
     )
  {
    $m->accept_mail('from daemon', "subject contained `$1'");
    return 1;
  }

  return 0;
}

# This is where all the fun starts.  Takes a Mail::Internet object as
# parameter, wraps it in a Parp::Mail object, and processes it
# according to the command-line options (see L<Parp::Options>).
sub process_mail {
  my ($self, $mail) = @_;

  my $folder = $mail && $mail->{parp_foldername};

  if (! $mail) {
    error("Message parsing failed",
          "\$folder:\n", Dumper($folder),
          "\n",
          "\$mail:\n", Dumper($mail),          
         );
    return 0;
  }

  my $m = Parp::Mail->new($mail);

  log_header($m);

  return $m->make_friends($folder) if opt('extract_friends');

  unless (opt('wrong_class') || opt('regression_test')) {
    my $is_dup_or_loop = $self->is_dup_or_loop($m);
    return $is_dup_or_loop if $is_dup_or_loop;
  }

  $m->{filter_category} = $self->categorize($m);

  if (opt('wrong_class')) {
    $self->reclassify($m);
    $m->update_friends_db;
  }
  elsif (opt('regression_test')) {
    $self->regression_test($m);
  }
  else {
    $self->take_action($m);
    $m->update_friends_db;
    $m->add_id_to_cache;
  }

  log_rule('-');

  return $m->{filter_category};
}

sub is_dup_or_loop {
  my $self = shift;
  my ($m) = @_;

  if (opt('no_dups') && $m->is_duplicate) {
    $m->reject_mail('was duplicate by message id');
#   $m->deliver_to_inbox('duplicates');
    $m->{backup} = 0;
    return 'IS_DUPLICATE';
  }

  # FIXME: There could be more than one X-Loop header.
  if (($m->header->get('X-Loop') || '') eq config->loop_value) {
    $m->accept_mail('looped');
    return 'LOOPED';
  }

  return 0;
}

sub reclassify {
  my $self = shift;
  my ($m) = @_;

  # The user's telling us that the filter_category we've just
  # calculated is wrong.
  if ($m->{filter_category} eq 'IS_SPAM') {
    vprint "Reclassification: was incorrectly identified as spam.\n";
    $m->{filter_category} = 'UNKNOWN_NOT_SPAM';
  }
  elsif ($m->{filter_category} ne 'IS_SPAM') {
    vprint "Reclassification: was incorrectly identified as bona-fide.\n";
    $m->{filter_category} = 'IS_SPAM';
  }
}

# Returns false on test failure, true on test success.
sub regression_test {
  my $self = shift;
  my ($m) = @_;

  my @tags = grep /^X-Parp-(?!Id|Pid)/, $m->header->tags;
  my %headers = map { $_ => $m->header->get($_, 0) } @tags;

  my $old_headers = $m->{old_parp_headers} || {};
  if (! keys %$old_headers) {
    vprint "Mail had not already been processed\n";
    return 1;
  }

  delete $old_headers->{'X-Parp-Pid'};
  my $old = $self->_header_hash_to_string($old_headers);
  my $new = $self->_header_hash_to_string(\%headers);
  vprint "!!! Regression failure !!!\nOLD:\n${old}NEW:\n$new\n"
    if $old ne $new;
  return $old eq $new;
}

sub _header_hash_to_string {
  my $self = shift;
  my ($headers) = @_;
  my $string = '';
  foreach my $tag (sort keys %$headers) {
    $string .= "$tag: $headers->{$tag}";
  }
  return $string;
}

sub check_received_headers {
  my $self = shift;
  my ($m) = @_;

  $m->parse_received_headers;
  
  if ($m->{recvd_parses_failed}) {
    $m->header->add('X-Parp-Received-Parse',
                      $m->{recvd_parses_failed}
                      . ' header'
                      . ($m->{recvd_parses_failed} > 1 ? 's' : '')
                      . ' failed parse');
    if ($m->{filter_category} eq 'IS_SPAM') {
      $m->deliver_to('spam_recvds');
    }
    else {
      vprint $m->{recvd_parses_out};
      $m->deliver_to('bad_recvds');
    }
  }
}

=head2 take_action($mail)

Once the mail has been filtered and classified, if parp is in delivery
mode (i.e. not in test-run (C<-t>) or extract-friends (C<-e>) or
regression-test (C<-R>) or correction (C<-w>) mode), this is where
delivery (and possibly other actions, e.g. generating auto-replies)
takes place.

You can override it to do what you want, e.g. to specify default
delivery locations for mail which has classified into particular
categories.

=cut

sub take_action {
  my $self = shift;
  my ($m) = @_;

  if ($m->{filter_category} eq 'TO_MAIN') {
    $m->deliver_to_main;
  }
  elsif ($m->{filter_category} eq 'IS_SPAM') {
    if ($m->{complain}) {
      # TODO: write and send a rude letter to relevant abuse@foo.com address.
      diagnose "Would complain\n";
    }
  }
  elsif ($m->{filter_category} eq 'TO_AUX') {
    # list mail; already delivered to primary target
    #  - maybe back up though
    $m->maybe_backup;
  }
  elsif ($m->{filter_category} eq 'IS_SPECIAL') {
    # special case mail; already delivered to primary target
    #  - maybe back up though
    $m->maybe_backup;
  }
  else {
    fatal(9, "Oh dear, mail didn't get put in any categories.");
  }
}

sub new { shift } # monadic class, no state needed

# This is naughty but convenient.
my $user_dir    = $ENV{PARP_USERDIR} || $ENV{HOME};
my $user_filter = $user_dir . '/.parp/Filter.pm';
if (-e $user_filter) {
  eval { require $user_filter };
  die $@ if $@;
}
my $filter = Parp::Filter::User->can('new') ?
               Parp::Filter::User->new : Parp::Filter->new;
sub filter { $filter }

1;
