package Parp::Mail::Tests::Body;

=head1 NAME

Parp::Mail::Tests::Body - body tests for Parp::Mail objects

=head1 SYNOPSIS

See L<Parp::Mail>.

=head1 DESCRIPTION

This class provides methods for Parp::Mail objects which test
their message bodies for traces of spam.

=head1 METHODS

=cut

use strict;
use warnings;

use Parp::Config qw(config);
use Parp::Utils qw(diagnose vprint);

=head2 has_spam_content()

Returns true if the mail's body contains spam.

=cut

sub has_spam_content {
  my $m = shift;

  return 1 if $m->has_bad_content_type;
  return 1 if $m->too_many_forwards;

  # Strip blank and quoted lines to get original text.
  my @original = grep ! /^\s*$|^> /, @{$m->{body}};
  $m->{_original_body_text} = \@original;

  my $first_few = $m->_get_body_head(4);
  return 1 if $m->has_spam_intro($first_few);

  my $last_few = $m->_get_body_tail(12);
  return 1 if $m->has_spam_ending($last_few);

  if ($last_few =~ /group.mail/i) {
    $m->reject_junk_mail('body suggested that a group mailer was used');
    return 1;
  }

  my $original = join '', @original;

  if ($original =~
        /this\s+(e?.?mail|message)\s+is\s+(being\s+)?sent\s+in\s+compliance/i) {
    $m->reject_junk_mail('body pretended that spam is legal');
    return 1;
  }

  if ($original =~ config->very_bad_words) {
    (my $phrase = $1) =~ s/\n+/ /g;
    $m->reject_junk_mail('body contained a very bad phrase', "`$1'");
    return 1;
  }

  if ($original =~ /\b ( \$\d{1,3}(,\d{3})+ | \${4}'s ) \b/x) {
    $m->reject_junk_mail('body mentioned dollars', "`$1'");
    return 1;
  }

  if ($original =~
        qr/(Hi!\s+How\s+are\s+you.+I\s+send\s+you\s+this\s+file\s+in\s+order\s+to\s+have\s+your\s+advice)/s) {
    $m->reject_junk_mail('body looked like virus', "`$1'");
    return 1;
  }

  if ($original =~ qr/(
                       # American phone numbers
                       \b (800|900|877|888) ([-\ ])
                       (
                         (?-i: [A-Z]{2,} (\2 [A-Z]+)* )          |
                         \d{3} \2 \d{4} \b
                       )                                         |
                       \b \(\d{3}\) \  \d{3}-\d{4} \b
                      )/) {
    $m->reject_junk_mail('body contained American phone #', "`$1'");
    return 1;
  }

  my $qbw = config->quite_bad_words;
  my @matches = ($original =~ /$qbw/g);
  my %uniques = map { lc $_ => $_ } @matches;
  diagnose "Quite bad words found in body: ",
              scalar(@matches), " (", scalar(keys %uniques), " unique)\n"
    if @matches;

  if (@matches > config->max_quite_bad_words &&
      scalar(keys %uniques) > config->max_unique_quite_bad_words) {
    $m->reject_junk_mail('body contained too many bad words',
                         join ', ', map { "`$_'" } values %uniques);
    return 1;
  }

  return 0;
}

sub has_bad_content_type {
  my ($m) = @_;
  foreach my $type ($m->{content_type}, @{ $m->{content_types} || [] }) {
    if ($type =~ config->bad_content_type) {
      $m->reject_junk_mail('bad Content-Type', $type);
      return 1;
    }
  }
  return 0;
}

sub _get_body_head {
  my ($m, $how_many) = @_;

  my @original = @{ $m->{_original_body_text} };
  
  my ($start, $end) = (0, $how_many);
  $end = $#original if $#original < $how_many;
  return join '', @original[$start .. $end];
}

sub _get_body_tail {
  my ($m, $how_many) = @_;

  my @original = @{ $m->{_original_body_text} };
  
  my ($start, $end) = (-$how_many, -1);
  if (@original < $how_many) {
    ($start, $end) = (0, $#original);
  }
  return join '', @original[$start .. $end];
}

sub too_many_forwards {
  my ($m) = @_;
  
  my $max = config->max_forwards;
  my @matches = ($m->{body_scalar} =~ /^\s*(>\s*){$max,}/mg);
#  diagnose "Lines exceeding max_forwards: ", scalar(@matches), "\n";
  if (@matches > config->max_forwards_lines) {
    $m->reject_junk_mail("forwarded more than "
                         . config->max_forwards
                         . " times");
    return 1;
  }
  return 0;
}

sub has_spam_intro {
  my ($m, $first_few) = @_;

  my $me = config->me;
  if ($first_few =~ /^\s*
                      (Dear\ (
                        friend \b       |
                        .* surfer       |
                        $me
                      ))
                    /imx)
  {
    $m->reject_junk_mail('Suspicious method of address', "`$1'");
    return 1;
  }
  return 0;
}

sub has_spam_ending {
  my ($m, $last_few) = @_;

#   warn "remov: [$last_few] : " . ($last_few =~ /\bremoved?\b/i);
#   warn "2: " . $last_few =~ /respond|notify|reply|send|forward|click|software|
#                     mailto|type|return/ix;
#   warn "3: " . $last_few =~ /subjec?t|process|automatically/i;
  
  if ($last_few =~ /\b(removed?|do\s+not\s+wish|no\s+more\s+(e.?)?mail)\b/i &&
      $last_few =~ /respond|notify|reply|send|forward|click|software|
                    mailto|type|return|please/ix &&
      $last_few =~ /email|cancel|subjec?t|header|process|automatically/i) {
    $m->reject_junk_mail('body confessed it was junk');
    return 1;
  }
  return 0;
}

=head1 SEE ALSO

L<Parp::Mail>

=cut

1;
