package URI::Bookmarks::Netscape::Parser;

=head1 NAME

URI::Bookmarks::Netscape::Parser - parses Netscape bookmark files

=head1 SYNOPSIS

Do not use directly.

=head1 DESCRIPTION

Subclass of HTML::Parser.

=head1 BUGS

Decodes some SGML entities when maybe it shouldn't.

=cut

use strict;

use URI::Bookmark::Folder;
use URI::Bookmark::Entry;
use URI::Bookmark::Rule;

use HTML::Entities;

use base qw(HTML::Parser);

sub new {
  my ($this, %p) = @_;
  my $class = ref $this || $this;
  my $self = $class->SUPER::new(
     api_version => 3,
     handlers =>
     {
      declaration => [ 'declaration', 'self,text,line'                      ],
      comment     => [ 'comment',     'self,text,line'                      ],
      start       => [ 'start',       'self,tagname,attr,attrseq,text,line' ],
      end         => [ 'end',         'self,tagname,line'                   ],
      text        => [ 'text',        'self,text,line'                      ],
     },
  );
  bless $self, $class;

  my $collection = $self->{_collection} = $p{collection};
  $self->{_current_folder} = $collection->tree_root();
  $self->{_state} = 'preamble';
  $self->{_depth} = 0;

  $collection->{origin}{type} = 'Netscape bookmarks file';
  $collection->{origin}{source} = $p{file};
  return $self;
}

sub _fail_parse {
  my ($self, $line, $reason) = @_;
  $self->{_parse_fail_error} = "$reason (state: $self->{_state}, line: $line)";
#  warn "parse failure: $reason (state: $self->{_state}, line: $line)\n";
  $self->eof();
}

sub parse_failed { $_[0]->{_parse_fail_error} ? 1 : 0 }

sub declaration {
  my ($self, $doctype, $line) = @_;

  if ($self->{_state} ne 'preamble') {
    $self->_fail_parse($line, 'found HTML declaration outside preamble');
    return;
  }

  $self->{_collection}{origin}{doctype} = $doctype;
}

sub comment {
  my ($self, $comment, $line) = @_;

  if ($self->{_state} ne 'preamble') {
    $self->_fail_parse($line, 'found HTML declaration outside preamble');
    return;
  }

  $self->{_collection}{origin}{preamble} ||= '';
  $self->{_collection}{origin}{preamble} .= $comment;
}

sub start {
  my ($self, $tag, $attrs, $attrseq, $text, $line) = @_;

  my $collection = $self->{_collection};

  if ($tag eq 'meta' && $self->{_state} eq 'preamble') {
    $collection->{origin}{meta} = $attrs;
  }
  elsif ($tag eq 'base' && $self->{_state} eq 'preamble') {
    $collection->{origin}{base} = $attrs;
  }
  elsif ($tag eq 'title' && $self->{_state} eq 'preamble') {
    $self->{_state} = 'title text';
  }
  elsif ($tag eq 'h1' && $self->{_state} eq 'preamble') {
    $self->{_state} = 'h1 text';
  }
  elsif ($tag eq 'h3' && $self->{_state} eq 'folder') {
    $self->_add_folder($attrs);
  }
  elsif ($tag eq 'a' && $self->{_state} eq 'folder') {
    $self->_add_bookmark($attrs);
  }
  elsif ($tag eq 'dl' &&
         $self->{_state} =~ /^(preamble|folder|(root )?node description)$/) {
    $self->_handle_folder();
  }
  elsif ($tag eq 'dd' && $self->{_state} eq 'preamble') {
    $self->{_state} = 'root node description';
  }
  elsif ($tag eq 'dd' && $self->{_state} eq 'folder') {
    $self->{_state} = 'node description';
  }
  elsif ($tag =~ /^(dt|hr)$/ && $self->{_state} eq 'node description') {
    $self->_add_rule() if $tag eq 'hr';
    $self->{_state} = 'folder';
  }
  elsif ($tag eq 'hr' && $self->{_state} eq 'folder') {
    $self->_add_rule();
  }
  elsif ($self->{_state} eq 'node description') {
    my $last_node = $self->{_last_node};
    my $old = $last_node->attribute->{description} || '';
    $last_node->set_attribs({ description => $old . "<$tag>" });
    $self->_debug("# descr `$old<$tag>'");
  }
  elsif ($tag eq 'p' || $tag eq 'dt') {
    # ignore
  }
  else {
    $self->_fail_parse($line, "unrecognised start tag `$tag'");
  }
}

sub end {
  my ($self, $tag, $line) = @_;

  if ($tag eq 'title' && $self->{_state} eq 'title text') {
    $self->{_state} = 'preamble';
  }
  elsif ($tag eq 'h1' && $self->{_state} eq 'h1 text') {
    $self->{_state} = 'preamble';
  }
  elsif ($tag eq 'h3' && $self->{_state} eq 'folder name') {
    $self->{_state} = 'folder';
    my $name = $self->{_last_node}->name();
    $self->_debug("+ added folder `$name'");
  }
  elsif ($tag eq 'a' && $self->{_state} eq 'bookmark name') {
    $self->{_state} = 'folder';
    my $name = $self->{_last_node}->name() || '';
    $self->_debug("+ added bookmark `$name'");
  }
  elsif ($tag eq 'dd' && $self->{_state} eq 'node description') {
    $self->{_state} = 'folder';
  }
  elsif ($tag eq 'dl') {
    $self->{_state} = 'folder';
    delete $self->{_last_node};
    $self->{_current_folder} = $self->{_current_folder}->mother();
    unless ($self->{_current_folder}) {
      $self->{_state} = 'end';
      return;
    }
    my $name = $self->{_current_folder}->name();
    $self->{_depth}--;
    $self->_debug("< back to folder `$name'");
  }
  else {
    $self->_fail_parse($line, "unrecognised end tag $tag");
  }
}

sub text {
  my ($self, $text, $line) = @_;

  my $collection = $self->{_collection};
  my $last_node = $self->{_last_node};

  if ($self->{_state} eq 'title text') {
    my $old = $collection->title() || '';
    $collection->title($old . $text);
    $self->_debug("* title text `$old$text'");
  }
  elsif ($self->{_state} eq 'h1 text') {
    my $old = $collection->tree_root->name() || '';
    $collection->tree_root->name($old . $text);
    die unless $collection->tree_root->name() eq $old.$text;
    $self->_debug("* h1 text `$old$text'");
  }
  elsif ($self->{_state} =~ /^(folder|bookmark) name$/) {
    my $old = $last_node->name() || '';
    $last_node->name($old . $text);
  }
  elsif ($self->{_state} eq 'node description') {
    my $old = $last_node->attribute->{description} || '';
    $last_node->set_attribs({ description => $old . $text });
    $self->_debug("# descr `$old$text'");
  }
  elsif ($self->{_state} eq 'root node description') {
    my $root = $collection->tree_root();
    my $old = $root->attribute->{description} || '';
    $root->set_attribs({ description => $old . $text });
    $self->_debug("# descr `$old$text'");
  }
  else {
    $self->_fail_parse($line, "text found in illegal context: `$text'")
      if $text =~ /\S/;
  }
}

sub _add_folder {
  my ($self, $attrs) = @_;

  # hacks (bug in HTML::Parser?)
  for my $hack (qw/folded newitemheader/) {
    $attrs->{$hack} = undef
      if ($attrs->{$hack} || '') eq uc $hack;
  }

  my $folder = URI::Bookmark::Folder->new($attrs);

  $self->{_current_folder}->add_node($folder);
  $self->_debug(sprintf "to `%s' ...", $self->{_current_folder}->name());
  $self->{_last_node} = $folder;
  $self->{_state} = 'folder name';
}

sub _add_bookmark {
  my ($self, $attrs) = @_;

  # Bah, no way of stopping HTML::Parser from turning
  # href="foo&amp;bar" into href="foo&bar", but plenty of
  # hrefs just have & in, so can't just re-encode the entities.
#   foreach my $key ('href') { # there may be trouble ahead ...
#     next unless exists $attrs->{$key};
#     $attrs->{$key} = encode_entities($attrs->{$key});
#   }

  my $bookmark = URI::Bookmark::Entry->new($attrs);
  $self->{_current_folder}->add_node($bookmark);
  $self->{_last_node} = $bookmark;
  $self->{_state} = 'bookmark name';
}

sub _add_rule {
  my ($self) = @_;
  
  my $rule = URI::Bookmark::Rule->new();
  $self->{_current_folder}->add_node($rule);
  $self->{_last_node} = $rule;
  $self->_debug('- added hrule ------------------------------');
}

sub _handle_folder {
  my ($self) = @_;

  if ($self->{_state} =~ /^(preamble|root node description)$/) {
    $self->{_state} = 'folder';
    return;
  }
  
  $self->_enter_folder($self->{_last_node});
}

sub _enter_folder {
  my ($self, $folder) = @_;

  delete $self->{_last_node};
  $self->{_state} = 'folder';
  $self->{_current_folder} = $folder;
  my $name = $folder->name();
  $self->_debug("> entered folder `$name'");
  $self->{_depth}++;
}

sub failure_reason {
  return $_[0]->{_parse_fail_error} || '';
}

sub _debug ($$) {
  my ($self, $text) = @_;
  $self->{_debugging} .= ('  ' x $self->{_depth}) . $text . "\n";
#  warn '  ' x $self->{_depth}, $text, "\n";
}

sub debugging { $_[0]->{_debugging} }


1;
