#!/usr/bin/perl -wT
#
# searchbruno.pl -- Search for Bruno strips

# Inits

use strict;
use CGI;
use CGI::Pretty; # Remove this after debugging for faster downloading
use CGI::Carp qw / fatalsToBrowser warningsToBrowser /;

# Add more as we think of them...
my @people = qw/
  Allison
  Anise
  Bruno
  Dan
  Dana
  Dierdre
  Dije
  Doris
  Elian
  Henry
  Jenn
  Jeremy
  Judi
  Jules
  Lenny
  Louis
  Samantha
  Sara
  Sean
  Sophia
  Stanley
  Susan
/;

# <style type="text/css">
#   <!--
#   * {font-family: Arial, Heletica, Univers, sans-serif}
#   -->
# </style>

my $title        = 'das brunosürtschen';
my $style_sheet  = "* {font-family: Arial, Heletica, Univers, sans-serif}";
my $mailto_rph   = 'mailto:rph@liou.de';
my $mailto_knute = 'mailto:knute@trinityproject.org';

# Main

my $q = new CGI;
my $count         = 1;
my $ttl_hits      = 0;
my $start_at      = 1;
my $hits_per_page = 10;
my $narrow_search = 0;

if ($q->param  and  not $q->param('Back') ) {
  $start_at      = $q->param('start_at');
  $hits_per_page = $q->param('hits_per_page');
  $narrow_search = $q->param('narrow_search');
  print $q->header('text/html' );
  warningsToBrowser(1);
  print $q->start_html( -title    => $title,
                        -style    => $style_sheet,
                        -meta     => {
                                       'authors'     => 'Roland P. Hofmann, Knute Snortum',
                                       'description' => 'A Texual Search and Lookup for the Bruno Webcomic',
                                       'keywords'    => 'Bruno, www.brunostrip.com, Chris Baldwin, searchbruno, search, web comic'
                                     },
                        -bgcolor  => 'white',
                        -fgcolor  => 'black'
                      ),
        $q->h1('Search Results');
  print_strips($q);
  print $q->end_html;
}
else {
  show_form()
}

# Subs

sub show_form {

  $q->param('start_at', 1);
  print $q->header( 'text/html' );
  warningsToBrowser(1);
  print $q->start_html( -title    => $title,
                        -style    => $style_sheet,
                        -meta     => {
                                       'authors'     => 'Roland P. Hofmann, Knute Snortum',
                                       'description' => 'A Texual Search and Lookup for the Bruno Webcomic',
                                       'keywords'    => 'Bruno, www.brunostrip.com, Chris Baldwin, searchbruno, search, web comic'
                                     },
                        -bgcolor  => "white",
                        -fgcolor  => "black"),
        $q->h1( 'The Fabulous Bruno Strip Search' ),
        $q->p( "Don't misplace your modifiers or you'll be in trouble." );

  print <<"  END";
  <img src="http://www.brunostrip.com/brunoaway/brunoaway.gif" width=674 ismap usemap="#map2" border=0 alt="Today's Bruno strip" vspace=20 >
  <map name="map2">
    <area coords="470,0,674,12"  href="http://www.brunostrip.com/bruno.html" alt="Bruno's home">
    <area coords="550,12,674,30" href="http://www.moodycow.com"              alt="The Moody Cow store">
  </map>
  END

  print $q->p( "Hey, just so's yer aware, the data base is not complete yet.  We got most of " .
               "2002 and 2001 and some of 2000.  If you have a little time and a text editor ".
               "at hand, we hope you could transcribe a month or two.  Please, ".
               $q->a( {-href => $mailto_rph}, "write to rph!" ) ),
        $q->p( "Or if programming's you're thing or you just want to look at the CGI script, " .
               "you can download it <a href=../searchbruno.pl>here</a>.  You can send " .
               $q->a( {href => $mailto_knute}, "Knute" ) .
               " a diff or the modified script."),
        $q->start_form( 'post', $0, &CGI::URL_ENCODED ),
        $q->table(
          $q->Tr(
            $q->td( {-align => 'right'}, 'Text' ),
            $q->td( $q->textfield( -name => 'or_text', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Person' ),
            $q->td(
              $q->Select(
                { -name => 'or_person', -size => 1 },
                  $q->option( [ '', @people ] )
              )
            )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Other Person' ),
            $q->td( $q->textfield( -name => 'or_other', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Activity' ),
            $q->td( $q->textfield( -name => 'or_activity', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Comment' ),
            $q->td( $q->textfield( -name => 'or_comment', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Margin' ),
            $q->td( $q->textfield( -name => 'or_margin', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Location' ),
            $q->td( $q->textfield( -name => 'or_location', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Everything' ),
            $q->td( $q->textfield( -name => 'or_all', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Hits per page' ),
            $q->td( $q->textfield( -name => 'hits_per_page', -default=>10, -size =>'5') )
          ),
          $q->Tr(
            $q->td(''),
            $q->td(
              $q->radio_group( -name    => 'show_what',
                               -values  => ['Images', 'URLs', 'Both'],
                               -default => 'Images'
              )
            )
          )
        ), # End OR table

       # Submit buttons
       $q->table(
          $q->Tr(
            $q->td( $q->submit ),
            $q->td( $q->reset )
          )
       ),

       # Narrow search tables
       $q->hr,
       $q->center(
         $q->h3( 'Narrow search by requiring other matches (logical AND)' ),
         'The selection must match at least one of the below'
       ),
       $q->table(
          $q->Tr(
            $q->td( {-align => 'right'}, 'Text' ),
            $q->td( $q->textfield( -name => 'and_text', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Person' ),
            $q->td(
              $q->Select(
                { -name => 'and_person', -size => 1 },
                  $q->option( [ '', @people ] )
              )
            )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Other Person' ),
            $q->td( $q->textfield( -name => 'and_other', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Activity' ),
            $q->td( $q->textfield( -name => 'and_activity', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Comment' ),
            $q->td( $q->textfield( -name => 'and_comment', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Margin' ),
            $q->td( $q->textfield( -name => 'and_margin', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Location' ),
            $q->td( $q->textfield( -name => 'and_location', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Everything' ),
            $q->td( $q->textfield( -name => 'and_all', -size =>'20') )
          )
       ), # End AND table

       $q->hr,
       $q->center(
         $q->h3( 'Narrow search by excluding other matches (logical NOT)' ),
         'The selection may not match any of the below'
       ),
       $q->table(
          $q->Tr(
            $q->td( {-align => 'right'}, 'Text' ),
            $q->td( $q->textfield( -name => 'not_text', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Person' ),
            $q->td(
              $q->Select(
                { -name => 'not_person', -size => 1 },
                  $q->option( [ '', @people ] )
              )
            )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Other Person' ),
            $q->td( $q->textfield( -name => 'not_other', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Activity' ),
            $q->td( $q->textfield( -name => 'not_activity', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Comment' ),
            $q->td( $q->textfield( -name => 'not_comment', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Margin' ),
            $q->td( $q->textfield( -name => 'not_margin', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Location' ),
            $q->td( $q->textfield( -name => 'not_location', -size =>'20') )
          ),
          $q->Tr(
            $q->td( {-align => 'right'}, 'Everything' ),
            $q->td( $q->textfield( -name => 'not_all', -size =>'20') )
          )
       ), # End NOT table

       # Finish form
       $q->hidden( 'start_at', $q->param('start_at') ),
       $q->end_form,
       $q->end_html;

}

sub print_strips {
  my $q = shift;
  my @error = ();
  
  print "<!-- Looking for text file at /home/sn/snortum.net/html/bruno/bruno*.txt -->\n"; ###debug###

  FILE_LOOP: foreach my $file (glob "/home/sn/snortum.net/html/bruno/bruno*.txt") { 
    print "<!-- Searching file $file -->\n"; ###debug###
    open FH, $file  or die "Can't open $file, $!\n";

    my %rec = ();
    my $line;
    my $pic_url = '';
    my $last_match = '';

    while( defined($line = <FH>) ) {
      next if $line =~ /^\s*$/;

      # A URL signals the begining of a new record, so look for previous matches
      if ($line =~ /^http:/) {
        if ($pic_url) {
          last FILE_LOOP if find_match($q, $pic_url, %rec);
        }
        chomp($pic_url = $line);
        %rec = ();
      }
      elsif ($line =~ /^(\d{4}-\d{2}-\d{2})/) {
        $last_match = 'date';
        $rec{$last_match} = $1;
      }
      elsif ($line =~ /^Location:\s+(.*)\s*$/i) {
        $last_match = 'location';
        push @{ $rec{$last_match} }, $1;
      }
      elsif ($line =~ /^Margin:\s+(.*)\s*$/i) {
        $last_match = 'margin';
        push @{ $rec{$last_match} }, $1;
      }
      elsif ($line =~ /^Activity:\s+(.*)\s*$/i) {
        $last_match = 'activity';
        push @{ $rec{$last_match} }, $1;
      }
      elsif ($line =~ /^Comments?:\s+(.*)\s*$/i) {
        $last_match = 'comment';
        push @{ $rec{$last_match} }, $1;
      }
      elsif ($line =~ /^Persons:\s+(.*)\s*$/i) {
        $rec{person} = [ split /,\s*/, $1 ];
      }

      # Every other "keyed" line is assumed to be dialog
      elsif ($line =~ /^(\w[^:]*):\s+(\S.*)\s*$/) {
        my $person = quotemeta $1;
        push @{ $rec{person} }, $1 unless grep /^${person}$/, @{ $rec{person} };
        $last_match = 'text';
        push @{ $rec{$last_match} }, $2;
      }

      # Assume lines that start with whitespace belong to the last match
      elsif ($line =~ /^\s+(\S.*)\s*$/) {
        push @{ $rec{$last_match} }, $1;
      }

      # We don't know what it is
      else {
        push @error, "$file: $line"; # debug
      }

    } # end while

    # Find the last one
    find_match($q, $pic_url, %rec) if $pic_url;

    close FH;
  }

  print $q->p( "$start_at through $ttl_hits" ) if $ttl_hits;
  print $q->start_form( 'post', $0, &CGI::URL_ENCODED );

  if ($ttl_hits - $start_at + 1 < $hits_per_page) {
    print $q->p( 'End of search' ),
          $q->submit( -name => 'Back', -default => '1' );
  }
  else {
    print $q->table(
            $q->Tr(
              $q->td( $q->submit( "Next $hits_per_page" ) ),
              $q->td( $q->submit( -name => 'Back', -default => '1' ) ),
            ),
          );

    $q->param(-name => 'start_at', -value => $start_at + $hits_per_page);

    foreach ($q->param) {
      print $q->hidden( $_, $q->param($_) ) if $q->param($_);
    }
  }

  print $q->end_form;

  # Print lines that didn't match (error in data or program, debug)
  if (@error) {
    print $q->hr,
          $q->center( $q->h3( "Debugging Info" ) ),
          $q->p( "The following data lines were not matched:" ),
          $q->pre( @error );
  }

  return;
}

sub find_match {
  my ($q, $pic_url, %rec) = @_;
  my @urls = ();

  # Search for OR matches
  OR_LOOP: foreach my $name (grep /^or_/, $q->param) {
    my $value = quotemeta $q->param($name);
    next unless $value;
    $name =~ s/^or_//;
    if ($name eq 'all') {
      foreach my $key (keys %rec) {
        if ( $key ne 'date'     and
             exists $rec{$key}  and
             grep /${value}/i, @{ $rec{$key} }
           )
        {
          add_url($q, \@urls, $pic_url, %rec);
          last OR_LOOP;
        }
      }
    }
    elsif ( ( $name eq 'person'  or  $name eq 'other')  and
              exists $rec{person}                       and
              grep /^${value}$/i, @{ $rec{person} } )
    {
      add_url($q, \@urls, $pic_url, %rec);
      last OR_LOOP;
    }
    elsif ( exists $rec{$name}  and
            grep /${value}/i, @{ $rec{$name} } )
    {
      add_url($q, \@urls, $pic_url, %rec);
      last OR_LOOP;
    }

  }

  # Show no more matches than $hits_per_page
  if (@urls) {
    $ttl_hits += @urls;
    ## print $q->p( "Total hits so far: $ttl_hits, This pass: " . scalar(@urls) ); # debug
    ## print $q->p( "Count: $count, Start at: $start_at, Hits/page: $hits_per_page\n" ); # debug
    foreach my $url (@urls) {
      last if $count - $start_at >= $hits_per_page;
      next if $count++ < $start_at;
      print $q->hr() if $q->param('show_what') eq 'Both';
      if ( $q->param('show_what') eq 'URLs'  or  $q->param('show_what') eq 'Both' ) {
        print $q->p( $q->a( {-href => $url}, $url ) );
      }
      if ( $q->param('show_what') eq 'Images'  or  $q->param('show_what') eq 'Both' ) {
        print $q->p( $q->img( {-src => $url} ) );
      }
    }
  }

  return $count - $start_at >= $hits_per_page;
}

# Add URL unless it matches narrowing conditions
sub add_url {
  my ($q, $r_urls, $pic_url, %rec) = @_;

  return if grep /^\Q$pic_url\E$/, @$r_urls; # Don't add it twice

  # If it matches a NOT value, we're done
  foreach my $name (grep /^not_/, $q->param) {
    my $value = quotemeta $q->param($name);
    next unless $value;
    $name =~ s/^not_//;

    if ($name eq 'all') {
      foreach my $key (keys %rec) {
        if ( $key ne 'date'     and
             exists $rec{$key}  and
             grep /${value}/i, @{ $rec{$key} }
           )
        {
          return;
        }
      }
    }
    elsif ( ( $name eq 'person'  or  $name eq 'other')  and
              exists $rec{person}                       and
              grep /^${value}$/i, @{ $rec{person} } )
    {
      return;
    }
    elsif ( exists $rec{$name}  and
            grep /${value}/i, @{ $rec{$name} } )
    {
      return;
    }
  }

  # If there are no AND params, it matches
  my @and_params = grep /^and_/, $q->param;
  my $found_and  = 0;

  foreach (map $q->param($_), @and_params) {
    if ($_) {
      $found_and = 1;
      last;
    }
  }

  unless ($found_and) {
     push @$r_urls, $pic_url;
     return;
  }

  # Otherwise, it must match one AND param
  AND_LOOP: foreach my $name (@and_params) {
    my $value = quotemeta $q->param($name);
    next unless $value;
    $name =~ s/^and_//;

    if ($name eq 'all') {
      foreach my $key (keys %rec) {
        if ( $key ne 'date'     and
             exists $rec{$key}  and
             grep /${value}/i, @{ $rec{$key} }
           )
        {
          push @$r_urls, $pic_url;
          last AND_LOOP;
        }
      }
    }
    elsif ( ( $name eq 'person'  or  $name eq 'other')  and
              exists $rec{person}                       and
              grep /^${value}$/i, @{ $rec{person} } )
    {
      push @$r_urls, $pic_url;
      last AND_LOOP;
    }
    elsif ( exists $rec{$name}  and
            grep /${value}/i, @{ $rec{$name} } )
    {
      push @$r_urls, $pic_url;
      last AND_LOOP;
    }
  }

  return;
}
