#! /usr/bin/perl
##
#       txt2pdbdoc -- Doc Text to HTML converted for Palm Pilots
#       pdbtxt2html
#
#       Copyright (C) 1998  Paul J. Lucas
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#       
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#       
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##

########## You shouldn't have to change anything below this line. #############

use File::Basename;
use Getopt::Std;

$me = basename( $0 );
$VERSION = '1.3.1';

sub usage {
  die "usage: $me [-t] file.txt [ file.html ]\n       $me -v\n";
}

getopts( 'tv' ) or usage;

###############################################################################
# Input checks
###############################################################################

if ( $opt_v ) {
  print "$me version $VERSION\n";
  exit 0;
}

usage unless $#ARGV + 1 >= 1;
( $txt_file, $html_file ) = @ARGV;

open( INPUT, $txt_file ) or die "$me: can not open $txt_file for input\n";
if ( $html_file ) {
  open( OUTPUT, ">$html_file" ) or
    die "$me: can not open $html_file for output\n";
  select OUTPUT;
}

###############################################################################
# Set up URL syntax
# See RFC 1630: Universal Resource Identifiers in WWW
###############################################################################

# common sub-parts
$url_user_password    = '(?:[a-zA-Z][-\w.]+(?::\w+)?@)?';
$url_host             = '\w[-\w.]*\w';
$url_port             = '(?::\d+)?';
$url_path             = '(?:[-%/\w.~]*[-%/\w~])?';
$url_query            = '(?:\?[-!$%&\'()*+,./\w=?~]+)?';

$url_syntax{ ftp }    = "ftp://$url_user_password$url_host$url_port$url_path";
$url_syntax{ gopher } = "gopher://$url_host$url_port$url_path";
$url_syntax{ http }   = "https?://$url_host$url_port$url_path$url_query";
$url_syntax{ e_mail } = '[a-zA-Z][-\w.]+@[-\w.]+[a-zA-Z]';
$url_syntax{ mailto } = "mailto:$url_syntax{ e_mail }";
$url_syntax{ news   } = 'news:[a-z][-\w.]*\w';
$url_syntax{ telnet } = "telnet://$url_user_password$url_host$url_port";
$url_syntax{ wais }   = "wais://$url_host$url_port$url_path$url_query";

###############################################################################
# Do it!
###############################################################################

# get the bookmark character sequence from the end of the file
seek( INPUT, -10, 2 ) or die "$me: can not seek to end of file $txt_file\n";
while ( <INPUT> ) {
  s/^\s*(.*?)\s*$/$1/;
  next unless /^<([^>]+)>$/;
  $bookmark = quotemeta( $1 );
  last;
}
seek( INPUT, 0, 0 );

$last_level = 1;
while ( <INPUT> ) {
  s/^\s*(.*?)\s*$/$1/;
  last if /^<$bookmark>/;

  s/</\&lt;/g;
  s/>/\&gt;/g;

  $title = $_ unless $title;    # set title to first line

  # make embedded URLs selectable
  s!$url_syntax{ ftp    }!<A HREF="$&">$&</A>!g;
  s!$url_syntax{ gopher }!<A HREF="$&">$&</A>!g;
  s!$url_syntax{ http   }!<A HREF="$&">$&</A>!g;
  s!$url_syntax{ e_mail }!<A HREF="mailto:$&">$&</A>!g;
  s!$url_syntax{ news   }!<A HREF="$&">$&</A>!g;
  s!$url_syntax{ telnet }!<A HREF="$&">$&</A>!g;
  s!$url_syntax{ wais   }!<A HREF="$&">$&</A>!g;

  unless ( /^\s*$bookmark/o ) {
    $html .= "$_<BR>";
    next;
  }

  ##### parse heading and level

  $_ = $';
  ( $level, $heading ) = $_ =~ /^(\s*)(\S.*)$/;
  $initial_spaces = length( $level ) unless defined $initial_spaces;
  $level = length( $level ) - $initial_spaces + 1;
  $level = 1 if $level < 1;

  unless ( $opt_t ) {
    $html .= "<H$level>$heading</H$level>";
    next;
  }

  ##### build a table of contents

  s/[]["#%&+\/:;<=>?@\\^`{|}~]//g;  # make an anchor out of heading
  s/^\s*(.*?)\s*$/$1/;
  tr/A-Z /a-z_/;
  tr/_//s;

  if ( $level > $last_level ) {
    $toc .= "\n<DD><DL COMPACT>";
  } elsif ( $level < $last_level ) {
    $toc .= "\n</DL>" x ( $last_level - $level );
  }
  my $h = $level == 1 ? "<B>$heading</B>" : $heading;
  $toc .= "\n<DT><A HREF=\"#$_\">$h</A>";
  $html .= "<A NAME=\"$_\"></A>\n<H$level>$heading</H$level>";
  $last_level = $level;
}
close( INPUT );

print "<HTML><HEAD><TITLE>$title</TITLE></HEAD><BODY><H1>$title</H1><HR>\n";
if ( $toc ) {
  print "<H2>Contents</H2><DL><DT><DL>$toc\n";
  print '</DL>' x ( $last_level - $level + 1 ), "</DL><P><HR><P>\n";
}

# "beautify" HTML somewhat
$html =~ s!</H(\d)><BR>!</H$1>!g;
$html =~ s!(?:<BR>\s*){2,}!\n<P>\n!g;
$html =~ s!<BR>!<BR>\n!g;

print "$html\n<HR><SMALL>End of document</SMALL></BODY></HTML>\n";

close( OUTPUT );
exit 0;

# vim:set et sw=2 ts=2:
