#!/usr/local/bin/perl5 -w
# <title> AG2HTML.pl </title>
# <h1> AG2HTML.pl Amiga Guide to HTML Converter</h1>
# <listing>
# <b>This code is (c) 1993,1994,1995 to Michael Witbrock</b>
# <b>This code is  1993,1994,1995 to Michael Witbrock</b>
# <b>You may use it and modify and redistribute it freely,</b>
# <b>but you may not sell it in any way (including in disk collections)</b>
# <b>without first recieving my permission.</b>
# <b>Fred Fish, and the makers of the Aminet CD are granted that permission.
# <b>If you significantly improve it, please let me know so that I can</b>
# <b>use the new version.</b>
# <b>You may contact me as witbrock@cs.cmu.edu</b>

$VERSION = "2.951201";
($prog=$0) =~ s@.*\/([^/]+)@$1@;
print "$prog: version $VERSION\n";
#http://www.cs.cmu.edu/~mjw/Amiga/Perl/AG2HTML.pl
#is a URL to the latest version of this program.

# P.S. I know that this is redundant. It's my second perl script, and I don't
#     yet know how to do subroutines. When I do, it will be both neater and shorter.
# P.P.S the reason it is preformatted is because many AG documents have button
#     layouts which depend on this. Perhaps later I will make it smarter, so that
#     it can recognise obvious text paragraphs and do the right thing.
# DONE: P.P.P.S handling some of the style flags, and the next, menu, prev, help, etc buttons
#      is next, along with removing what can't be done (background colours etc).


#Changes: October 16 1993 MJW
#         Allow email addresses.
#         Translate & to &amp; > to &gt; < to &lt; 
#Changes: November the 9th 1993 MJW
#         Allow some strange links that have strings after the link name
#         found in Viewport
#Changes: March 12 1994 MJW
#         Allow Link whereever I allow link -after Steve Gowdy's suggestion-
#Changes: April 22 1994 MJW
#         Major rewrite?
#         for Daniel Barrett, @{"mybutton" system "more myfile"} -> <A HREF="myfile">mybutton</A>
#          THE next version will handle pictures in AmigaReport files automatically, but that may be a few days off.
#          Picture conversions done
#          Automatic inlining of pictures done for ones like the following 

#UNHANDLED?: @{"Lynx" system "display.s 10,100,AR215_pic1.iff,Lynx"} is a user-friendly hypertext interface on UNIX and VMS 
#UNHANDLED?: The @{"Commodore Amiga Information Resource" system "display.s 10,100,AR215_pic2.iff,CAIR"} is a collection 
#UNHANDLED?: - @{"Amiga Report" system "display.s 10,100,AR215_pic3.iff,AR on the WWW"}

#Changes May 28th 1994
# Amiga report changed to display like this:
# @{" AR Logo " system "display.s 650 100 AR217_pic2.iff Amiga_Report"}
# Change it to handle that too.
# June 9th ---- Changed pointer to itself
# June 18th ---- version numbers, so I can tell people when it changes
# June 28th ---- try to tolower links so they work for gowdy
# Nov 23 1994 Fixed to work under perl 5, but bug handling multiple links
# on one line persists! I wish they wouldn't do that.
# (fixed for Waldemar Zurowski)
#
# Nov 26 1994 Incorporated Bilbo (Waldemar)'s changes to handle multiple links
# in a line properly. Thanks Waldemar!.
#In real       : Waldemar Zurowski
#email internet: WALDEK@PLEARN.EDU.PL or bilbo@ci.pwr.wroc.pl
#<A HREF="http://sun10.ci.pwr.wroc.pl/~bilbo/">Bilbo</A>
#
# Nov 26 1994 b. Fixed strange links with commas
# UNHANDLED?:                           @{"  North America  ", link P6-3-3}
# Nov 26 1994 c. Tidied up IFF handling
# Dec 01 1994    Hangle .guide .Guide etc (bug rept from Gael Marziou)
# Jan 23 1995 Waldemar Zurowski: Fix another capitalisation bug
# Feb 14 1995 Michael Witbrock : Fix yet another capitalisation bug!
# Nov 19 1995 Michael Witbrock : Correct HTML document structure
# Dec 01 1995 Michael Witbrock : Active URLs

sub handlenode{
    while (<GUIDEF>){
        chop;
        if (m/\@[eE][nN][dD][Nn][oO][dD][Ee]/){
	    # print "Found \@endnode\n";            # found @endnode
	    print PAGEF "</pre>\n"; # 
	    if ($buttonline ne "<HR>") { print PAGEF "$buttonline"; }
	    print PAGEF "<HR><font size=-2>HTML Conversion by <a href=\"http://www.cs.cmu.edu/~mjw/Amiga/Perl/AG2HTML.pl\"><i>AG2HTML.pl</i></a> V${VERSION}, perl $] &amp; <a href=\"http://www.cs.cmu.edu/~mjw/\"><i>witbrock\@cs.cmu.edu</i></a></font>\n</BODY></HTML>";
	    close (PAGEF);	# 
	    last;		# 
        } else {
	    # Remember TOC link, and delete the line if found
	    if (m/\@[tT][oO][cC]\s*"?([^\"\s\}]*)"?/){
		($tmp=$1) =~ y/A-Z/a-z/;
		$buttonline = $buttonline."<a href=\"${tmp}\.HTML\">[Contents]<\/a> ";
		next;		# 
	    }			# 
	    # Remember prev link, and delete the line if found
	    if (m/\@[pP][rR][eE][vV]\s*"?([^\"\s\}]*)"?/){
		($tmp =$1) =~ y/A-Z/a-z/;
		$buttonline = $buttonline."<a href=\"${tmp}\.HTML\">[Browse <-]<\/a> ";
		next;
	    }
	    # Remember next link, and delete the line if found
	    if (m/\@[nN][eE][xX][tT]\s*"?([^\"\s\}]*)"?/){
		($tmp =$1) =~ y/A-Z/a-z/;
		$buttonline = $buttonline."<a href=\"${tmp}\.HTML\">[Browse ->]<\/a> ";
		next;
	    }
	    # Remember help link, and delete the line if found
	    if (m/\@[hH][eE][lL][pP]\s*"?([^\"\s\}]*)"?/){
		($tmp =$1) =~ y/A-Z/a-z/;
		$buttonline = $buttonline."<a href=\"${tmp}\.HTML\">[Help]<\/a> ";
		next;
	    }
	    # otherwise look for more calls, links, or plain text
#	    study;
	    s/\&/\&amp;/g;
	    s/>/\&gt;/g;
	    s/</\&lt;/g;
	    # Handle pictures (as inline maybe change eventually)
	    s/\@\{\s*\"([^\"]*)\"\s+[sS][yY][Ss][tT][eE][mM]\s+\"displa\S* [0-9,\s]*(\S*)\.iff[,\s]*([^\"\}]*)\"*\s*\}/<p>\<img src=\"$2\.gif\"\><br>$3<p>$1/g;

	    # for Daniel Barrett, \@{"mybutton" system "more myfile"} -> <A HREF="myfile">mybutton</A>
	    s/\@\{\s*\"([^\"]*)\"\s+[sS][yY][Ss][tT][eE][mM]\s+\"more *([^\"\s\}]*)\"*\s*\}/\<a href=\"$2\"\>$1<\/a>/;
	    #link - FROM BILBO: Here are my changes - I replaced 3 ifS with 
	    #       whileS and remove 'g' option after s/PATTERN/PATTERN/
	    while (m/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*\}/){
		($link = $2) =~ y/A-Z/a-z/; # 
		s/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*\}/\<a href=\"$link\.HTML\"\>$1<\/a>/; #HERE WAS 'G' OPTION
	    }
	    # found in viewport -- link with string after don't know what means
   	    while (m/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*\"[^\"]*\"\s*\}/) {
		($link = $2) =~ y/A-Z/a-z/; # 
		s/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*\"[^\"]*\"\s*\}/\<a href=\"$link\.HTML\"\>$1<\/a>/; #HERE WAS 'G' OPTION
            }
	    # found in kingcon -- link with number after don't know what means
            while (m/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*[0123456789]*\s*\}/) {
		($link = $2) =~ y/A-Z/a-z/; # 
		s/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*[0123456789]*\s*\}/\<a href=\"$link\.HTML\"\>$1<\/a>/;#HERE WAS 'G' OPTION
            }
	    s/\@\{[bB]\}/<B>/g;  s/\@\{[uU][bB]\}/<\/B>/g; # bold
	    s/\@\{[iI]\}/<I>/g;  s/\@\{[uU][iI]\}/<\/I>/g; #italic
    # make URLs active
            s@(http:\/\/\S+)(\s+)@<a href="$1">$1</a>$2@g;
            s@(ftp:\/\/\S+)(\s+)@<a href="$1">$1</a>$2@g;    
            s@(news:\/\/\S+)(\s+)@<a href="$1">$1</a>$2@g;     
            s@(gopher:\/\/\S+)(\s+)@<a href="$1">$1</a>$2@g;     
            s@(http:\/\/\S+)$@<a href="$1">$1</a>@g;
            s@(ftp:\/\/\S+)$@<a href="$1">$1</a>@g;    
            s@(news:\/\/\S+)$@<a href="$1">$1</a>@g;     
            s@(gopher:\/\/\S+)$@<a href="$1">$1</a>@g;     
    # make mail actice
            s@(\S+\@\S+\.\S+)$@<a href="mailto:$1">$1</a>@g; 
            s@(\S+\@\S+\.\S+)(\s+)@<a href="mailto:$1">$1</a>$2@g; 

	    if (m/\@\{/) {		# Recognise and hide unhandled cases 
		print "UNHANDLED?: $_\n";
		s/(\@\{[^\}]*\})/<!- Unhandled AmigaGuide(TM) sequence "$1">/g;
	    }			       
	    print PAGEF  "$_\n";
        }
    }
}

die "Usage: %0 <AmigaGuideFile> $!\n N.B. This program puts AG node HTML files in a dir.\n"
    unless ( $#ARGV == 0 );
$agname = $ARGV[0]; $_ = $agname;
if (/(.*)\.[Gg][Uu][iI][Dd][eE]/){
  $root = $1;
} else {
  die "Error: $agname doesn't seem to be an AmigaGuide(TM) file.\n";
}

$dirname = $root."_Sections";
mkdir($dirname,oct(777)) unless (-e $dirname);
die "Couldn't make $dirname \n" unless -e $dirname;
open (GUIDEF, $agname) || die "Can't open $agname: $!\n";
     
$databasefound=0;
# Move suitably labelled pictures to go with the file
@iffs = <${root}_*.iff>;
if (defined(@iffs)){
    $ciff = $#iffs+1;
    print "# Moving $ciff iff files: @iffs \n";
    system ("mv ${root}_\*\.iff $dirname");
}
# Convert them to something mosaic can handle
opendir(SECT,$dirname) || die "Can't read dir $dirname\n";
while ($_ = readdir(SECT)){
    next unless m#(.*)\.iff#;
    print "Converting picture $1 to gif \n" unless -e  "${dirname}/$1.gif";
    print "Founded gif version of picture $1 \n" if -e  "${dirname}/$1.gif";
    system("ilbmtoppm < ${dirname}/$1.iff | ppmtogif > ${dirname}/$1.gif")
	unless -e  "${dirname}/$1.gif";

}
closedir(SECT);
    
while (<GUIDEF>) {
    chop;
    if ((m/\@[dD][Aa][Tt][Aa][bB][aA][sS][eE]\s*"(.*)"/)
	|| (m/\@[dD][Aa][Tt][Aa][bB][aA][sS][eE]\s*(\S*)/)){
	if ($databasefound != 0) {
	    print "IGNORED: database label $1 found after first one $database\n";
	    next;
	}
	$databasefound = 1;
	$database=$1;
	print "Database: $database\n";
	while (<GUIDEF>){
	    chop;
	    $nodetitle="Untitled";
	    $buttonline = "<HR>";
	    if ( (m/\@[Nn][oO][dD][Ee]\s*"(\S*)"\s*"(.*)"/)
		|| (m/\@[Nn][oO][dD][Ee]\s*(\S*)\s*"(.*)"/)
		|| (m/\@[Nn][oO][dD][Ee]\s*(\S*)/)
		){  
		# found \@Node LABEL "title"
		# or just \@node Label or even \@node "label" "title"
		$nodename = $1;
                $nodename =~ y/A-Z/a-z/;
		$nodetitle = $2;
		$htmlname = $dirname."/".$nodename.".HTML";
		if (-e $htmlname) {
		    unlink($htmlname);
		}
		open (PAGEF,'>'.$htmlname)
		    || die "Can't open $htmlname to write $!\n";
		print PAGEF "<HTML>\n<HEAD><TITLE>$nodetitle</TITLE></HEAD>\n";
		print PAGEF "<BODY BGCOLOR=\"#FFEFE0\"><H1>$nodetitle</H1>\n<pre>\n";
		last;
	    } else {
		if (m/^.*\S.*$/) {
		    print "# SKIPPED while looking for \@node: $_ \n";
		}
		next;
	    }
	}   # Found first @node line
	print "NOTE: main node is $htmlname\n";
	# Now look for end of first node
	&handlenode;
	# found end of first node, or of the file     
    } else { # No database label on this line 
	if ($databasefound == 0){  # stuff before first @database is ignored
	    print "#SKIPPED while looking for database: $_\n";
	    next;
	}
	# otherwise, it may be a perfectly good line
	# check whether it is the start of a new node
	$nodetitle="Untitled";
	$buttonline = "<HR>";
	if ( (m/\@[Nn][oO][dD][Ee]\s*\"(\S*)\"\s*\"(.*)\"/)
	    || (m/\@[Nn][oO][dD][Ee]\s*(\S*)\s*\"(.*)\"/)
	    || (m/\@[Nn][oO][dD][Ee]\s*(\S*)/)
	    ){  # found @Node LABEL "title", @node Label, or even @node "label" "title"
	    $nodename = $1;
	    $nodetitle = "No Known Title";
	    $nodetitle = $2 if defined $2;
	    $nodename =~ y/A-Z/a-z/;
	    $htmlname = $dirname."/".$nodename.".HTML";
	    unlink($htmlname) if -e $htmlname;
	    open (PAGEF,'>'.$htmlname)
		|| die "Can't open $htmlname to write $!\n";
            print PAGEF "<HTML>\n<HEAD><TITLE>$nodetitle</TITLE></HEAD>\n";
	    print PAGEF "<BODY BGCOLOR=\"#FFEFE0\"><H1>$nodetitle</H1>\n<pre>\n";
	    # print "Found Node: $nodename $nodetitle as $htmlname \n";
	    # since we found the beginning of the node, copy to the end
	    &handlenode;
	} else {   # Haven't yet found a @node
	    if (m/^.*\S.*$/) {
              print "# SKIPPED while looking for \@node: $_ \n";
           }
	    next;
	}
    }
}
# 
# 

close GUIDEF;

# </listing>
