#! /usr/bin/perl -w
#
# This software is copyright, 1999, Norman Gray. 
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# 
# Author: Norman Gray, norman@astro.gla.ac.uk.
# Department of Physics and Astronomy, University of Glasgow, UK
#
# See the file LICENCE for a copy of the GPL.
# You can also find an online copy at http://www.gnu.org/copyleft/gpl.html .
#
# $Id: bibhtml,v 1.1.1.1 1999/06/06 16:31:13 norman Exp $
#
# Changes:
#   1.1  : 6 June 1999, cleaned up documentation,
#          modified match of $bibfilename#... , requiring $bibfilename to be
#          present even in the bibliography file itself.  Clarified
#          copyright and licence.
#   1.0b6: March 1997, bugfix
#   1.0b5: March 1997, added $bibconfig, to allow it to be set
#   1.0b4: December 1996, changed default for $bibdata (to bib.bib), and
#          changed the start pattern to '<!--\s*', from '--\s*'.
#   1.0b3: December 1996, added $bibstyle, added --version
#   1.0b2: June 1996, corrected arg 1 of unshift, to include @
#   1.0b1: April 1996, released to the world!
#
# See <http://www.astro.gla.ac.uk/users/norman/distrib/bibhtml.html>
# for documentation.

# Identification string: printed by --version and in the usage messages.
$version = 'Bibhtml, version 1.1, Norman Gray <norman@astro.gla.ac.uk>, 6 June 1999';

# Built-in defaults.  Each can be replaced from the command line or from
# the configuration file:
#   $rootname  (-r)  stem used to build the .aux, .bbl and .html file names
#   $bibdata   (-b)  argument written to \bibdata; 'bib' means ./bib.bib
#   $bibstyle  (-s)  argument written to \bibstyle
#   $bibconfig (-c)  file holding extra command-line arguments
($rootname, $bibdata, $bibstyle, $bibconfig)
    = ('bibliography', 'bib', 'plainhtml', 'bibhtml.config');

# Switches: +3/-3 sets $html3, -a sets $citeall, -v and -q set and clear
# $verbose, --merge sets $mergebib.
($html3, $citeall, $verbose, $mergebib) = (0, 0, 1, 0);

# Set to 1 to switch on debugging, set > 1 to exit after option parsing.
$debug = 0;

# Program name: whatever follows the last slash (or colon, on a Mac) in $0.
($progname) = $0 =~ m{([^/:]*)$};

# MacPerl is recognised from the name of the running interpreter.
$MacPerl = $^X =~ /Mac/;


# Nothing at all on the command line: say how to run the program and stop.
unless (@ARGV) {
    unless ($MacPerl) {
        print "Usage: $progname filename ...\n[$version]\n";
        exit 1;
    }
    print <<"EOD";

$version

To use: 
    1. Drag one or more .html files on to the MacBibhtml icon, 
       to create an .aux file
    2. Run BibTeX on this .aux file
    3. Drag the newly-generated .bbl file on to the MacBibhtml icon.

EOD
    exit 1;
}

# Directory part of the first argument: everything in front of its final
# run of characters that are neither '/' nor ':'.  On a Mac the script
# changes into this directory (below), so relative names such as the
# configuration file are then looked up there.
$ARGV[0] =~ m{[^/:]*$};
$defdir = $`;

if ($MacPerl) {
    print "We're on a mac: defdir $defdir\n";
    # The Mac Finder has no convenient notion of a `current directory',
    # so make the first argument's directory the working directory.
    chdir($defdir) or die "can't change dir to $defdir";
}

# NOTE(review): this trace is printed unconditionally, before -q is parsed.
print "progname $progname, argv=@ARGV\n";

#------

# An explicit -c anywhere on the command line switches off the automatic
# read of the default configuration file: $bibconfig is undefined so the
# test below fails.  Otherwise the default file is read if it is readable.
undef $bibconfig if grep { /^-c/ } @ARGV;
readconfig($bibconfig) if defined($bibconfig) && -r $bibconfig;

# Consume leading arguments that start with '-' or '+'.  $_ holds the
# option being examined for the whole of one pass round the loop.
while (@ARGV)
{
    $_ = $ARGV[0];
    last unless /^[-+]/;

    # Options that stand alone.
    $html3    = ($1 eq '+') if /^([-+])3/;
    $verbose  = 1           if /^-v/;
    $verbose  = 0           if /^-q/;
    $citeall  = 1           if /^-a/;
    if (/^--version/) {
        print STDERR $version, "\n";
        exit;
    }
    $mergebib = 1           if /^--merge/;

    # Drop the option itself; if nothing follows there is no value to take.
    shift @ARGV;
    last unless @ARGV;

    # Options that take the following argument as their value.
    $rootname = shift @ARGV     if /^-r/;
    $bibdata  = shift @ARGV     if /^-b/;
    $bibstyle = shift @ARGV     if /^-s/;
    # -c file: the file's arguments are pushed on the front of @ARGV and
    # are parsed by the following passes of this loop.
    readconfig(shift @ARGV)     if /^-c/;
}


if ($debug)
{
    # Show what option parsing produced.
    print "rootname $rootname, bibdata $bibdata\n";
    print "html3\n"   if $html3;
    print "verbose\n" if $verbose;
    print "citeall\n" if $citeall;
    print "ARGV @ARGV\n";
    exit if $debug > 1;
}

# Decide which phase to run:
#   -1  only write the .aux file
#    0  write the .aux file, run BibTeX, merge the .bbl file
#   +1  only merge the .bbl file into the .html file
if ($mergebib)
{
    # bibhtml --merge file.bbl file.html: do nothing other than merge a
    # bbl file into an html file.
    if (@ARGV < 2) {
        print "Usage: $progname --merge file.bbl file.html\n";
        exit;
    }

    $phase = +1;
    ($bblfilename, $bibfilename) = @ARGV[0, 1];

    # Insist that both files have the correct extension.  This route will
    # mostly be called by scripts, so the check is both useful and easily
    # satisfied.
    unless ($bblfilename =~ /\.bbl$/ && $bibfilename =~ /\.html$/) {
        print STDERR "Usage: $progname --merge file.bbl file.html\n\tfile extensions required!\n";
        exit;
    }
}
else
{
    unless (@ARGV) {
        print "Usage: $progname filename ...\n";
        exit;
    }

    # Invoked as `bibhtml': do everything.  Invoked under any other name:
    # let the first argument decide.  A .bbl file means merge it (its stem
    # becomes $rootname); anything else means just create the .aux file,
    # ready for BibTeX to be run by hand.
    if ($progname eq 'bibhtml') {
        $phase = 0;
    }
    elsif ($ARGV[0] =~ /(.*)\.bbl$/) {
        $rootname = $1;
        $phase = +1;
    }
    else {
        $phase = -1;
    }
}

if ($debug) {
    print "rootname $rootname\nbibdata $bibdata\nphase $phase, html3 $html3\n";
}


# Broken-down local time, and the month names used when printing it.
@date  = localtime;
@mname = qw(January February March April May June
            July August September October November December);

# Any file name not already fixed (by --merge) is built from $rootname.
defined($bblfilename) or $bblfilename = "$rootname.bbl";
defined($bibfilename) or $bibfilename = "$rootname.html";
defined($auxfilename) or $auxfilename = "$rootname.aux";

print "bibfilename=$bibfilename, bibdata=$bibdata\n" if $verbose;

# Phase -1 or 0: write the .aux file that BibTeX will read.
#
# The file holds \relax, then either \citation{*} (when -a was given) or
# one \citation{key} for every `$bibfilename#key' reference found in the
# files named on the command line, then \bibstyle and \bibdata.
if ($phase <= 0)		# -1 or 0
{
    print "Creating $auxfilename...\n" if ($verbose);

    # Three-argument open, so the file name is never read as a mode or pipe.
    open (my $aux, '>', $auxfilename)
        or die "can't open $auxfilename to write: $!\n";
    print {$aux} "\\relax\n";	# not really necessary, but...

    if ($citeall)
    {
        print "Citing all...\n" if ($verbose);
        print {$aux} "\\citation{*}\n";
    }
    else
    {
        foreach my $fn (@ARGV)
        {
            print "scanning $fn...\n" if ($verbose);

            open (my $in, '<', $fn) or die "can't open $fn to read: $!\n";

            while (my $line = <$in>)
            {
                # Match possibly multiple times on one line.
                # Note bibfilename must be present, even if this is the
                # bibliography itself.  \Q...\E makes the file name match
                # literally: without it the '.' in `bibliography.html'
                # matches any character, and other regex metacharacters
                # in the name break the pattern.
                foreach my $key ($line =~ /\Q$bibfilename\E#([^\"]+)/g) {
                    print {$aux} "\\citation{$key}\n";
                }
            }

            close ($in);
        }
    }

    print {$aux} "\\bibstyle{$bibstyle}\n";
    print {$aux} "\\bibdata{$bibdata}\n";

    # Buffered write errors only show up when the file is closed.
    close ($aux) or die "can't finish writing $auxfilename: $!\n";
}
  

# Phase 0 only, and never under MacPerl: run BibTeX on the .aux file just
# written.  A non-zero status from system() is fatal.
if (! $MacPerl && $phase == 0)
{
    print "Calling BibTeX...\n" if ($verbose);

    # List form: $rootname is handed to bibtex as a single argument and is
    # never interpreted by a shell, so spaces or shell metacharacters in
    # the root name are harmless.
    system ('bibtex', $rootname) == 0 or die "can't bibtex $auxfilename\n";
}

# Phase 0 or +1: merge the .bbl file into the bibliography .html file.
#
# The existing $bibfilename is renamed to $bibfilename.old and rewritten:
#   * lines up to and including the `<!-- bibhtml:start' comment are copied;
#   * the .bbl file is inserted, with TeX markup reduced to HTML;
#   * old lines are dropped up to the `bibhtml:end' comment, which is kept;
#   * the rest of the file is copied.
# Wherever a `bibhtml:today' comment is copied, the line following it is
# replaced by today's date.  With $debug set, nothing is renamed and the
# result goes to the currently selected output handle instead.
# Nothing happens if the .bbl file is not readable.
if ($phase >= 0 && -r $bblfilename) 	# $phase +1 or 0
{
    print "Merging $bblfilename into $bibfilename...\n" if ($verbose);

    # Date line such as "6 June 1999".  localtime's year field counts from
    # 1900, so it has to be added to 1900, not appended to the text "19".
    my $today = sprintf ("%d %s %d\n",
                         $date[3], $mname[$date[4]], $date[5] + 1900);

    my ($orig, $out, $prevout);
    if ($debug)
    {
        open ($orig, '<', $bibfilename)
            or die "can't open $bibfilename to read: $!";
    }
    else
    {
        rename ($bibfilename, "$bibfilename.old")
            or die "can't rename $bibfilename: $!";
        open ($out, '>', $bibfilename)
            or die "can't open $bibfilename to write: $!";
        # Every plain `print' below now writes to the new file.
        $prevout = select ($out);
        open ($orig, '<', "$bibfilename.old")
            or die "can't open $bibfilename.old to read: $!";
    }
    open (my $bbl, '<', $bblfilename)
        or die "can't open $bblfilename to read: $!";


    # Copy the old file to the new one, up to the start marker.
    my $foundslot = 0;

    FINDSLOT: while (my $line = <$orig>)
    {
        print $line;
        if ($line =~ /<!--\s*bibhtml:today/)
        {
            # Read and discard the line holding the old date.
            my $olddate = <$orig>;
            print $today;
        }
        elsif ($line =~ /<!--\s*bibhtml:start/)
        {
            $foundslot = 1;
            print "Found start\n" if ($debug);
            last FINDSLOT;
        }
    }

    $foundslot || die "couldn't find the insertion point";

    # NOTE(review): '&endash;' is kept unchanged; HTML 4 names this entity
    # '&ndash;' -- confirm which spelling +3 output is meant to use.
    my ($nbsp, $endash) = $html3 ? ('&nbsp;', '&endash;') : (' ', '-');

    while (my $line = <$bbl>)
    {
        $line =~ s/\~/$nbsp/g;
        $line =~ s/[\{\}]//g;
        $line =~ s/--/$endash/g;
        # If a line ends with a %, then remove the %\n.  write$ does
        # a decent job of line breaking, but if it can't break a line
        # at space, it puts in a %, which is OK in TeX, but messes
        # up HTML.  The newline is optional in the pattern so that a
        # final line without one loses only its '%'.
        $line =~ s/%\n?\z//;
        print $line;
    }

    # Now discard lines of the old file until one matches bibhtml:end in
    # a comment; that line itself is copied.
    $foundslot = 0;

    SKIPGAP: while (my $line = <$orig>)
    {
        next SKIPGAP unless $line =~ /--\s*bibhtml:end/;
        print $line;
        $foundslot = 1;
        print "Found end\n" if ($debug);
        last SKIPGAP;
    }

    $foundslot || warn "couldn't find end marker";

    # Copy the remainder of the old file, refreshing any date lines.
    while (my $line = <$orig>)
    {
        print $line;
        if ($line =~ /--\s*bibhtml:today/)
        {
            # Read and discard the line holding the old date.
            my $olddate = <$orig>;
            print $today;
        }
    }

    close ($orig);
    close ($bbl);
    if (!$debug)
    {
        # Put the default output handle back before closing the file, and
        # report write errors, which only surface at close.
        select ($prevout);
        close ($out) or die "can't finish writing $bibfilename: $!";
    }
}


# readconfig FILENAME
#
# Read extra command-line arguments from the first line of FILENAME and
# put them, in order, at the front of @ARGV.  Arguments are separated by
# whitespace.  Only the first line of the file is used.
#
# If FILENAME is not readable a message is printed and @ARGV is left
# alone; the same holds if the file cannot be opened or is empty.
# The caller's $_ is not modified.  Returns nothing useful.
sub readconfig {
    my $bibconfig = shift;

    unless (-r $bibconfig)
    {
        print "no config file $bibconfig\n";
        return;
    }

    print STDERR "reading config $bibconfig\n";

    my $conf;
    unless (open ($conf, '<', $bibconfig))
    {
        print STDERR "can't open config $bibconfig: $!\n";
        return;
    }
    my $line = <$conf>;
    close ($conf);

    # An empty file supplies no arguments.
    return unless defined ($line);

    # chomp, not chop: a last line without a newline must keep its
    # final character.
    chomp ($line);

    # split ' ' skips leading whitespace, so a line starting with blanks
    # does not produce an empty first argument.
    my @newargs = split (' ', $line);
    unshift (@ARGV, @newargs);
    print "readconfig: ARGV[0]=($ARGV[0]), ARGV=@ARGV\n" if ($debug);
    return;
}

