#!/usr/local/bin/perl

# artex  -  bundles a LaTeX2e document with non-standard external files.
#
# Copyright (C) 1995 Nick Efford.
#
# This script analyzes a LaTeX2e document to determine whether it loads
# any non-standard files. Any such files are bundled with the document by
# means of filecontents or filecontents* environments. The resulting
# document should be portable to any system with a standard installation
# of LaTeX.
#
# Tested with:
#
#     Perl 4.036 and 5.000 under Irix 5.2
#     Perl 4.036 (bigperl) under MS-DOS 6.2
#
# Direct bug reports, comments and suggestions to:
#
#     nde@dcre.leeds.ac.uk

# Program name (the invocation path with any leading directories
# removed), version string and revision date.

($PROG = $0) =~ s!.*/!!;
$VER = '1.00b';
$DATE = '18/2/95';

# Configuration - uncomment/edit to suit your OS.

# UNIX:
$copy_cmd = 'cp';                  # command used to back up the document
$copy_out = "";                    # text appended to the copy command line
$to_null = '1> /dev/null 2>&1';    # redirection used by -q to silence LaTeX
($TEXINPUTS = $ENV{'TEXINPUTS'})
    || die "$PROG: TEXINPUTS variable not defined\n";
@texinputs = split(/:/, $TEXINPUTS);
$BIBINPUTS = $ENV{'BIBINPUTS'} || '.';
@bibinputs = split(/:/, $BIBINPUTS);

# The configuration file lives in the user's home directory. open() does
# not perform shell-style tilde expansion, so the directory is taken from
# the HOME environment variable (falling back to the current directory
# if HOME is not set) rather than being written as "~".
$config_file = ($ENV{'HOME'} || '.') . "/.$PROG" . "rc";

# DOS (MS-DOS 6.2 and emTeX):
#$copy_cmd = 'copy/y';
#$copy_out = '> nul';
#$to_null = '> nul';
#($TEXINPUTS = $ENV{'TEXINPUT'})
#    || die "$PROG: TEXINPUT variable not defined\n";
#@texinputs = split(/;/, $TEXINPUTS);
#$BIBINPUTS = $ENV{'BIBINPUT'} || '.';
#@bibinputs = split(/;/, $BIBINPUTS);
#$config_file = $PROG . '.ini';

# External programs run on the temporary copy of the document.

($latex_cmd, $bibtex_cmd) = ('latex', 'bibtex');

# Standard bibliography styles (styles which will never be bundled
# with the document).

foreach ('plain', 'unsrt', 'abbrv', 'alpha') {
    $stdbib{$_} = 1;
}

# Per-extension flags choosing between the 'filecontents' (0) and
# 'filecontents*' (1) environments. Either value can be used for
# files native to LaTeX; 1 should be used for other files
# (e.g., PostScript).

foreach ('.tex', '.ltx', '.cls', '.dtx', '.sty',
         '.fd', '.bib', '.bbl', '.ind') {
    $star{$_} = 0;
}

foreach ('.eps', '.ps') {
    $star{$_} = 1;
}

# Basename for temporary files: 'ar' followed by the last six digits
# of the current time, which keeps it short enough for both UNIX and DOS.

$t = time;
$tmpbase = 'ar' . substr($t, length($t) - 6);

# Temporary files that are read or written by name.

($tmpdoc, $tmplog, $tmpaux, $tmpbbl)
    = ("${tmpbase}.tex", "${tmpbase}.log", "${tmpbase}.aux", "${tmpbase}.bbl");

# Extensions of every temporary file that is removed at the end.

@tmpext = split(' ',
    '.tex .log .dvi .aux .lof .lot .toc .bbl .blg .idx .ilg');


############# you shouldn't need to change anything below here ##############


# Process leading command line options. Scanning stops at the first
# argument that does not begin with '-', or immediately after a '--'
# argument. Options that are not recognized are silently skipped.

for (;;) {
    $_ = $ARGV[0];
    last unless /^-/;
    shift(@ARGV);
    last if /^--$/;

    if (/^-f/) {
        $fast = 1;
    }
    elsif (/^-c/) {
        $confirm = 1;
    }
    elsif (/^-b/) {
        $bibfiles = 1;
    }
    elsif (/^-q/) {
        $tex_out = $to_null;
    }
    elsif (/^-v/) {
        &Version;
        exit(0);
    }
    elsif (/^-h/) {
        &Help;
        exit(0);
    }
    elsif (/^-i/) {
        # The list is the next argument, unless that looks like an option.
        $include_list = shift(@ARGV) unless $ARGV[0] =~ /^-/;
    }
    elsif (/^-e/) {
        $exclude_list = shift(@ARGV) unless $ARGV[0] =~ /^-/;
    }
}

# At least the input filename must remain.

die "usage: $PROG [options] input_file [output_file]\n" if @ARGV < 1;

# Check input filename argument.
# Valid filenames have a .tex or .ltx extension; if the filename has no
# extension at all, .tex is appended.
#
# Only the last path component is examined, so that dots in directory
# names (e.g. ./doc or dir.d/doc) are not mistaken for an extension.
# $bbl, the name under which a generated bibliography is bundled, is
# built from the complete basename without any directory part; the whole
# basename is captured so that names containing characters such as '-'
# are not truncated.

($docname = $ARGV[0]) =~ s!.*[/\\]!!;

if (index($docname, '.') == -1) {
    $ARGV[0] .= '.tex';
    $docname .= '.tex';
}

die "$PROG: invalid filename (not *.tex or *.ltx)\n"
    unless ($docname =~ /^(.+)\.(tex|ltx)$/);
$bbl = $1 . '.bbl';

# Unless fast startup (-f) was requested, load the configuration file
# if it can be opened. A missing or unreadable file is not an error.
# NOTE: the file's contents are evaluated as Perl code, so it must come
# from a trusted source.

if (!$fast && open(CFG, $config_file)) {
    @config = <CFG>;
    close(CFG);

    eval(join(' ', @config));
    if ($@) {
        chop($@);
        die "$PROG: config file error - $@\n";
    }
}

# Lists given on the command line take precedence over any @include
# and @exclude definitions made by the configuration file.

@include = &StringToArray($include_list) if length($include_list) > 0;
@exclude = &StringToArray($exclude_list) if length($exclude_list) > 0;

# First pass over the input file: decide whether it is a LaTeX2e
# document (it has a \documentclass line), note whether it already
# contains \nonstopmode or \listfiles at the start of a line, and
# record the names of files that already have a filecontents
# environment.

open(DOC, $ARGV[0]) || die "$PROG: cannot open $ARGV[0]\n";
while (<DOC>) {
    $nonstop   = 1 if /^\\nonstopmode\b/;
    $listfiles = 1 if /^\\listfiles\b/;
    $latex2e   = 1 if /^\\documentclass(\[|\{)/;
    $bundled{$1} = 1 if /\\begin\{filecontents.?\}\{(.*)\}/;
}
close(DOC);

$latex2e || die "$PROG: $ARGV[0] not a LaTeX 2e document\n";

# Second pass: write a temporary copy of the document, preceded by
# whichever of \nonstopmode and \listfiles the original lacks.

open(DOC, $ARGV[0]) || die "$PROG: cannot reopen $ARGV[0]\n";
open(TMP, ">$tmpdoc") || die "$PROG: cannot open $tmpdoc for output\n";
if (!$nonstop) {
    print TMP "\\nonstopmode\n";
}
if (!$listfiles) {
    print TMP "\\listfiles\n";
}
while (<DOC>) {
    print TMP $_;
}
close(TMP);
close(DOC);

# Run LaTeX on temporary copy and analyze logfile for dependencies.
#
# The log is skipped up to the "*File List*" marker; the entries that
# follow, up to the closing row of asterisks, name the files that were
# loaded. Entries described as "Standard LaTeX" are ignored. Any other
# entry becomes a dependency if a file of that name can be found in one
# of the TEXINPUTS directories.

system "$latex_cmd $tmpdoc $tex_out";
($error = $? >> 8) && die "$PROG: error running LaTeX, exit code $error\n";
open(LOG, $tmplog) || die "$PROG: cannot open $tmplog for input\n";
while (<LOG>) {
    last if /^\s+\*File List\*/;
}
while (<LOG>) {
    last if /^\s+\*{11}/;
    next if /\bStandard LaTeX\b/;

    # Filenames may contain hyphens and more than one dot
    # (e.g. my-style.sty), so \w+ alone is not sufficient here.
    next unless /^\s*([\w.\-]+\.\w+)/;

    # Save the capture before calling a subroutine.
    $listed = $1;
    if (($file = &Pathname($listed, @texinputs)) ne "") {
        $dependency{$listed} = $file;
    }
}
close(LOG);

# Bibliographies are detected by analyzing the .aux file.
# If the '-b' option has been specified, the .bib and .bst files
# become candidates for inclusion; otherwise, a .bbl file is
# generated by running BibTeX and this becomes a candidate for
# inclusion. The latter course is taken only if the .aux file
# contains \citation commands.

open(AUX, $tmpaux) || die "$PROG: cannot open $tmpaux\n";
while (<AUX>) {
    /\\citation/ && ($citations = 1);
    /\\bibstyle\{(.*)\}/ && ($bibstyle = $1);
    if (/\\bibdata\{(.*)\}/) {
        $bibdata = 1;
        @biblist = split(/,/, $1);    # \bibdata holds a comma-separated list
    }
}
close(AUX);

if ($bibdata) {
    if ($bibfiles) {

        # Make .bib and .bst files candidates for inclusion.
        # Databases are searched for in BIBINPUTS, the style in TEXINPUTS.

        foreach (@biblist) {
            $_ .= '.bib';
            if (($file = &Pathname($_, @bibinputs)) ne "") {
                $dependency{$_} = $file;
            }
        }
        if (! $stdbib{$bibstyle}) {
            $bibstyle .= '.bst';
            if (($file = &Pathname($bibstyle, @texinputs)) ne "") {
                $dependency{$bibstyle} = $file;
            }
        }

    }
    else {

        # Generate a .bbl file and make it a candidate for inclusion.

        if ($citations) {
            # A single command string is used so that the command goes
            # through the shell, as for the LaTeX run: the list form of
            # system() would hand $tex_out (an output redirection, or
            # nothing at all) to BibTeX as a literal extra argument.
            system "$bibtex_cmd $tmpbase $tex_out";
            ($error = $? >> 8)
                && die "$PROG: error running BibTeX, exit code $error\n";
            $dependency{$bbl} = $tmpbbl;
        }

    }
}

# Apply the @include and @exclude arrays (derived from configuration
# file/command line): each included name that can be found in the
# TEXINPUTS directories becomes a dependency, then every excluded
# name is dropped.

foreach $name (@include) {
    $file = &Pathname($name, @texinputs);
    $dependency{$name} = $file if $file ne "";
}

foreach $name (@exclude) {
    delete $dependency{$name};
}

# Report how many dependencies remain.

$n = scalar(keys %dependency);
print(($n == 1) ? '1 dependency' : "$n dependencies");

# With nothing to bundle, say so, remove the temporary files and stop.

if ($n != 0) {
    print ".\n";
}
else {
    if ($ARGV[1] eq "") {
        print ": $ARGV[0] unchanged.\n";
    }
    else {
        print ": $ARGV[1] not created.\n";
    }
    &Housekeeping($tmpbase, @tmpext);
    exit(0);
}

# With -c, ask about each dependency in turn and drop the ones
# that are refused.

if ($confirm) {
    foreach $file (keys %dependency) {
        delete($dependency{$file}) unless &Confirm($file);
    }
}

# Open the output file as DOC.

if ($ARGV[1] eq "") {

    # No output file was named, so the original document is replaced.
    # It is first copied to a backup whose name is the original name
    # with its last character changed to '~' (giving the extension
    # .te~ or .lt~, for DOS & UNIX compatibility).

    ($backup = $ARGV[0]) =~ s/.$/~/;
    system("$copy_cmd $ARGV[0] $backup $copy_out");
    $error = $? >> 8;
    die "$PROG: failed to create backup file $backup\n" if $error;
    open(DOC, ">$ARGV[0]")
        || die "$PROG: cannot open $ARGV[0] for output\n";

}
else {
    open(DOC, ">$ARGV[1]")
        || die "$PROG: cannot open $ARGV[1] for output\n";
}

# Write a new version of the document containing filecontents
# environments for all non-standard files that are loaded.
# Files that already have a filecontents environment in the
# document are not bundled a second time.

@files = sort(keys %dependency);
if (@files) {
    print DOC &Header(@files);
    foreach $file (@files) {
        next if $bundled{$file};
        $env = &EnvName(%star, $file);
        print DOC "\n\\begin{$env}{$file}\n";
        open(FILE, $dependency{$file})
            || die "$PROG: cannot open $dependency{$file} for input\n";
        $last = "\n";
        while (<FILE>) {
            print DOC;
            $last = $_;
        }
        close(FILE);

        # \end{...} must start on a line of its own, even when the
        # bundled file does not finish with a newline.
        print DOC "\n" unless $last =~ /\n$/;
        print DOC "\\end{$env}\n";
    }
    print DOC "\n";
}

# Append the document itself, read back from the temporary copy.
# Only the \nonstopmode and \listfiles lines that were added at the top
# of that copy are dropped; any such commands written by the author are
# part of the document and are kept.

$injected = ($nonstop ? 0 : 1) + ($listfiles ? 0 : 1);

open(TMP, $tmpdoc) || die "$PROG: cannot open $tmpdoc for input\n";
while (<TMP>) {
    if ($injected > 0) {
        $injected--;
        next;
    }
    print DOC;
}

close(TMP);

# Buffered write errors (e.g. a full disk) only show up at close time.
close(DOC) || die "$PROG: error writing output file\n";
&Housekeeping($tmpbase, @tmpext);

exit(0);


#############################################################################

# Version:
# prints a one-line banner on stdout giving the program name,
# its version and the date of its last revision.

sub Version
{
    local($banner);

    $banner = $PROG . ' version ' . $VER . ', last revised ' . $DATE . "\n";
    print($banner);
}


#############################################################################

# Help:
# prints a blank line, the version banner and a description of the
# program's usage and command line options on stdout.

sub Help
{
    local($usage);

    $usage = <<"END";

Analyzes a LaTeX2e document to determine whether it loads any non-standard
files. Such files, if they exist, are then bundled with the document by
means of filecontents or filecontents* environments. The resulting document
should be portable to any system with a standard installation of LaTeX.

Usage: $PROG [options] input_file [output_file]

input_file must have the extension .tex or .ltx (the extension can be
omitted from the command line if it is .tex). Output replaces the original
file if output_file is not specified (backup of original created, extension
.te~ or .lt~).

Command line options:

    -f  fast startup (skip reading configuration file)
    -c  prompt for confirmation before including a file
    -b  include .bib files instead of creating a .bbl file
    -q  quiet mode (no screen output from LaTeX)
    -v  print program version and exit
    -h  display this information and exit

    -i file1[,file2,...]  files for which inclusion is guaranteed
    -e file1[,file2,...]  files guaranteed to be excluded

END

    print "\n";
    &Version;
    print($usage);
}


#############################################################################

# StringToArray:
# Splits a string of comma-separated names into an array of names.
# The single word "none" (in any letter case) gives an empty array.

sub StringToArray
{
    local($string) = @_;
    local(@names) = ($string =~ /^none$/i) ? () : split(/,/, $string);

    @names;
}


#############################################################################

# Pathname:
# searches for a filename in a list of directories, returning the full
# pathname of the file if found, an empty string otherwise.
#
# Arguments: the filename to look for, followed by the directories to
# search, in order. The first directory containing an entry with exactly
# that name wins; the result is that directory, a '/', and the filename.
# Dies if one of the directories cannot be opened.

sub Pathname
{
    local($target, @dirlist) = @_;
    local($dir, $file, $result);
    $result = "";

  DIR_LOOP:
    foreach $dir (@dirlist) {
        opendir(DIR, $dir) || die "$PROG: cannot access directory $dir\n";

        # Test for definedness, not truth: an entry named "0" must not
        # be mistaken for the end of the directory.
        while (defined($file = readdir(DIR))) {
            if ($file eq $target) {
                $result = $dir . '/' . $target;
                last;
            }
        }

        # Always release the directory handle, whether or not the file
        # was found, before moving on or returning.
        closedir(DIR);
        last DIR_LOOP if $result ne "";
    }

    $result;
}


#############################################################################

# Header:
# constructs header text for the output document.
#
# Arguments: the names of the files bundled with the document.
# Returns a block of LaTeX comment lines: a line giving the program
# name, version and the current local time and date, a line announcing
# how many files may be created, and one line per filename.

sub Header
{
    local(@files) = @_;
    local($n)     = scalar(@files);
    local(@mon)   = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
    local(@t)     = localtime(time);
    local($hdr);

    # localtime returns the year as an offset from 1900, so 1900 is
    # added to obtain the calendar year. sprintf pads hours, minutes
    # and seconds to two digits.
    $hdr = sprintf("%% Packaged by %s v%s at %02d:%02d:%02d, %d %s %d.\n",
                   $PROG, $VER, $t[2], $t[1], $t[0],
                   $t[3], $mon[$t[4]], $t[5] + 1900);

    if ($n == 1) {
        $hdr .= '% A new file';
    }
    else {
        $hdr .= "% Up to $n new files";
    }
    $hdr .= " may be created when you first process this document:\n";
    foreach (@files) {
        $hdr .= "% $_\n";
    }

    $hdr;
}


#############################################################################

# Confirm:
# prompts for confirmation that a file is to be included in a
# filecontents or filecontents* environment.
#
# Argument: the name of the file, shown in the prompt.
# Reads one line from STDIN and returns true if it is empty (the
# default answer is yes; end of input counts as empty) or begins with
# 'y' or 'Y', false otherwise.

sub Confirm
{
    local($file) = @_;
    local($answer, $yes);

    print "Include $file? (y/n) [y]: ";

    # The reply is kept in a private variable so that the caller's $_
    # is left untouched.
    $answer = <STDIN>;
    $answer = "" unless defined($answer);

    # Remove the line terminator only: an answer that arrives without
    # a trailing newline must not lose its last character.
    $answer =~ s/\n$//;

    $yes = (length($answer) == 0 || $answer =~ /^y/i);
}


#############################################################################

# EnvName:
# returns "filecontents*" if the specified file's extension has been
# flagged as requiring a starred environment; otherwise returns
# "filecontents".
#
# Arguments: the extension table as a flattened hash (extension, flag,
# extension, flag, ...) followed by the filename as the LAST argument.
# The extension is everything from the last '.' in the filename.

sub EnvName
{
    local(@args) = @_;

    # The filename has to be removed from the argument list before the
    # rest is turned into a hash: a hash on the left of a list
    # assignment absorbs every remaining value, which would leave
    # nothing for a scalar listed after it.
    local($file) = pop(@args);
    local(%star) = @args;
    local($name) = 'filecontents';
    local($i);

    # rindex is used so that a name such as fig.1.eps is classified by
    # its final extension (.eps).
    if (($i = rindex($file, '.')) != -1 && $star{substr($file, $i)}) {
        $name .= '*';
    }

    $name;
}


#############################################################################

# Housekeeping:
# removes a set of filenames given a basename and a list of extensions.
#
# Arguments: the basename, followed by the extensions (each including
# its leading dot). Names that do not exist are skipped. Dies if an
# existing file cannot be removed.

sub Housekeeping
{
    local($base, @ext) = @_;
    local($file);

    foreach (@ext) {
        $file = $base . $_;
        if (-e $file) {
            # The parentheses matter: without them '||' binds to $file
            # rather than to the result of unlink, and a failed removal
            # would go unreported.
            unlink($file) || die "$PROG: error removing $file\n";
        }
    }
}
