#!/usr/bin/perl -w
use strict;
use FindBin;
use lib "$FindBin::RealBin/../lib";
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use LaTeXML;
use LaTeXML::Util::Pathname;

#**********************************************************************
# Parse command line

# Banner used in usage/version messages and printed at startup.
my $identity = "latexml (LaTeXML version $LaTeXML::VERSION)";

# Option defaults: normal verbosity, non-strict, comments kept, math parsed,
# XML output; an empty $destination means "write to stdout".
my ($verbosity, $strict, $comments, $noparse) = (0, 0, 1, 0);
my ($format, $destination) = ('xml', '');
my ($help, $showversion);
my $documentid;
my (@paths, @preload);

GetOptions(
    "destination=s" => \$destination,
    "output=s"      => \$destination,    # obsolete synonym for --destination
    "preload=s"     => \@preload,
    "path=s"        => \@paths,
    "quiet"         => sub { $verbosity--; },
    "verbose"       => sub { $verbosity++; },
    "strict"        => \$strict,
    "xml"           => sub { $format = 'xml'; },
    "tex"           => sub { $format = 'tex'; },
    "box"           => sub { $format = 'box'; },
    "noparse"       => \$noparse,
    "comments!"     => \$comments,
    "VERSION"       => \$showversion,
    "debug=s"       => sub {
        my (undef, $package) = @_;
        # The value comes straight from the command line, so never feed it to
        # a string eval.  Accept only a (possibly nested) package name and set
        # $LaTeXML::<package>::DEBUG through a symbolic reference instead.
        if ($package =~ /\A[A-Za-z_]\w*(?:::\w+)*\z/) {
            no strict 'refs';
            ${ 'LaTeXML::' . $package . '::DEBUG' } = 1; }
        else {
            # Dying inside a handler makes GetOptions report the message and
            # return false, which triggers the usage message below.
            die "Invalid package name '$package' for --debug\n"; }
    },
    "documentid=s"  => \$documentid,
    "help"          => \$help,
) or pod2usage(-message => $identity, -exitval => 1, -verbose => 0, -output => \*STDERR);

# --help: print the full manual page to stdout and exit.
pod2usage(-message => $identity, -exitval => 1, -verbose => 2, -output => \*STDOUT) if $help;

# --VERSION: print just the banner and exit.
if ($showversion) { print STDERR "$identity\n"; exit(1); }

# A source argument is mandatory; only the first one is used.
pod2usage(-message => "$identity\nMissing input TeX file",
          -exitval => 1, -verbose => 0, -output => \*STDERR) unless @ARGV;
my $source = $ARGV[0];

#**********************************************************************
# Do the processing.
# Announce ourselves unless --quiet was given at least once.
if ($verbosity >= 0) {
    print STDERR "$identity\n"; }

# Collect the constructor arguments (order preserved) and build the processor.
my @latexml_options = (
    preload         => [@preload],
    searchpaths     => [@paths],
    verbosity       => $verbosity,
    strict          => $strict,
    includeComments => $comments,
    documentid      => $documentid,
    nomathparse     => $noparse,
);
my $latexml = LaTeXML->new(@latexml_options);

# Make sure the destination's directory can be created before doing
# any expensive processing.
if ($destination) {
    my $dir = pathname_directory($destination);
    if ($dir) {
        unless (pathname_mkdir($dir)) {
            die "Couldn't create destination directory $dir: $!"; } } }
binmode(STDERR, ":utf8");
# ========================================
# First read and digest whatever we're given.
# Digest the input: a named file is handed to the processor directly,
# while a source of '-' means the TeX text is slurped via <> first.
my $digested;
if ($source ne '-') {
    $digested = $latexml->digestFile($source); }
else {
    my $content = do { local $/ = undef; <> };
    $digested = $latexml->digestString($content); }

# ========================================
# Now, convert to DOM and output, if desired.
use Encode;
# Serialize the digested document in the requested format.
# $serialized stays undefined when digestion produced nothing.
my $serialized;
if ($digested) {
    # The conversion routines below are run with the processor's state
    # installed as the global state.
    local $LaTeXML::Global::STATE = $$latexml{state};
    if ($format eq 'tex') {
        $serialized = LaTeXML::Global::UnTeX($digested); }
    elsif ($format eq 'box') {
        $serialized = $digested->toString; }
    else {
        my $dom = $latexml->convertDocument($digested);
        # NOTE(review): earlier revisions experimented with decoding the
        # result / setting the document encoding here; the plain serialization
        # is what is currently written out, with no encoding layer on the handle.
        $serialized = $dom->toString(1); } }
print STDERR "\nConversion complete: ".$latexml->getStatusMessage.".\n";

# Pick the output handle: the destination file if one was given, else stdout.
# A lexical handle is used so no global bareword handle/glob is clobbered.
my $out;
if ($destination) {
    open($out, ">", $destination)
        or die "Couldn't open output file $destination: $!"; }
else {
    $out = \*STDOUT; }

# Write the result (if any).  Test definedness/length rather than truth so
# that an output consisting solely of "0" is not silently dropped, and check
# print/close so buffered write errors (e.g. a full disk) are reported.
if (defined($serialized) && length($serialized)) {
    print {$out} $serialized
        or die "Couldn't write output" . ($destination ? " file $destination" : "") . ": $!"; }
if ($destination) {
    close($out) or die "Couldn't close output file $destination: $!"; }

#**********************************************************************
__END__

=head1 NAME

C<latexml> - transforms a TeX/LaTeX file into XML.

=head1 SYNOPSIS

latexml [options] texfile

 Options:
 --destination=file specifies destination file; default to stdout.
 --output=file      [obsolete synonym for --destination]
 --preload=module   requests loading of an optional module;
                    can be repeated
 --path=dir         adds dir to the paths searched for files,
                    modules, etc; 
 --documentid=id    assign an id to the document root.
 --quiet            suppress messages (can repeat)
 --verbose          more informative output (can repeat)
 --strict           makes latexml less forgiving of errors
 --xml              requests xml output (default).
 --tex              requests TeX output after expansion.
 --box              requests box output after expansion
                    and digestion.
 --noparse          suppresses parsing math
 --nocomments       omit comments from the output
 --VERSION          show version number.
 --debug=package    enables debugging output for the named
                    package
 --help             shows this help message.

If texfile is '-', latexml reads the TeX source from standard input.

=head1 OPTIONS AND ARGUMENTS

=over 4

=item B<--destination>=I<file>

Specifies the destination file; by default the XML is written to stdout.

=item B<--preload>=I<module>

Requests the loading of an optional module or package.  This may be useful if the TeX code
does not specifically require the module (e.g. through input or usepackage).

=item B<--path>=I<dir>

Add I<dir> to the search paths used when searching for files, modules, style files, etc;
somewhat like TEXINPUTS.  This option can be repeated.

=item B<--documentid>=I<id>

Assigns an ID to the root element of the XML document.  This ID is generally
inherited as the prefix of IDs on all other elements within the document.
This is useful when constructing a site of multiple documents so that
all nodes have unique IDs.

=item B<--quiet>

Reduces the verbosity of output during processing; used twice, it is pretty silent.

=item B<--verbose>

Increases the verbosity of output during processing; used twice, it is pretty chatty.
Can be useful for getting more details when errors occur.

=item B<--strict>

Specifies a strict processing mode. By default, undefined control sequences and
invalid document constructs (that violate the DTD) give warning messages, but attempt
to continue processing.  Using --strict makes them generate fatal errors.

=item B<--xml>

Requests XML output; this is the default.

=item B<--tex>

Requests TeX output for debugging purposes;  processing is only carried out through expansion and digestion.
This may not be quite valid TeX, since Unicode may be introduced.

=item B<--box>

Requests Box output for debugging purposes;  processing is carried out through expansion and digestion,
and the result is printed.

=item B<--nocomments>

Normally latexml preserves comments from the source file, and adds a comment every 25 lines as
an aid in tracking the source.  The option --nocomments discards such comments.

=item B<--VERSION>

Shows the version number of the LaTeXML package.

=item B<--debug>=I<package>

Enables debugging output for the named package. The package is given without the leading LaTeXML::.

=item B<--help>

Shows this help message.

=back

=head1 SEE ALSO

L<latexmlpost>, L<LaTeXML>

=cut
#**********************************************************************

