#!/usr/bin/perl -w
use strict;
use FindBin;
use lib "$FindBin::RealBin/../lib";
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use XML::LibXML;
use XML::LibXML::XPathContext;
use Text::Wrap;

#**********************************************************************
# Parse command line
#
# The searching options (--symbol, --unknown, --possiblefunction, --label,
# --refnum) may each be given several times; every value is collected
# into the corresponding array.  --quiet and --verbose lower and raise
# $verbosity by one per occurrence.  The first remaining argument is
# taken as the XML file to search.
my $VERSION = '0.0.1';
#my $identity = "latexmlfind (LaTeXML version $LaTeXML::VERSION)";
my $identity = "latexmlfind ($VERSION)";
my($verbosity)=(0);
my ($help,$showversion)=(0,0);
my (@symbols,@unknowns,@posfuncs,@labels,@refnums);
GetOptions("symbol=s"           => \@symbols,
           "unknown=s"          => \@unknowns,
           "possiblefunction=s" => \@posfuncs,
           "label=s"            => \@labels,
           "refnum=s"           => \@refnums,
           "quiet"     => sub { $verbosity--; },
           "verbose"   => sub { $verbosity++; },
           "VERSION"   => \$showversion,
           "help"      => \$help,
          ) or pod2usage(-message => $identity, -exitval=>1, -verbose=>0, -output=>\*STDERR);
# --help prints the complete manual; --VERSION prints only the identity line.
pod2usage(-message=>$identity, -exitval=>1, -verbose=>2, -output=>\*STDOUT) if $help;
if($showversion){ print STDERR "$identity\n"; exit(1); }
# The input is the XML produced by LaTeXML, not the TeX source itself.
pod2usage(-message=>"$identity\nMissing input XML file",
          -exitval=>1, -verbose=>0,-output=>\*STDERR) unless @ARGV;
my $source = $ARGV[0];

#**********************************************************************
# Do the processing.
print STDERR "$identity\n" unless $verbosity < 0;
binmode(STDOUT,":utf8");	# Make sure output can handle UTF8

# Parse the document once; every query below runs against $DOC through
# the shared context $XPATH, in which the LaTeXML namespace is bound to
# the prefix "ltxml".
my $DOC = XML::LibXML->new->parse_file($source);
my $XPATH = XML::LibXML::XPathContext->new;
$XPATH->registerNs(ltxml=>"http://dlmf.nist.gov/LaTeXML");
# Register the match_font defined in this file.  LaTeXML::Font is never
# loaded here, so a reference to LaTeXML::Font::match_font would point at
# an undefined sub and die as soon as an XPath called match-font().
$XPATH->registerFunction('match-font',\&match_font);

# Quote a string as an XPath 1.0 string literal.
# XPath 1.0 has no escape sequences inside literals, so a value is wrapped
# in whichever quote character it does not contain; a value containing
# both kinds is assembled from single-quoted pieces with concat().
sub xpath_literal {
  my($string)=@_;
  return "'".$string."'" unless $string =~ /'/;
  return '"'.$string.'"' unless $string =~ /"/;
  # Limit -1 keeps leading/trailing empty pieces, so concat() always gets
  # at least two arguments and no quote is lost.
  return "concat(".join(",\"'\",", map { "'".$_."'" } split(/'/,$string,-1)).")"; }

# --symbol: Math containing an XMTok whose name or content is the symbol.
foreach my $symbol (@symbols){
  my $quoted = xpath_literal($symbol);
  show_matches("Symbols \"$symbol\"",
               "//ltxml:Math[descendant::ltxml:XMTok[\@name=$quoted or text()=$quoted]]"); }

# --unknown: the spec is a symbol, optionally followed by {font}.
foreach my $spec (@unknowns){
  my $symbol = $spec;
  my $font;
  # Braces are escaped so they are always taken literally by the regexp.
  $font = $1 if $symbol =~ s/\{(\w*)\}$//;
  my $quoted = xpath_literal($symbol);
  show_matches("Unknown \"$spec\"",
               # Find Math containing an XMTok, with role=UNKNOWN
               "//ltxml:Math[descendant::ltxml:XMTok[\@role='UNKNOWN']"
               #                                    whose name or content is the requested symbol
               .                                   "[\@name=$quoted or text()=$quoted]"
               #                                    ($font matched \w* above, so it needs no quoting)
               .                                   ($font ? "[\@font='$font']" : '')
               #                                   BUT which isn't in presentation branch of an XMDual!!
               #                                   ie. no ancestor w/preceding sibling has parent = XMDual
               .                                   "[not(ancestor-or-self::*[preceding-sibling::*][parent::ltxml:XMDual])]"
               .                                   "]"); }

# --possiblefunction: Math containing an XMTok flagged possibleFunction='yes'.
foreach my $symbol (@posfuncs){
  my $quoted = xpath_literal($symbol);
  show_matches("Possible function \"$symbol\"",
               "//ltxml:Math[descendant::ltxml:XMTok[\@possibleFunction='yes']"
               .                                   "[\@name=$quoted or text()=$quoted]"
               .                                   "]"); }

# --label and --refnum: any element carrying the given attribute value.
# These options were parsed and documented but previously never acted on.
foreach my $label (@labels){
  show_matches("Label \"$label\"",
               "//*[\@label=".xpath_literal($label)."]"); }

foreach my $refnum (@refnums){
  show_matches("Refnum \"$refnum\"",
               "//*[\@refnum=".xpath_literal($refnum)."]"); }

#**********************************************************************
# This matches fonts when both are converted to strings (toString),
# such as when they are set as attributes.
#
# Both arguments are expected to look like "Font[comp1,comp2,...]".
# Components are compared pairwise, in order; a component of '*' on
# either side matches anything.
# Returns 1 when every component of $font1 is matched by $font2, and 0
# otherwise -- including when either argument is false or is not of the
# "Font[...]" form.
sub match_font {
  my($font1,$font2)=@_;
#print STDERR "Match font \"".($font1 || 'none')."\" to \"".($font2||'none')."\"\n";
  return 0 unless $font1 && $font2;
  # Check each match explicitly: after a failed match $1 still holds the
  # capture of the previous successful one, which would silently compare
  # $font1 against itself.
  return 0 unless $font1 =~ /^Font\[(.*)\]$/;
  my @comp1  = split(',',$1);
  return 0 unless $font2 =~ /^Font\[(.*)\]$/;
  my @comp2  = split(',',$1);
  while(@comp1){
    my $c1 = shift @comp1;
    my $c2 = shift @comp2;
    # $font2 may have fewer components; compare the missing ones as empty
    # strings rather than as undef.
    $c2 = '' unless defined $c2;
    return 0 if ($c1 ne '*') && ($c2 ne '*') && ($c1 ne $c2); }
  return 1; }
#**********************************************************************

# Run one XPath query against the document and report every hit.
#   $description : human-readable name of the query, used in the heading.
#   $xpath       : the XPath expression, evaluated with $DOC as context.
# Prints a heading with the number of hits (and the XPath itself when
# $verbosity is positive), then hands each matching node to show_node.
sub show_matches {
  my($description,$xpath)=@_;
  my @found = $XPATH->findnodes($xpath,$DOC);
  my $count = @found;
  print "\n$description appears in $count places:\n";
  if($verbosity > 0){
    print "XPath = \"$xpath\"\n"; }
  for my $match (@found){
    show_node($match); }}

# Things to do here:
#   find a labelled parent.
#   find a nearby comment (for linenumber).
# Print one matching node, preceded by a line naming its labelled ancestors.
# A Math element is shown as its TeX source (the content-tex attribute if
# it has a true value, otherwise tex), wrapped and indented by seven spaces.
# Any other node -- or any node at all when $verbosity exceeds 1 -- is
# shown as serialized XML.
sub show_node {
  my($node)=@_;
  print "  In ".ancestry($node).":\n";
  my $as_xml = ($node->localname ne 'Math') || ($verbosity > 1);
  my $shown  = ($as_xml
                ? $node->toString
                : wrap("       ","       ",
                       $node->getAttribute('content-tex') || $node->getAttribute('tex')));
  print $shown."\n";
}

# Describe where a node sits in the document.
# Walks from $node up through its parents for as long as they are element
# nodes, and collects every one that has a true label or refnum attribute.
# Each collected element contributes "label [refnum]" (either part may be
# absent); the entries are joined innermost-first with " < ".
# Returns the empty string when nothing on the way up is labelled.
sub ancestry {
  my($node)=@_;
  my @chain;
  for(my $current = $node;
      $current && ($current->nodeType == XML_ELEMENT_NODE);
      $current = $current->parentNode){
    my $label  = $current->getAttribute('label') || '';
    my $refnum = $current->getAttribute('refnum') || '';
    next unless $label || $refnum;
    push(@chain, $label.($refnum ? " [".$refnum."]" : '')); }
  return join(" < ",@chain); }

#**********************************************************************
__END__

=head1 NAME

C<latexmlfind> - finds interesting things in LaTeXML-generated XML.

=head1 SYNOPSIS

latexmlfind [options] xmlfile

  Options:
   --symbol=symbol            finds equations where the symbol appears.
   --unknown=symbol           finds equations where the unknown symbol appears.
   --possiblefunction=symbol  finds equations where symbol is possibly used as a function.
   --label=symbol             finds objects with the given label.
   --refnum=symbol            finds objects with the given refnum (reference number).
   --quiet                    suppress messages (can repeat)
   --verbose                  more informative output (can repeat)
   --VERSION                  show version number.
   --help                     shows help message.

=head1 OPTIONS AND ARGUMENTS

latexmlfind is useful for finding objects within an XML file generated by LaTeXML.


=over 4

=item B<--output=>I<outputfile>

Specifies the output file; by default the XML is written to stdout.

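=item B<--symbol=>I<symbol>

Finds equations containing a token whose name or text is the given symbol.

=item B<--unknown=>I<symbol>

Finds equations where the unknown symbol appears.
To match only occurrences in a particular font, append the font name
in braces, as in I<symbol>{I<font>}.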

=item B<--possiblefunction=>I<symbol>

Finds equations where symbol is possibly used as a function.

=item B<--label=>I<label>

Finds objects (sections, equations, whatever) labeled by the given label.

=item B<--refnum=>I<refnum>

Finds objects (sections, equations, whatever) with the given reference number.

=item B<--quiet>

Reduces the verbosity of output during processing; used twice, it is pretty silent.

=item B<--verbose>

Increases the verbosity of output during processing; used twice, it is pretty chatty.
This can be useful for getting more details when errors occur.

=item B<--VERSION>

Shows the version number of latexmlfind.

=item B<--help>

Shows this help message.

=back

=head1 SEE ALSO

L<latexml>, L<LaTeXML>

=cut
#**********************************************************************

