#!/usr/bin/env perl
# /=====================================================================\ #
# |  latexmlc                                                           | #
# | client/server conversion program                                    | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
use strict;
use warnings;

use Cwd qw(cwd abs_path);
use IO::Socket;

my $RealBin_safe;
use FindBin;
use File::Spec::Functions;
use File::Which;

BEGIN {
  if ($FindBin::RealBin =~ /^([^\0]+)\z/) {    # Valid Unix path TODO: Windows, revisit regexp
    $RealBin_safe = $1; }
  die 'Fatal:I/O:tainted RealBin was tainted! Failing...'
    unless ($RealBin_safe && (-e catfile($RealBin_safe, 'latexmlc')));
}
# TODO: We probably want file cat for things like /../lib instead of spelling out a Unix path
use lib catdir($RealBin_safe, "..", "lib");

#TODO: Do we ever care about ENV PATH that much? Do we miss on some feature like that?
#$ENV{PATH} = "$RealBin_safe:/usr/bin:/usr/local/bin:";
use LaTeXML;
use LaTeXML::Common::Config;
use LaTeXML::Common::Error;
use LaTeXML::Util::Pathname;
use URI::Escape;
use HTTP::Response;
use HTTP::Request;
use JSON::XS qw(decode_json);

# Determine if a socket server is installed locally and obtain its pathname:
my $latexmls;
$latexmls = catfile($RealBin_safe, 'latexmls') if (-e catfile($RealBin_safe, 'latexmls'));
$latexmls = which('latexmls') unless defined $latexmls;

# Some defaults:
my $opts = LaTeXML::Common::Config->new(input_limit => 100);

# Parse and load command-line options
$opts->read(\@ARGV);
my $keyvals = $opts->scan_to_keyvals(\@ARGV);
# Get the full source of interest
my $source = $opts->get('source');
$opts->delete('source');

# Setup logging messages
SetVerbosity($opts->get('verbosity'));
UseSTDERR();    # setup for LaTeXML.pm to also use STDERR spinner messages.
my $starttime = StartTime();

# Client-side options:
my ($port, $address, $expire, $local) = map { $opts->get($_) } qw(port address expire);
$address = '127.0.0.1' if !$address || ($address eq 'localhost');
$address =~ s/^(\w+)\:\/\///;     # strip away any protocol
my $route = '/';
if ($address =~ s/\/(.+)$//) {    # strip away route
  $route = '/' . $1;
}
# Local if peerhost is localhost:
$local  = ($expire && ($expire == -1)) || ($address eq '127.0.0.1');
$expire = -1                   unless ((defined $expire) && $latexmls);
$port   = ($local ? 3334 : 80) unless $port;    #Fall back if all fails...
#***************************************************************************
#Add some base, so that relative paths work
my $cdir = abs_path(cwd());
$cdir =~ s/ /\\ /g;
if (!$opts->get('base')) {
  $opts->set('base', $cdir);
  push @$keyvals, ['base', $cdir];
}

# Record if destination exists, for summary
my $deststat;
$deststat = (stat($opts->get('destination')))[9] if $opts->get('destination');
$deststat = 0 unless defined $deststat;

push @$keyvals,                ['path', $cdir];    #add current path, to ensure never empty
push @{ $opts->get('paths') }, $cdir;

if (!$source) { die "Input was empty.\n"; }
if ($source eq '-') {
  { local $/ = undef;
    $source = "literal:" . <STDIN>;
    # Set the source in the keyvals to be sent over the wire:
    @$keyvals = grep { $_->[0] !~ /source|tex/ } @$keyvals;
    push @$keyvals, $source;
  } }

# Always create a .log file for tracking the conversion messages
# Use $jobname.latexml.log or just latexml.log for logging by default
#
# In the local conversion case, this can be delegated to LaTeXML.pm
my $logfile = $opts->get('log');
if (!$logfile) {
  if (!pathname_is_literaldata($source)) {
    my ($dir, $name, $ext) = pathname_split($source);
    if ($name) {
      $logfile = pathname_make(dir => pathname_cwd(), name => $name, type => 'latexml.log'); }
    else {
      $logfile = 'latexml.log';
  } }
  else {
    $logfile = 'latexml.log'; }
  $opts->set('log', $logfile); }
# A bit special: Since LaTeXML.pm will always try to append to the log file
# make sure the file is new before we start.
unlink($logfile) if (-f $logfile);

#***************************************************************************
# Prepare output variables:
my ($result, $status, $status_code, $log);

# TODO: Talk to the web service via HTTP
#Setup client and communicate
my $sock = $latexmls && IO::Socket::INET->new(
  PeerAddr => $address,
  PeerPort => $port,
  Proto    => 'tcp',
);    #Attempt connecting to a service
if ((!$sock) && $local && ($expire == -1)) {
  # Don't boot a server, single job requested:
  require LaTeXML;
  $opts->set('local', 1);
  my $converter = LaTeXML->get_converter($opts);
  $converter->prepare_session($opts);
  my $response = $converter->convert($source);
  ($result, $status, $status_code, $log) = map { $$response{$_} } qw(result status status_code log) if defined $response;
} else {
  my $message = q{};
  foreach my $entry (@$keyvals) {
    my ($key, $value) = ($$entry[0], $$entry[1]);
    $message .= uri_escape($key) . ($value ? '=' . uri_escape($value) : '') . '&';
  }
  chop $message;
  ProgressSpinup("Calling out to latexmls");
  #Startup daemon and feed in args, if needed
  system($latexmls, "--port=$port",
    "--expire=$expire", "--autoflush=" . $opts->get('input_limit')) if !$sock && $local;
  my $http_response = ($local
    ? process_local($address, $port, $route, $message, $sock)
    : process_remote($address, $port, $route, $message, $sock));
  if ($http_response->is_success) {
    my $response = decode_json($http_response->content);
    ($result, $status, $status_code, $log) = map { $$response{$_} } qw(result status status_code log) if defined $response;
    NoteSTDERR("Conversion complete: " . $$response{status});
  } else {
    die("Fatal:HTTP:" . $http_response->code() . " " . $http_response->message() . "\n");
  }
  ProgressSpindown("Calling out to latexmls"); }

#***************************************************************************
### Common treatment of output:
my $destination = $opts->get('destination');

# Always write log to a file, either specified via --log or 'latexml.log'
# with the exception of *archive* outputs, where the log is in the ZIP
my $whatsout   = $opts->get('whatsout');
my $is_archive = $whatsout && ($whatsout =~ /^archive/);

if ($log && !$is_archive) {
  chomp($log);
  # If we get a log payload in the response, likely from a latexmls call
  # we need to write it to file from within latexmlc itself:
  my $logfile = $opts->get('log');
  UseLog($logfile);
  NoteLog($log);
  UseLog(undef);
  NoteSTDERR("full log written to $logfile."); }

if ($result) {
  # NOTE that we are serializing the XML::LibXML::Document whose toString
  # has ALREADY encoded (in this case to utf8), so NO encode is needed!
  if ($destination) {
    my $output_handle;
    open($output_handle, ">", $destination) or die("Fatal:I/O:forbidden Couldn't open output file " . $destination . ": $!");
    if ($whatsout eq 'archive') {
      binmode($output_handle, ':raw');
    }
    print $output_handle $result;
    close $output_handle;
  } else {
    print "\n", $result, "\n"; } }

# == End of Program ==
# cleanup STDERR e.g. in case of fatals
UseSTDERR(undef);
if ($status_code == 3 || ($status =~ /fatal error/)) {    # non-zero exit code for fatal conversions
  exit 1;
} else {
  exit 0;
}

# == Helpers ==
sub summary {
  my ($destination, $prior_stat) = @_;
  my $new_stat = (stat($destination))[9] || 0;
  return ($new_stat && ($prior_stat != $new_stat)) ? "\nWrote $destination\n" :
    "\nError! Did not write file $destination\n"; }

sub process_local {
  my ($req_address, $req_port, $req_route, $req_message, $req_sock) = @_;
  #daemon is running, reconnect and feed in request
  $req_sock = $req_sock || IO::Socket::INET->new(
    PeerAddr => $req_address,
    PeerPort => $req_port,
    Proto    => 'tcp',
  ) || die "Fatal:perl:socket-create Could not create socket: $!\n";
  my $req_message_length = length($req_message);
  $req_route = "$req_address:$req_port" unless $req_route;
  my $payload = <<"PAYLOADEND";
POST $req_route HTTP/1.0
Host: $req_address:$req_port
User-Agent: latexmlc
Content-Type: application/x-www-form-urlencoded
Content-Length: $req_message_length

$req_message
PAYLOADEND
  $req_sock->send($payload);
  my $response_string = q{};
  { local $/ = undef;
    $response_string = <$req_sock>; }
  close($req_sock);
  return ($response_string
    ? HTTP::Response->parse($response_string)
    : HTTP::Response->new(500, 'Internal Server Error')); }

sub process_remote {
  my ($req_address, $req_port, $req_route, $req_message, $req_sock) = @_;
  $req_sock->close if $req_sock;    # No need of the socket here, using LWP instead
  my $payload = HTTP::Request->new(POST => "http://$req_address:$req_port$req_route");
  $payload->header('User-Agent',   'latexmlc');
  $payload->header('Content-Type', 'application/x-www-form-urlencoded');
  $payload->content($req_message);
  require LWP::UserAgent;
  my $ua = LWP::UserAgent->new;
  $ua->timeout(10);
  return $ua->request($payload); }

#**********************************************************************
__END__

=head1 NAME

C<latexmlc> - An omni-executable for LaTeXML, capable of
  stand-alone, socket-server and web service conversion.
  Supports both core processing and post-processing.

=head1 SYNOPSYS

See the OPTIONS section in L<LaTeXML::Common::Config> for usage information.

=head1 DESCRIPTION

L<latexmlc> provides a client which automatically sets up a LaTeXML local server
  if necessary (via L<latexmls>).

  If such server already exists, the client proceeds to communicate normally.

  A stand-alone conversion (the default) can also be requested via --expire=-1

=head1 SEE ALSO

L<LaTeXML::Common::Config>

=cut
