#!perl -w
# usage: perl docml.pl <source>.rtf
#    or: perl docml.pl -x <source>.rtf.xml
#	options: -s "stylesheet URL to include in xml-stylesheet pi"
#		Default is "http://laurent.caprani.free.fr/econoweb/docml2html.xsl"
# ouputs DOCML conversion of source file.
# affiche la transformation DOCML du fichier source.
#
# Copyright (C) 1997, 2000 Laurent CAPRANI (laurent.caprani@laposte.net)
# Copyright (C) 1997, 1999 Laboratoire de recherche en gestion du logiciel (www.lrgl.uqam.ca)
# Econoweb RTF is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#

use strict;

use constant ECHO_DURATION => 0;
END {print STDERR "duration: ", time - $^T, "\n" if ECHO_DURATION}

# open (TRACE_FH, '>trace.txt'); use constant TRACE_FH => *TRACE_FH;
use constant TRACE_FH => *STDERR;

use Carp;
# use -MCarp=verbose to see stack backtrace
$SIG{__WARN__} = sub{&Carp::croak};

$SIG{__DIE__} = sub{&Carp::croak};

use Getopt::Std;

my %opts;
getopts('x:s:', \%opts);

my $Grove;
use XML::Grove::Builder;
my $builder = XML::Grove::Builder->new();

if (exists $opts{x}) {
	require XML::Parser::PerlSAX;
	
	my $parser = XML::Parser::PerlSAX->new( Handler => $builder );
	print STDERR "loading: ", time - $^T, "\n" if ECHO_DURATION;
	$Grove = $parser->parse( Source => {SystemId => $opts{x}} );
} else {
	require 'RTF/parser.pl';

	print STDERR "loading: ", time - $^T, "\n" if ECHO_DURATION;
	$Grove = RtfParser::parse(Handler => $builder);
}

print STDERR "grove building: ", time - $^T, "\n" if ECHO_DURATION;

use XML::Handler::XMLWriter;

my $_SaxHandler = XML::Handler::XMLWriter->new(Newlines => 1);

$_SaxHandler->start_document() if $_SaxHandler->can('start_document');
foreach (qw(start_element end_element characters processing_instruction)) {
	die "SAX handler must handle '$_'" unless $_SaxHandler->can($_);
}

use Data::Grove::Visitor 0.08;
$Grove->children_accept_name (new Visitor);

my $rv = $_SaxHandler->end_document() if $_SaxHandler->can('end_document');

####################################
package Visitor;

sub new {
	my $class = shift;

	my $self = {@_};
	return bless $self, $class;
}

sub visit_name_link {
	my $self = shift; my $element = shift;

	my %att = %{$element->{Attributes}};
	my $href;
	if (exists $att{target}) {
		$href = $att{target};
	}
	if (exists $att{fragment}) {
		my $fragment = $att{fragment};
		$href .= "#$fragment";
	}
	# Standard servers do not need to convert URI
	# HtmlUtils::utf8_to_latin1($href);
	# HtmlUtils::uri_safe($href);
	my $elt = {Name=>'a', Attributes=>{href=>$href}};
	$_SaxHandler->start_element($elt);
	$element->children_accept_name ($self, @_),
	$_SaxHandler->end_element($elt);
}

sub visit_name_bkmarkstart {
	my $self = shift; my $element = shift;

	# HtmlUtils::utf8_to_latin1($element->{Attributes}{name});
	HtmlUtils::uri_safe($element->{Attributes}{name});
	$_SaxHandler->start_element($element);
	$_SaxHandler->end_element($element);
}

sub visit_name_span {
	my ($self, $element, %context) = @_;

	for ($element->{Attributes}{'character-style'} || '') {
		/^literal$/ && do {
			$context{literal} = 1;
		last};
#		/^Nom propre$/ && do {
#			$context{small_caps} = 1;
#		last};
	}
	$element->{Attributes}{'xml:space'} = 'preserve';
	$_SaxHandler->start_element($element);
	$element->children_accept_name ($self, %context);
	$_SaxHandler->end_element($element);
}

sub visit_name_p {
	my ($self, $element, %context) = @_;

	my $att = $element->{Attributes} or die;
	my $htmlEl;
	{
		my $style = $att->{style};
		if ($style eq 'macro') {
			$context{literal} = 1;
		} else {
			$htmlEl = {Name=>'p', Attributes=>$att};
		}
	}
	$_SaxHandler->start_element($htmlEl) if defined $htmlEl;
	$element->children_accept_name ($self, %context);
	$_SaxHandler->end_element($htmlEl) if defined $htmlEl;
}

use RTF::WordGrove;

sub visit_name_sect {
	my ($self, $element, @context) = @_;

	my $att = $element->{Attributes};
	if ($att->{_no} == -1) {
		$_SaxHandler->end_element({Name => 'head'});
		$_SaxHandler->start_element({Name => 'body'});
		RTF::WordGrove::list_group($element);
		RTF::WordGrove::table_group($element);
		RTF::WordGrove::div_group($element);
	} else {
		push @context, (literal => 1);
	}
	return (
		$element->children_accept_name ($self, @context),
	);
}

sub visit_name_rtf {
	my $self = shift; my $element = shift;


	my $ssURL = exists $opts{s}? $opts{s}: "http://laurent.caprani.free.fr/econoweb/docml2html.xsl";
	$_SaxHandler->processing_instruction ({Target => 'xml-stylesheet', Data => 'type="text/xsl"'." href=\"$ssURL\""});
	$_SaxHandler->start_element({Name => 'doc'});
	$_SaxHandler->start_element({Name => 'head'});
	{
	#	As of current dtd, <info> & <sections> may not be the only content of <rtf>.
	#	There can be anchors for instance.
		my $sectno = -1;
		foreach my $child (reverse @{$element->{Contents}}) {
			if ($child->{Name} eq 'sect') {
				$child->{Attributes}{_no} = $sectno;
				$sectno--;
			}
		}
	}
	$element->children_accept_name ($self, @_);
	$_SaxHandler->end_element({Name => 'body'});
	$_SaxHandler->end_element({Name => 'doc'});
}

use HtmlUtils;

sub visit_element {
	my $self = shift; my $element = shift;

	$_SaxHandler->start_element($element);
	$element->children_accept_name ($self, @_),
	$_SaxHandler->end_element($element);
}

sub visit_characters {
	my $self = shift; my $characters = shift;
	my %options = @_;
	return if $options{no_pcdata};

	if ($characters->{Data}) {
		my (@lines) = split(/\n/, $characters->{Data}, -1);	# -1 preserves trailing fields
		for (my $i=0; $i<@lines; $i++) {
			unless ($i == 0) {
				$_SaxHandler->start_element({Name => 'br'});
				$_SaxHandler->end_element({Name => 'br'});
			}
			$_SaxHandler->start_element({Name => 'span', Attributes => {'disable-output-escaping' => 'yes'}})
				if exists $options{literal};
			$_SaxHandler->characters({Data => $lines[$i]});
			$_SaxHandler->end_element({Name => 'span'})
				if exists $options{literal};
		}
	}
}

