#!perl -w
# usage: perl html.pl <source>.rtf
#    or: perl html.pl -x <source>.rtf.xml
#	options: -s "CSS stylesheet to include"
# ouputs conversion of source file to HTML.
# affiche la transformation HTML du fichier source.
#
# Copyright (C) 1997, 2000 Laurent CAPRANI (laurent.caprani@laposte.net)
# Copyright (C) 1997, 1999 Laboratoire de recherche en gestion du logiciel (www.lrgl.uqam.ca)
# Econoweb RTF is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
use strict;

use constant ECHO_DURATION => 0;
END {print STDERR "duration: ", time - $^T, "\n" if ECHO_DURATION}

# open (TRACE_FH, '>trace.txt'); use constant TRACE_FH => *TRACE_FH;
use constant TRACE_FH => *STDERR;

use Carp;
# use -MCarp=verbose to see stack backtrace
$SIG{__WARN__} = sub{&Carp::croak};

$SIG{__DIE__} = sub{&Carp::croak};

use Getopt::Std;

my %opts;
getopts('x:s:', \%opts);

my $Grove;
use XML::Grove::Builder;
my $builder = XML::Grove::Builder->new();


if (exists $opts{x}) {
	require XML::Parser::PerlSAX;
	
	my $parser = XML::Parser::PerlSAX->new( Handler => $builder );
	print STDERR "loading: ", time - $^T, "\n" if ECHO_DURATION;
	$Grove = $parser->parse( Source => {SystemId => $opts{x}} );
} else {
	require 'RTF/parser.pl';

	print STDERR "loading: ", time - $^T, "\n" if ECHO_DURATION;
	$Grove = RtfParser::parse(Handler => $builder);
}

print STDERR "grove building: ", time - $^T, "\n" if ECHO_DURATION;
use Data::Grove::Visitor 0.08;
print $Grove->children_accept_name (new Visitor);

####################################
package Visitor;

sub new {
	my $class = shift;

	my $self = {@_,
		anchors => [],
	};
	return bless $self, $class;
}

use HtmlUtils;

my @footnotes;

sub footnotes {
	my $self = shift;
	my @return;

	if (@footnotes) {
		my $no=0;
		@return = (
			HtmlUtils::empty_tag({Name=>'HR', Attributes=>{ALIGN=>"LEFT", SIZE=>"1", WIDTH=>"33%"}}),
			HtmlUtils::open_tags({Name=>'TABLE'}),
			map({
				my @left_col = (
					{Name=>'TD'},
					{Name=>'SMALL'},
					{Name=>'SUP'},
					{Name=>'A', Attributes=>{NAME=>"_ftn$no", HREF=>"#_ftnref$no"}},
				);
				$no++;
				(
					HtmlUtils::open_tags({Name=>'TR', Attributes=>{VALIGN=>'TOP'}}),
						HtmlUtils::open_tags(@left_col),
							"[$no]",
						HtmlUtils::close_tags(@left_col),
						HtmlUtils::open_tags({Name=>'TD'}),
							$footnotes[$no-1]->children_accept_name ($self),
						HtmlUtils::close_tags({Name=>'TD'}),
					HtmlUtils::close_tags({Name=>'TR'}),
				)
			} @footnotes),
			HtmlUtils::close_tags({Name=>'TABLE'}),
		);
	}
	return @return;
}

sub visit_name_footnote {
	my $self = shift; my $element = shift;
	my @return;

	if (@{$self->{anchors}}) {
		push @return, HtmlUtils::close_tags($self->{anchors}[-1]);
	}
	push @footnotes, $element;
	my $no = @footnotes;
	push @return,qq(<A NAME="_ftnref$no" HREF="#_ftn$no">[$no]</A>);
	if (@{$self->{anchors}}) {
		push @return, HtmlUtils::open_tags($self->{anchors}[-1]);
	}
	return @return;
}

sub visit_name_link {
	my $self = shift; my $element = shift;
	my @return;

	# Anchors cannot be nested
	if (@{$self->{anchors}}) {
		push @return, HtmlUtils::close_tags($self->{anchors}[-1]);
	}
	my %att = %{$element->{Attributes}};
	my $href;
	if (exists $att{target}) {
		$href = $att{target};
	}
	if (exists $att{fragment}) {
		my $fragment = $att{fragment};
		$href .= "#$fragment";
	}
	my $elt = {Name=>'A', Attributes=>{HREF=>$href}};
	push @{$self->{anchors}}, $elt;
	push @return,
		HtmlUtils::open_tags($elt),
		$element->children_accept_name ($self, @_),
		HtmlUtils::close_tags($elt),
	;
	pop @{$self->{anchors}};
	if (@{$self->{anchors}}) {
		push @return, HtmlUtils::open_tags($self->{anchors}[-1]);
	}
	return @return;
}

sub visit_name_bkmarkstart {
	my $self = shift; my $element = shift;
	my @return;

	my %att = %{$element->{Attributes}};
	my $name = $att{name};
	if (@{$self->{anchors}}) {
		push @return, HtmlUtils::close_tags($self->{anchors}[-1]);
	}
	my $elt = {Name=>'A', Attributes=>{NAME=>$name}};
	push @return,
		HtmlUtils::open_tags($elt),
		HtmlUtils::close_tags($elt),
	;
	if (@{$self->{anchors}}) {
		push @return, HtmlUtils::open_tags($self->{anchors}[-1]);
	}
	return @return;
}

sub visit_name_span {
	my ($self, $element, %context) = @_;

	my %att; %att = %{$element->{Attributes}} if exists $element->{Attributes};
	my @htmlEl = map {
		my @return;
		if (/^class$/) {
			push @return, map @{{
				'italic' => [{Name=>'I'}],
				'bold' => [{Name=>'B'}],
				'underline' => [{Name=>'U'}],
				'super' => [{Name=>'SUP'}, {Name=>'SMALL'}],
			}->{$_}}, split(' ',$att{class});
		} elsif (/^character-style/) {
			for ($att{'character-style'}) {
				/^footnote reference$/ && do {
					@return = ({Name=>'SUP'}, {Name=>'SMALL'});
				last};
				/^literal$/ && do {
					$context{literal} = 1;
				last};
				/^Nom propre$/ && do {
					$context{small_caps} = 1;
				last};
				/^(?:Formule|HTML Code)$/ && do {
					@return = {Name=>'CODE'};
				}
				#	@return = s/\s/-/g; 	# Character styles
			}
		}
		@return;
	} keys %att;
	return (
		HtmlUtils::open_tags(@htmlEl),
		$element->children_accept_name ($self, %context),
		HtmlUtils::close_tags(@htmlEl),
	);
}

sub visit_name_p {
	my ($self, $element, %context) = @_;

	my %att; %att = %{$element->{Attributes}} if exists $element->{Attributes};

	my @htmlEl;
	{
		my @para_atts;
		if (exists $att{align}) {
			push @para_atts, (ALIGN => $att{align});
		}
		{
			my $style;
			if (exists $att{spacebefore}) { $style .= "margin-top   :$att{spacebefore}em;" }
			if (exists $att{spaceafter} ) { $style .= "margin-bottom:$att{spaceafter }em;" }
			if (defined $style) {
				push @para_atts, (STYLE => $style);
			}
		}
		my $style = $att{style};
		my %trans = (
			'heading 1' =>	[{Name=>'H1', Attributes=>{@para_atts}}],
			'heading 2' =>	[{Name=>'H2', Attributes=>{@para_atts}}],
			'heading 3' =>	[{Name=>'H3', Attributes=>{@para_atts}}],
			'heading 4' =>	[{Name=>'H4', Attributes=>{@para_atts}}],
			'heading 5' =>	[{Name=>'H5', Attributes=>{@para_atts}}],
			'heading 6' =>	[{Name=>'H6', Attributes=>{@para_atts}}],
			'Plain Text' =>	[{Name=>'PRE'}],
			'caption' =>	[{Name=>'P'}, {Name=>'STRONG'}],
		);
		if (exists $trans{$style}) {
			@htmlEl = @{$trans{$style}};
		} elsif ($style eq 'macro') {
			$context{literal} = 1;
		} else {
			# ($htmlEl = $style) =~ s/\s/-/g;
			@htmlEl = {Name=>'P', Attributes=>{@para_atts}};
		}
	}
	return (
		HtmlUtils::open_tags(@htmlEl),
		$element->children_accept_name ($self, %context),
		HtmlUtils::close_tags(@htmlEl),
	);
}

use RTF::WordGrove;

sub visit_name_sect {
	my ($self, $element, @context) = @_;
	my @return;

	my $att = $element->{Attributes};
	if ($att->{_no} == -1) {
		push @return,
			HtmlUtils::close_tags({Name=>'HEAD'}),
			HtmlUtils::open_tags({Name=>'BODY'}),
		;
		RTF::WordGrove::list_group($element);
		RTF::WordGrove::table_group($element);
		RTF::WordGrove::div_group($element);
	} else {
		push @context, (literal => 1);
	}
	return (
		@return,
		$element->children_accept_name ($self, @context),
	);
}

sub visit_name_div {
	my $self = shift; my $element = shift;

	my $att = $element->{Attributes} or die;

	my $style = '';
	for ($att->{border}) {
		$style .= "border-left-style:solid;padding-left:.5em;" if (/l/);
		$style .= "border-right-style:solid;padding-right:.5em;" if (/r/);
		$style .= "border-top-style:solid;" if (/t/);
		$style .= "border-bottom-style:solid;" if (/b/);
	}
	
	my @htmlEl = (
		{
			Name => 'DIV',
			Attributes => {
				STYLE => $style,
			},
		},
	);
	return (
		HtmlUtils::open_tags(@htmlEl),
		$element->children_accept_name ($self, @_),
		HtmlUtils::close_tags(@htmlEl),
	)
}

sub meta_tag {
	my ($name, $self, $element) = @_;

	HtmlUtils::empty_tag({
		Name => 'META',
		Attributes => {
			NAME => $name,
			CONTENT => $element->{Attributes}{text},
		},
	})
}

sub visit_name_subject {meta_tag('Description', @_)}
sub visit_name_author {meta_tag('Author', @_)}
sub visit_name_keywords {meta_tag('Keywords', @_)}
sub visit_name_doccomm {meta_tag('Comment', @_)}

sub visit_name_title {
	my ($self, my $element) = @_;
	my $text = $element->{Attributes}{text};

	$element->{Name} = 'TITLE';
	$element->{Contents}[0] = new XML::Grove::Characters ({Data => $text});
	delete $element->{Attributes};

	&visit_element
}

sub visit_name_info {
	my $self = shift; my $element = shift;
	$element->children_accept_name ($self, @_)
}

sub css_style {
	my @return;

	if (exists $opts{s}) {
		my $stylefile = $opts{s};
		open(STYLE, "< ".$opts{s}) || die("Can't open CSS '$opts{s}': $!");
		my $styleElt = {Name=>'STYLE', Attributes=>{type=>'text/css'}};
		push @return, (
			HtmlUtils::open_tags($styleElt),
			<STYLE>,
			HtmlUtils::close_tags($styleElt),
		);
		close(STYLE);
	}
	return @return
}

use constant econoweb_URL => 'http://laurent.caprani.free.fr/econoweb/rtf/';

use constant econoweb_plug => q(
	
<!-- Shameless plug -->
<p class="plug">
	This is an MS-Word document,
	processed by
	<a href=").econoweb_URL.q(" class="small-caps">EconoWEB</a> Open Source Software.
</p>);

sub visit_name_rtf {
	my $self = shift; my $element = shift;
	my @return;
	
	push @return,
		'<!DOCTYPE HTML "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'."\n",
		HtmlUtils::open_tags({Name=>'HTML'}, {Name=>'HEAD'}),
		HtmlUtils::empty_tag({Name=>'META', Attributes=>{NAME=>"GENERATOR", CONTENT=>econoweb_URL}}),
		HtmlUtils::empty_tag({Name=>'META', Attributes=>{'HTTP-EQUIV'=>"Content-Type", CONTENT=>
#			"text/html; charset=iso-8859-1"
			"text/html; charset=windows-1252"
		}}),
		css_style(),
	;
	{
	#	As of current rtfml dtd, <info> & <sections> may not be the only content of <rtf>.
	#	There can be anchors for instance.
		my $sectno = -1;
		foreach my $child (reverse @{$element->{Contents}}) {
			if ($child->{Name} eq 'sect') {
				$child->{Attributes}{_no} = $sectno;
				$sectno--;
			}
		}
	}
	push @return,
		$element->children_accept_name ($self, @_),
		footnotes($self, @_),
		econoweb_plug,
		HtmlUtils::close_tags({Name=>'HTML'}, {Name=>'BODY'}),
	;
	return @return;
}

sub visit_element {
	my $self = shift; my $element = shift;

	my @return;
	if (defined $element->{Contents}) {
		@return = (
			HtmlUtils::open_tags($element),
			$element->children_accept_name ($self, @_),
			HtmlUtils::close_tags($element)
		);
	} else {
		@return = HtmlUtils::empty_tag($element);
	}
	return @return;
}

sub visit_characters {
	my $self = shift; my $characters = shift;
	my %options = @_;
	return if $options{no_pcdata};
	my @return;

	# This is made for browsers that do not support font-variant:small-caps (that is, version<6)
	if (exists $options{small_caps}) {
		my $lower;
		foreach (HtmlUtils::utf8_split_uc($characters->{Data}))
		{
			my $element = $lower ?
				new XML::Grove::Element ({
					Name => 'SMALL',
					Contents => [ new XML::Grove::Characters ({Data => $_}), ],
				}):
				new XML::Grove::Characters ({Data => $_});
			push @return, $element->accept_name($self); # no other %options.
			$lower = not $lower;
		}
	} else {
		my (@lines) = split(/\n/, $characters->{Data}, -1);	# -1 preserves trailing fields
		for (my $i=0; $i<@lines; $i++) {
			push @return, HtmlUtils::empty_tag({Name => 'BR'}) unless $i==0;
			my $line = $lines[$i];
			HtmlUtils::xml_safe($line) unless exists $options{literal};
			HtmlUtils::utf8_to_latin1($line);
			push @return, $line;
		}
	}
	return @return;
}
