#
# Copyright (C) 1997, 2000 Laurent CAPRANI (laurent.caprani@laposte.net)
# Copyright (C) 1997, 1999 Laboratoire de recherche en gestion du logiciel (www.lrgl.uqam.ca)
# Econoweb RTF is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
use strict;

package HtmlUtils;

# some characters are replaced by look-alike equivalents to accomodate old browsers
my %_utf8_to_latin1_ents=(
	"\xe2\x80\x82"=>' ',	# &ensp; -> &nbsp;
	"\xe2\x80\x83"=>'  ',	# &emsp; -> &nbsp;&nbsp;
	"\xe2\x80\x98"=>'`',	# &lsquo; -> '`' LEFT SINGLE QUOTATION MARK -> GRAVE ACCENT
	"\xe2\x80\x99"=>'´',	# &rsquo; -> &acute; RIGHT SINGLE QUOTATION MARK
	"\xe2\x80\x93"=>'-',	# &ndash; EN DASH
	"\xe2\x80\x94"=>'--',	# &mdash; EM DASH
	"\xe2\x80\x9a"=>'&sbquo;',	#SINGLE LOW-9 QUOTATION MARK
	"\xe2\x80\x9e"=>'&bdquo;',	#DOUBLE LOW-9 QUOTATION MARK
	"\xe2\x80\x9c"=>'&ldquo;',	#LEFT DOUBLE QUOTATION MARK
	"\xe2\x80\x9d"=>'&rdquo;',	#RIGHT DOUBLE QUOTATION MARK
	"\xe2\x80\xa0"=>'&dagger;',	#DAGGER
	"\xe2\x80\xa1"=>'&Dagger;',	#DOUBLE DAGGER
	"\xe2\x80\xa2"=>'&bull;',	#BULLET
	"\xe2\x80\xa6"=>'&hellip;',	#HORIZONTAL ELLIPSIS
	"\xe2\x80\xb0"=>'&permil;',	#PER MILLE SIGN
	"\xe2\x80\xb9"=>'&lsaquo;',	#SINGLE LEFT-POINTING ANGLE QUOTATION MARK
	"\xe2\x80\xba"=>'&rsaquo;',	#SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
	"\xe2\x82\xac"=>'&euro;',	#EURO SIGN
	"\xe2\x84\xa2"=>'&trade;',	#TRADE MARK SIGN
);

sub utf8_to_latin1 {
	for (@_) {
		s/(\xe2..)/$_utf8_to_latin1_ents{$1}/g;
		s{([\xc0-\xc3])(.)}{
			my $hi = ord($1);
			my $lo = ord($2);
			chr(
				(($hi & 0x03) <<6) | ($lo & 0x3F)
			)
		}ge;
	}
}

my %_utf8_uc = (
	"\xc2\x9a" => "\xc2\x8a", # s with caron
	"\xc2\x9c" => "\xc2\x8c", # ligature oe
	"\xc2\x9e" => "\xc2\x8e", # z with caron
	"\xc3\xa0" => "\xc3\x80", # a with grave
	"\xc3\xa1" => "\xc3\x81", # a with acute
	"\xc3\xa2" => "\xc3\x82", # a with circumflex
	"\xc3\xa3" => "\xc3\x83", # a with tilde
	"\xc3\xa4" => "\xc3\x84", # a with diaeresis
	"\xc3\xa5" => "\xc3\x85", # a with ring above
	"\xc3\xa6" => "\xc3\x86", # ae
	"\xc3\xa7" => "\xc3\x87", # c with cedilla
	"\xc3\xa8" => "\xc3\x88", # e with grave
	"\xc3\xa9" => "\xc3\x89", # e with acute
	"\xc3\xaa" => "\xc3\x8a", # e with circumflex
	"\xc3\xab" => "\xc3\x8b", # e with diaeresis
	"\xc3\xac" => "\xc3\x8c", # i with grave
	"\xc3\xad" => "\xc3\x8d", # i with acute
	"\xc3\xae" => "\xc3\x8e", # i with circumflex
	"\xc3\xaf" => "\xc3\x8f", # i with diaeresis
	"\xc3\xb0" => "\xc3\x90", # eth
	"\xc3\xb1" => "\xc3\x91", # n with tilde
	"\xc3\xb2" => "\xc3\x92", # o with grave
	"\xc3\xb3" => "\xc3\x93", # o with acute
	"\xc3\xb4" => "\xc3\x94", # o with circumflex
	"\xc3\xb5" => "\xc3\x95", # o with tilde
	"\xc3\xb6" => "\xc3\x96", # o with diaeresis
	"\xc3\xb7" => "\xc3\xb7",
	"\xc3\xb8" => "\xc3\x98", # o with stroke
	"\xc3\xb9" => "\xc3\x99", # u with grave
	"\xc3\xba" => "\xc3\x9a", # u with acute
	"\xc3\xbb" => "\xc3\x9b", # u with circumflex
	"\xc3\xbc" => "\xc3\x9c", # u with diaeresis
	"\xc3\xbd" => "\xc3\x9d", # y with acute
	"\xc3\xbe" => "\xc3\x9e", # thorn
	"\xc3\xbf" => "\xc2\x9f", # y with diaeresis
);

# Sort of change case
sub utf8_uc {
	tr/[a-z]/[A-Z]/;
	s{(\xc2[\x9a\x9c\x9e]|\xc3[\xa0-\xbf])}{$_utf8_uc{$1}}g;
}

sub utf8_split_uc {
	my $lower;
	return map {
		$lower && utf8_uc;
		$lower = not $lower;
		$_;
	} split /((?:[a-z]|\xc2[\x9a\x9c\x9e]|\xc3[\xa0-\xbf])+)/, $_[0];
}

my %_xml_safe_ents=('&'=>'amp','<'=>'lt','>'=>'gt',"'"=>'#39','"'=>'quot');
# above should go outside any loop
# &apos; is not recognized by IE

sub xml_safe {
	for (@_) {
		s/([&<>'"])/&$_xml_safe_ents{$1};/g;
	}
}

sub filename_safe {
	for (@_) {
		tr(/|\\<>*?:."')/-/s;
	}
}

sub uri_safe {
	for (@_) {
		s{([^/#a-zA-Z0-9\$-_\@.&!*"'(),+])}{sprintf('%%%X',ord $1)}xge;
	}
}

my %_html_block_elts = map(($_, undef), qw(HTML HEAD META TITLE STYLE P H1 H2 H3 H4 H5 H6 DIV HR TABLE TR TD UL OL LI));

sub tag_nl {
	my ($name) = @_;
	return exists $_html_block_elts{$name}? "\n": "";
}

sub open_tag {
	my ($element, $empty) = @_;
	my @tag = $element->{Name};

	if (exists $element->{Attributes}) {
		my %att = %{$element->{Attributes}};
		push @tag, map {
			my $value = $att{$_};
			xml_safe($value);	# Is it right to escape '& and <>' with HTML?
			HtmlUtils::utf8_to_latin1($value);
			qq($_="$value")
		} keys %att;
	}
	return "<@tag$empty>".tag_nl($element->{Name});
}

sub empty_tag { open_tag(@_, ' /') }
sub open_tags { map open_tag($_, ''), @_ }
sub close_tags { map("</$_->{Name}>".tag_nl($_->{Name}), reverse @_) }

1;
