#!/usr/bin/perl
# $Id: $
# Written by Adrian Mariano, additional features by Eric Backus and
# Jeff Conrad.

# Script to translate a texinfo file into an nroff/troff manual page.
# last revision: 19 May 2020 Jeff Conrad

$thisprog = $0;
$thisprog =~ s/.*[\/\\]//;
$version="1.2.4";

$html=0;
$example=0;
$ignore=0;
$tex=0;
$doman=0;
$title=0;
$diditem=0;
$justdidlp=1;
$noman=0;
$manprefix="";
$args=($#ARGV < 0) ? "stdin" : "@ARGV";

printf(".\\\"Do not edit this file.  It was created from %s\n", $args);
printf(".\\\"using %s version %s on %s", $thisprog, $version, `date`);

while(<>)
{
    # use font CW in tables
    if (/\@c man\s+l\s/)
    {
	s/\@c man //;
	s/l/lfCWp-1/;
	print;
	next;
    }
    if (/\@c\s+man\s+program/) { 
	chop;
	s/\@c\s+man\s+program\s+//;
	$program = $_;
	next;
    }
    if (s/\@c man //)
    {
	print;
	if (/AUTHOR/) {
	    $errors = 0;
	    if (! $author) {
		printf(STDERR "%s: missing '\@author'\n", $thisprog);
		$errors++;
	    }
	    if (! $program) {
		printf(STDERR "%s: missing '\@c man program'\n", $thisprog);
		$errors++;
	    }
	    if ($errors) { exit; }
	    else { printf(".I %s\nwas written by %s\n", $program, $author); }
	}
	if (/\.TH/) { add_extensions(); }
	next;
    }
    if (/\@c noman/) { $noman=1; next; }
    if (/\@c end noman/) { $noman=0; next; }
    if ($noman) { next; }

    if (/\@c ifman\s*(.*)/) { $doman=1; $manprefix = $1; next; }
    if (/\@c end ifman/) { $doman=0; $manprefix = ""; next; }

    if (/^\@c [^m]/) { next; }

    if (/^\\input/) { next; }
    if (/^\*/) { next; }
    if (/^START-INFO-DIR-ENTRY/) { next; }
    if (/^END-INFO-DIR-ENTRY/) { next; }

    if (/\@author/) {
	chop;
	s/\@author\s+//;
	$author = $_;
	next;
    }
    if (/\@titlepage/) { $title=1; next; }
    if (/\@end titlepage/) { $title=0; next; }
    if (/\@tex/) { $tex=1; next; }
    if (/\@end tex/) { $tex=0; next; }
    if (/\@ignore/) { $ignore=1; next; }
    if (/\@end ignore/) { $ignore=0; next; }
    if (/\@ifhtml/) { $html=1; next; }
    if (/\@end ifhtml/) { $html=0; next; }
    if (/\@html/) { $html=1; next; }
    if (/\@end html/) { $html=0; next; }
    if (!$doman && ($ignore || $html || $title || $tex)) { next; }

    s/\@\*$/\n\.br/g;
    s/^\@\*/.br/g;
    s/\@\*$/\n.br/g;
    s/\@ / /g;
    s/\@dmn\{([^}]*)}/\\|$1/g;
    s/\@tie\{}/\@no_break_space\{}/g;
    s/\@w\{}/\@no_break_space\{}/g;
    s/\@backslashchar\{}/\\e/g;

    # opening and closing double quotes
    s/``(\S)/\\(lq$1/g;
    s/(\S)''/$1\\(rq/g;

    # ellipsis
    s/\@dots\{}/.../g;

    s/\@cite\{([^}]*)}/\@in_sgl_quotes\{$1}/g;
    s/\@url\{([^}]*)}/\@in_sgl_quotes\{$1}/g;
    s/\@email\{([^}]*)}/\@in_sgl_quotes\{$1}/g;

    s/\@dfn\{([^}]*)}/\@in_italics\{$1}/g;

    s/\@emph\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@i\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@r\{([^}]*)}/\@in_roman\{$1}/g;
    s/\@var\{([^}]*)}/\@in_italics\{$1}/g;

    s/\@b\{([^}]*)}/\@in_bold\{$1}/g;
    s/\@strong\{([^}]*)}/\@in_bold\{$1}/g;

    # remove trailing comma from xref because man won't include the page number
    s/\@xref\{([^}]*)},/\@xref\{$1}/g;
    s/\@xref\{([^}]*)}/See \@in_italics\{$1}/g;
    s/\@ref\{([^}]*)}/\@ref\{$1}/g;
    s/\@ref\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@pxref\{([^}]*)}/see \@in_italics\{$1}/g;
    s/\@uref\{([^}]*)}/\@in_roman\{$1}/g;

    if (/\@chapter.*\@command/)
    {
	s/\@command\{([^}]*)}/\@in_italics\{$1}/g;
    }

    # show in constant-width font
    s/\@code\{([^}]*)}/\@constwid\{$1}/g;
    s/\@command\{([^}]*)}/\@constwid\{$1}/g;
    s/\@env\{([^}]*)}/\@constwid\{$1}/g;

    # show in constant-width oblique font
    s/\@kbd\{([^}]*)}/\@constwidI\{$1}/g;

    # handle backslash character in Windows pathname
    # starts with a drive specifier ...
    s/(\@file\}]*)}/\@constwidQ\{$1}/g;
    if (/(\@file\{[[:alpha:]]:)/) {
	# don't change font switches or escaped spaces
	s/(\S)\\(?!(\s|f[RIBP]|f\([A-Z]{2}))/$1\\e/g;
    }
    # handle backslash character in sample
    s/(\@samp\{[^}]*)\\/$1\\e/g;

    # prevent double hyphens in options from being converted to en dashes
    s/(\@option\{)--/$1-\\&-/g;

    # show in constant-width font with single quotes
    s/\@file\{([^}]*)}/\@constwidQ\{$1}/g;
    s/\@option\{([^}]*)}/\@constwidQ\{$1}/g;
    s/\@samp\{([^}]*)}/\@constwidQ\{$1}/g;

    s/\@sc\{([^}]*)}/\@to_upper\{$1}/g;

    s/\@key\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@footnote\{([^}]*)}/\@in_square_br\{$1}/g;

    s/\@math\{([^}]*)}/\@no_decoration\{$1}/g;

    if (/\@w\{([^}]*)}/) {
	s/\@w\{([^}]*)}/\@no_break_word\{$1}/g;
    }

    # leave minus (dash) lists so they can be recognized later
    if (! /^\@itemize/) { s/\@minus\{}/\\-/g; }
    s/\@copyright\{}/\\(co/g;
    s/\@noindent//;
    s/\@\{/{/g;
    s/\@}/}/g;
    s/\@\@/@/g;
    s/---/\\(em/g;

    # FIXME?
    # assume en dashes will be closed up to previous word
    #s/([^" ]+?)--/$1\\(en/g;
    s/(\w+)--(\w)/$1\\(en$2/g;
    # allowable line break escape: groff only?
    s/\@\//\\:/g;
    s/^\@raggedright/.na/;
    s/^\@end raggedright/.ad b/;

    s/\@in_sgl_quotes\{([^}]+)}/`$1'/g;
    s/\@in_dbl_quotes\{([^}]+)}/\"$1\"/g;
    s/\@in_italics\{([^}]+)}/\\fI$1\\fP/g;
    s/\@in_roman\{([^}]+)}/\\fR$1\\fP/g;
    s/\@in_bold\{([^}]+)}/\\fB$1\\fP/g;
    s/\@to_upper\{([^}]*)}/\U$1\E/g;
    s/\@no_decoration\{([^}]*)}/$1/g;
    if (/\@no_break_word\{([^}]+)}(\S*)/) {
	$_ = no_break_word("$_", '@no_break_word');
    }
    s/\@no_break_space\{}/\\ /g;
    s/\@[ ]/ /g;
    s/\@in_angle_br\{([^}]*)}/<$1>/g;
    s/\@in_square_br\{([^}]*)}/[$1]/g;

    # convert constwid[IQ]* to inline troff escapes
    s/\@constwid\{([^}]*)}/\\%\\f(CW$1\\fR/g;
    s/\@constwidI\{([^}]*)}/\\%\\f(CI$1\\fR/g;
    s/\@constwidQ\{([^}]*)}/\\%\\(oq\\f(CW$1\\fR\\(cq/g;

    if (/\@set codequotebacktick|\@codequotebacktick on/) {
      printf(".if n .tr `\\`\n");
      next;
    }
    if (/\@clear codequotebacktick|\@codequotebacktick off/) {
      printf(".ie .if '\*[.T]'utf8' .tr `\\(oq\n");
      printf(".el .if n .tr `'\n");
      printf(".tr '\\(cq\n");
      next;
    }

    if (/\@set codequoteundirected|\@codequoteundirected on/) {
      printf(".tr '\\(aq\n");
      next;
    }
    if (/\@clear codequoteundirected|\@codequoteundirected off/) {
      printf(".tr '\\(cq\n");
      next;
    }

    s/\@value\{([^\s]+)}/$value{$1}/eg;
    if (/\@set\s+([^\s]+)\s+(.*)$/) { $value{$1} = $2; next; }
    if (/\@clear\s+([^\s]+)\s+(.*)$/) { delete $value{$1}; next; }

    # tables of command-line options as used in units(1)
    if (/\@table (.*)/) { $intable = 1; next; }
    if (/\@end  *table/)
    {
	$intable = 0;
	if ($in_taggedlist == 1) { $in_taggedlist = 0; }
	next;
    }
    if ($intable == 1)
    {
	if (/\@itemx (.*)/)
	{
	    $samp = $1;
	    # add hair space to visually separate the hyphens in roman type
	    $samp =~ s/--/\\-\\^\\-/;
	    $samp =~ s/-([[:alnum:]])/-\\^$1/;
	    if (!$diditem)
		{ printf(".TP\n.BR \"$samp\""); }
	    else
		{ printf(" \", \" \"$samp\""); }
	    $diditem=1;
	    $new_paragraph = "";
	    next;
	}
	elsif ($diditem) { printf("\n"); $diditem=0; }
	if (/\@item (.*)/)
	{
	    $in_taggedlist = 1;
	    $samp = $1;
	    # add hair space to visually separate the hyphens in roman type
	    $samp =~ s/--/\\-\\^\\-/;
	    $samp =~ s/-([[:alnum:]])/\\-\\^$1/;
	    printf("%s.TP\n%s.BR \"$samp\"", $manprefix, $manprefix);
	    $diditem=1;
	    $new_paragraph = "";
	    next;
	}
    }
    # output a paragraph macro unless already done with a TP macro above
    if ($new_paragraph)
    {
	printf("%s\n", $new_paragraph);
	$justdidlp = 1;
	$new_paragraph = "";
    }

    # unordered list: bullet or minus
    if (/^\@itemize *$/ || /^\@itemize +@(bullet|minus)(\{})?/)
    {
	if ($1 =~ "minus") { $listmark = "\\-"; }
	else { $listmark = "\\(bu"; }
	$in_ulist = 1;
	next;
    }
    if ($in_ulist == 1 && /^\@end +itemize/) { $in_ulist = 0; next; }
    if ($in_ulist == 1)
    {
	if (/^\@item *$/) { printf("%s.IP \\h'1n'%s 4n\n", $manprefix, $listmark); }

    }

    if (s/\@chapter (.*)/.SH \U$1\E/)
    {
	if (/GNU FREE DOCUMENTATION/) { next; }
	# restore proper case on font switches
	s/\\FR/\\fR/g;
	s/\\FI/\\f(BI/g;	# chapter headings (SH in man) are bold
	s/\\FP/\\fP/g;
	printf("%s%s", $manprefix, $_);
	$justdidlp=1;
	next;
    }

    if (s/\@section (.*)/$1/)
    {
	printf("%s.SS %s", $manprefix, $_);
	next;
    }

    # FIXME? why do we need $manprefix for these?
    # input/output example macros
    if (/\@example/) { printf("%s.EX\n", $manprefix); $example=1; next; }
    if (/\@end example/) { printf("%s.EE\n", $manprefix); $example=0; $justdidlp=0; next; }

    if (/\@smallexample/) { printf("%s.EX\n", $manprefix); $example=1; next; }
    if (/\@end smallexample/) { printf("%s.EE\n", $manprefix); $example=0; $justdidlp=0; next; }

    # no CW font
    if (/\@display/) { printf("%s.RS 5n\n", $manprefix, $manprefix); $example=1; next; }
    if (/\@end display/) { printf("%s.RE\n", $manprefix, $manprefix); $example=0; next; }

    # no CW font, no indent
    if (/\@format/) { printf("%s.nf\n", $manprefix); $example=1; next; }
    if (/\@end format/) { printf("%s.fi\n", $manprefix); $example=0; next; }


    if ($example) { s/\\\s*$/\\e\n/ };

    # blank line: new paragraph; don't output until we see if @item or @itemx follows
    if (!$example && /^\s*$/ && !$doman)
    {
	if ($justdidlp) { next; }
	if ($in_taggedlist == 1)
	{
	    $new_paragraph = ".IP";
	}
	else
	{
	    $new_paragraph = ".PP";
	}
	next;
    }

    if (/^\@/) { next; }

    printf("%s%s", $manprefix, $_);

    if (!$doman) { $justdidlp=0; }
}

# Override a few default groff man settings. groff loads the man macro file
# after the call of TH, so these settings must likewise follow that call
# of TH.

sub add_extensions
{
    printf(".\\\"\n");
    printf(".\\\"------------------------------------------------------------------------\n");
    printf(".\\\" ensure that ASCII circumflex U+005E (^) and tilde U+007E (~)\n");
    printf(".\\\" are not remapped, so that example text can be copied and pasted\n");
    printf(".tr ^\\(ha\n");
    printf(".tr ~\\(ti\n");
    printf(".\\\" override translation in troffrc\n");
    printf(".ie .if '\\*[.T]'utf8' .tr `\\(oq'\\(cq\n");
    printf(".\\\" override mapping of ` to 60h with Tascii; assume\n");
    printf(".\\\" we don't need a backquote for an example\n");
    printf(".el .if n .tr `'\n");

    # bullet: use '*' rather than 'o' for ASCII/Latin1; override groff's
    # translation to MIDDLE DOT for others
    printf(".if n .tr \\(bu\*\n");
    printf(".\\\" override translation to MIDDLE DOT\n");
    printf(".if '\\*(.T'utf8' .tr \\(bu\\(bu\n");
    printf(".if '\\*(.T'cp1252' .tr \\(bu\\(bu\n");
    printf(".if '\\*(.T'ansi' .tr \\(bu\\(bu\n");
    printf(".\\\"------------------------------------------------------------------------\n");
    printf(".\\\"\n");
}

# convert all spaces within @w{...} to unbreakable
sub no_break_word
{
    my $line = shift;
    my $pattern = (shift) . "\{";
    my $len = length($pattern);
    my $ndx = -1;
    my $bracelevel = 0;
    my $char;

    while (($ndx = index($line, $pattern, $ndx)) > -1) {
	# get rid of the @ command and opening brace
	substr($line, $ndx, $len, '');
	$bracelevel = 1;
	while ($bracelevel > 0) {
	    $char = substr($line, $ndx, 1);
	    # end of line and braces not closed
	    if ($char eq "") {
		last;
	    }
	    elsif ($char eq '{') {
		$bracelevel++;
	    }
	    elsif ($char eq '}') {
		$bracelevel--;
	    }
	    # make spaces nonbreaking
	    if ($char eq ' ') {
		substr($line, $ndx++, 1, '\ ');
		$ndx++;
		# assume multiple spaces are not wanted
		while (substr($line, $ndx, 1) eq ' ') {
		    substr($line, $ndx, 1, '');
		}
	    }
	    $ndx++;
	}
	# get rid of the closing brace for the @ command. This should
	# always be true unless there's an internal brace mismatch ...
	if (substr($line, $ndx - 1, 1) eq '}' ) {
	    substr($line, $ndx - 1, 1, '');
	}
	else {
	    die "Missing closing '}'";
	}
    }

    return $line;
}
