#!/bin/sh
#
# dwww-convert -- convert docs to HTML
#
# Simple usage: $0 <type> <location>
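#	<type> is the document type: file, man, runman, info, dir or html;
#		any other type is converted as plain text
#	<location> is the full pathname of the original document (for
#		runman, a "name/section" manual page reference)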
#
# Converted HTML is stored into a cache (see dwww-cache) and served from there.
#
# Part of the Debian dwww package.  Written by Lars Wirzenius.
# "@(#)dwww:$Id: dwww-convert,v 1.2 1996/09/24 15:24:41 liw Exp $"


#
# Setup defaults and read in the configuration file.
#

# Built-in defaults.  Any of these may be overridden by the
# configuration file sourced below.

# Colon-separated list of directory trees that badfile() accepts.
DWWW_DOCPATH=/usr/doc:/usr/info:/usr/man:/usr/X11R6/man:/usr/local/man
DWWW_HTMLDIR=/var/lib/dwww

# Commands (or shell functions from this file) used for each document
# type; the main program runs the selected one with the file name.
DWWW_DIR2HTML=builtin_dir2html
DWWW_INFO2HTML=builtin_info2html
DWWW_MAN2HTML=builtin_man2html
DWWW_TEXT2HTML=builtin_text2html

# "yes" selects gzip for converted output; anything else selects cat.
DWWW_COMPRESS=yes

# Site-specific overrides, if the file exists.
if test -f /etc/dwww.conf; then
	. /etc/dwww.conf
fi

#
# Setup other variables.
#

# Fixed command search path, set after the configuration file is read.
PATH=/usr/sbin:/usr/bin:/bin

# Filter that converter output is piped through before it is handed
# to dwww-cache: gzip when DWWW_COMPRESS is exactly "yes", else cat.
case "$DWWW_COMPRESS" in
yes)	COMPRESS="gzip -9" ;;
*)	COMPRESS="cat" ;;
esac

#######################################################################
#
# Utility functions
#


#
# Map a file name to a document type using its suffix only.
#	$1	file name
# Writes "html" for names ending in ".html" or ".html.gz" and
# "text" for every other name.
#
guess_type() {
	case "$1" in
	*.html | *.html.gz)
		printf 'html\n'
		;;
	*)
		printf 'text\n'
		;;
	esac
}


#
# Are we allowed to show this file?
#	$1	path of the file (the main program passes it through
#		realpath first)
# Returns 0 (true, "bad") unless the path lies inside one of the
# directories listed in the colon-separated DWWW_DOCPATH, in which
# case it returns 1.
#
# Note: getting this check wrong compromises security.
#
badfile() {
	d="$1"
	for i in `printf '%s\n' "$DWWW_DOCPATH" | tr : ' '`
	do
		# Resolve the entry to its physical path.  pwd must only run
		# when cd succeeded: otherwise it reports the directory this
		# script was started in, and everything below that directory
		# would be accepted.
		j="$(cd "$i" 2>/dev/null && /bin/pwd)"

		# An entry that cannot be entered grants nothing.  (An empty
		# $j would also turn the pattern below into "/*".)
		test -n "$j" || continue

		case "$d/" in
		"$j"/*) return 1 ;;
		esac
	done
	return 0
}


#
# Remove headers and footers from a formatted manual page.
# Input from stdin, output to stdout.
#
# The input is treated as a sequence of 66-line pages.  Whitespace-only
# lines are stored as empty lines.  The number of header lines and the
# position of the footer are measured on a page while no header length
# is known yet, and are then reused for the pages that follow.
#
remove_headers_and_footers() {
	nawk 'BEGIN { pagelen = 66 }
	END { printpage() }
	# A full page has been collected: flush it before storing this line.
	pagelen > 0 && n == pagelen { printpage(); n = 0 }
	/^[ \t]*$/ { line[n++] = ""; next }
	{ line[n++] = $0 }
	function printpage() {
		if (headerlen == 0) {
			# Header: leading blank lines, then the header line when
			# it is followed by a blank line, then more blank lines.
			for (i = 0; i < n && line[i] == ""; ++i);
			if (i < n-1 && line[i+1] == "") ++i
			for (; i < n && line[i] == ""; ++i);
			headerlen = i

			# Footer: the mirror image, scanned from the page end.
			for (i = n; i > headerlen && line[i-1] == ""; --i);
			if (i > headerlen && line[i-2] == "") --i
			for (; i > headerlen && line[i-1] == ""; --i);
			footerstart = i
		}

		# line[] is reused between pages without being cleared, so
		# stop at n: on a short final page the slots from n upwards
		# still hold text of the previous page.
		for (i = headerlen; i < footerstart && i < n; ++i) print line[i]
	}
	'
}


#
# Convert a manual page reference ("name/section") to a path.
#
manref2path() {
	name="`echo \"$1\" | sed 's/\/.*//'`"
	section="`echo \"$1\" | sed 's/.*\///'`"
	file="`man --location \"$section\" \"$name\" | sed 's/ .*//'`"
	echo "`realpath $file`"
}


#
# Convert text to HTML; input from stdin.  The output can be
# embedded in the body (no headers are output).
#
# "&", "<" and ">" are replaced by entities.  Overstruck text is
# turned into markup: a character, a backspace and the same character
# again becomes <b>; an underscore, a backspace and a character
# becomes <i>.  Adjacent runs of the same tag are merged.
#
text2html_nohead() {
	# Generate the backspace at run time so that the sed script does
	# not depend on a raw control character surviving in this file.
	t2h_bs="$(printf '\010')"

	# Escaping runs first, so an overstruck "&", "<" or ">" shows up
	# as an overstruck entity; the "&[a-z]*;" rules handle those
	# before the single-character rules get a chance to split one.
	sed "s/&/\&amp;/g;s/</\&lt;/g;s/>/\&gt;/g;
		s:\(&[a-z]*;\)${t2h_bs}\1:<b>\1</b>:g;
		s:\(.\)${t2h_bs}\1:<b>\1</b>:g;
		s:_${t2h_bs}\(&[a-z]*;\):<i>\1</i>:g;
		s:_${t2h_bs}\(.\):<i>\1</i>:g;
		s:</\([ib]\)><\1>::g"
}


#
# Convert a reference to an Info file to a complete path.
#	$1	reference; a relative one is taken to be below /usr/info
# Writes the first readable candidate among <ref>, <ref>.info,
# <ref>.info.gz and <ref>.gz to stdout.  When none is readable,
# <ref>.info.gz is written anyway, as before.
#
# The main program calls this as inforef2path; inforef2html is the
# name the function was originally defined under and is kept as an
# alias.
#
inforef2path() {
	inforef="$1"
	case "$inforef" in
	/*) ;;
	*) inforef="/usr/info/$inforef" ;;
	esac

	for inforef_try in \
		"$inforef" "$inforef.info" "$inforef.info.gz" "$inforef.gz"
	do
		if test -r "$inforef_try"
		then
			printf '%s\n' "$inforef_try"
			return 0
		fi
	done

	printf '%s\n' "$inforef.info.gz"
}

inforef2html() {
	inforef2path "$@"
}


#######################################################################
#
# Builtin converters
#


#
# Create a directory listing in HTML.
#	$1	directory to list
# If the directory has an index.html, that file is output instead.
# Otherwise regular files are listed first (sorted): names ending in
# ".html" link to the file itself, all others to the dwww CGI with
# type=file.  Subdirectories follow; one with an index.html links to
# that file, any other to the CGI with type=dir.
#
# The directory name is never pasted into a sed or awk program, so
# names containing "#", "&" or spaces are listed correctly.
# NOTE(review): names are still emitted without HTML or URL escaping.
#
builtin_dir2html() {
	if [ -f "$1/index.html" ]
	then
		cat "$1/index.html"
		return 0
	fi

	echo "<html><head><title>Files in $1</title></head><body>"
	echo "<h1>Files in $1</h1>"

	# -maxdepth goes before the tests (GNU find warns otherwise); the
	# relative order of -type and -follow is left as it was.
	find "$1" -maxdepth 1 -type f -follow |
	awk -v dir="$1" '
	{
		# Drop the leading "dir/" by length, not by pattern.
		prefix = dir "/"
		if (index($0, prefix) == 1)
			print substr($0, length(prefix) + 1)
		else
			print
	}' |
	sort |
	awk -v dir="$1" '
	/.\.html$/ {
		printf "<a href=\"file://localhost%s/%s\">%s</a>\n", dir, $0, $0
		next
	}
	{
		printf "<a href=\"/cgi-bin/dwww?type=file&location=%s/%s\">%s</a>\n",
			dir, $0, $0
	}'

	# Starting at "$1/." gives the start directory the name ".", which
	# the -name tests then exclude from the result.
	dir2html_subdirs="$(find "$1/." -maxdepth 1 -type d ! -name . ! -name ..)"
	if [ -n "$dir2html_subdirs" ]
	then
		echo "<p><h2>Subdirectories:</h2>"
		printf '%s\n' "$dir2html_subdirs" |
		while IFS= read -r i
		do
			j=${i#"$1"/./}
			if [ -f "$i/index.html" ]
			then
				echo "<a href=\"file://localhost$i/index.html\">$j</a>"
			else
				printf '<a href="/cgi-bin/dwww?type=dir&location=%s/%s">%s</a>\n' \
					"$1" "$j" "$j"
			fi
		done
	fi

	echo "<hr>Created automatically: `date`</body></html>"
}


#
# Convert a manual page source code file to HTML.
#	$1	path of the manual page source; also used as the title
# The page is formatted by man from the parent of the directory that
# holds the file, stripped of page headers and footers, converted with
# text2html_nohead, and every "name(section)" in the result is made a
# link to the dwww CGI with type=runman.
#
builtin_man2html() {
	echo "<html><head><title>$1</title></head><body><pre>"
	(
		cd "$(dirname "$1")"/..
		man -P/bin/cat -l "$1"
	) |
		remove_headers_and_footers |
		text2html_nohead |
		sed -e 's#\([-a-zA-Z_0-9]\+\)[(]\([a-zA-Z0-9]*\)[)]#<a href="/cgi-bin/dwww?type=runman\&location=\1/\2\">&</a>#g'
	echo "</pre></body></html>"
}


#
# Convert plain text to HTML.  This is really trivial, and buggy.
#	$1	file to convert; also used as the page title
#	$2	when "--stdin", the text is read from stdin instead
# The text is passed through "zcat -f" and text2html_nohead and
# wrapped in a <pre> element.
#
builtin_text2html() {
	echo "<html><head><title>$1</title></head><body><pre>"
	case "$2" in
	--stdin)
		zcat -f
		;;
	*)
		zcat -f "$1"
		;;
	esac | text2html_nohead
	echo "</pre></body></html>"
}



#
# Convert an Info file to HTML.
#	$1	page title; also the Info file when $2 is not given
#	$2	Info file to read (optional)
# The main program runs converters with the file name as their only
# argument, so $1 has to serve as the file in that case; reading $2
# unconditionally handed info an empty file name.
#
builtin_info2html() {
	echo "<html><head><title>$1</title></head><body><pre>"
	info -f "${2:-$1}" | text2html_nohead
	echo "</pre></body></html>"
}



################################################################
#
# Main program
#


# Both arguments are mandatory.
if [ -z "$1" ] || [ -z "$2" ]
then
	echo "Error: invalid arguments" 1>&2
	echo "Usage: $0 <type> <location>" 1>&2
	exit 1
fi

type="$1"
file="$2"

# "file" means: work the type out from the file name.
if test "$type" = file
then
	type="$(guess_type "$file")"
fi

# Pick the converter and, for reference-style locations, turn the
# reference into a file name.
case "$type" in
	man)	converter=$DWWW_MAN2HTML
		;;
	runman)	converter=$DWWW_MAN2HTML
		type=man
		file="$(manref2path "$2")"
		;;
	info)	converter=$DWWW_INFO2HTML
		# inforef2html is the name the Info path helper is defined
		# under in this file.
		file="$(inforef2html "$2")"
		;;
	dir)	converter=$DWWW_DIR2HTML
		;;
	*)	converter=$DWWW_TEXT2HTML
		;;
esac

# Canonicalise before the access check so that ".." and symlinks
# cannot be used to step outside the permitted trees.
file="$(realpath "$file")"

if badfile "$file"
then
	# $2 comes straight from the request: escape it before it is
	# echoed back inside HTML.
	denied="$(printf '%s\n' "$2" |
		sed 's/&/\&amp;/g;s/</\&lt;/g;s/>/\&gt;/g')"
	echo "<html><head><title>Access denied</title></head><body>"
	echo "<h1>Access denied.</h1>dwww will not allow you to read"
	printf '%s\n' "file $denied</body></html>"
	exit 1
fi

# Serve from the cache when the document is already there.
if dwww-cache --list "$type" "$file" > /dev/null
then
	dwww-cache --lookup "$type" "$file" | zcat -f
	exit 0
fi

# HTML needs no conversion and is not cached.
if test "$type" = html
then
	zcat -f "$file"
	exit
fi

# $converter and $COMPRESS are left unquoted on purpose: each holds a
# command that may carry arguments (e.g. "gzip -9").
$converter "$file" | $COMPRESS | dwww-cache --store "$type" "$file"

dwww-cache --lookup "$type" "$file" | zcat -f
