#!/usr/bin/perl -w
# $Id: sman-update,v 1.24 2004/05/30 22:59:31 joshr Exp $
# man indexing SWISH-E prog 

# Copyright (c) Josh Rabinowitz 2004

# A descendent of the example in the Linux Journal 
# article "How To Index Anything".

use strict;
use warnings;
use bytes; # NOTE: swish-e won't understand UTF8 nor multi-byte chars 
use Getopt::Long qw(:config no_ignore_case); 
use Sman;	# for $VERSION
use Sman::Util;
use Sman::Config;
use Sman::Swishe;
use Sman::Man::Find;
use Sman::Man::Convert;
use Sman::Autoconfig;

#use Data::Dumper;
use vars qw( $swisheconfigfile );	 
	# if this is defined, we created this tmp file

BEGIN {
	# Compile-time setup, done before any of the run-time code below.
	# Replace whatever PATH we inherited with a fixed list of directories;
	# child processes started later inherit this value.
	$ENV{PATH} = "/bin:/usr/bin:/usr/local/bin";
	# Turn on autoflush for the currently selected output handle.
	$| = 1;
	# Empty string means "no temporary swish-e config file to clean up".
	$swisheconfigfile = "";
}

# Path of the configuration file named on the command line. GetOptions()
# inside main() fills it in; while it is empty, main() asks Sman::Config
# to read its default configuration file instead.
# (Original note on the default search: /etc/sman.conf, then
# /usr/local/etc/sman.conf, then $FindBin::Bin/sman.conf.)
my $configfile = q{};

#################################################
main();
#################################################

# main()
#   Entry point. Parses the command line, loads the sman configuration,
#   applies command-line overrides, then converts each man page (or only
#   the --testfile page) and pipes the result to swish-e using its
#   '-S prog' header format (Path-Name / Document-Type / Content-Length).
#
#   Takes no arguments; the return value is not meaningful.
#   Exits directly for --help, --VERSION and --clearcache.
#   Dies on unparsable options, or when the pipe to swish-e cannot be
#   opened, written to, or closed cleanly.
#
#   Side effects: fills the file-scoped $configfile and the package global
#   $swisheconfigfile. The latter names the swish-e config file written
#   here, so the END block can remove it if we die part-way through.
sub main {
	my $help = 0;
	# The negatable flags start undefined so we can tell "not given on the
	# command line" apart from an explicit --no<flag>.
	my $verbose;
	my $dryrun = 0;
	my $warn;
	my $debug;
	my $debugxml;
	my $rman = "";
	my $zcat = "";
	my $swishe = "";
	my $testfile = "";
	my $index;
	my $showversion = "";
	my $clearcache = 0;

	GetOptions( "help"        => \$help,
					# '=s' is required here: without it the option is a plain
					# flag, '--config=/file' is rejected and a bare flag would
					# store 1 as the "path". Getopt::Long's default
					# abbreviation handling lets --config / --conf reach it.
					"configfile=s" => \$configfile,
					"n"           => \$dryrun,
					"dryrun"      => \$dryrun,
					"clearcache"  => \$clearcache,
					"verbose!"    => \$verbose,
					"VERSION"     => \$showversion,
					"warn!"       => \$warn,
					"debug!"      => \$debug,
					"debugxml!"   => \$debugxml,
					"index=s"     => \$index,
					"rman=s"      => \$rman,
					"zcat=s"      => \$zcat,
					"swishe=s"    => \$swishe,
					"testfile=s"  => \$testfile,	# run just this file through.
	) || die "$0: Problem reading command line options.\n" . Usage();

	if ($help) {
		print Usage();
		exit(0);
	}

	my $smanconfig = Sman::Config->new();
	if ($configfile) {
		$smanconfig->ReadSingleConfigFile($configfile);
	} else {	# no explicit file: let Sman::Config locate its default
		$smanconfig->ReadDefaultConfigFile($verbose);
	}
	if ($showversion) {
		$|++;
		my $str = Sman::Util::GetVersionString(
			"sman-update",
			$smanconfig->GetConfigData("SWISHECMD") || 'swish-e');
		print "$str\n";
		exit(0);
	}

	# overwrite settings with command line values if present
	if ($rman) {    $smanconfig->SetConfigData( "RMANCMD",          $rman); }
	if ($zcat) {    $smanconfig->SetConfigData( "ZCATCMD",          $zcat); }
	if ($swishe) {  $smanconfig->SetConfigData( "SWISHECMD",        $swishe); }
	if (defined($verbose)) { $smanconfig->SetConfigData( "VERBOSE",          $verbose); }
	if (defined($warn)) {    $smanconfig->SetConfigData( "WARN",             $warn); }
	if (defined($debug)) {   $smanconfig->SetConfigData( "DEBUG",            $debug); }
	if (defined($debugxml)){ $smanconfig->SetConfigData( "DEBUGXML",         $debugxml); }
	if (defined($index)) {   $smanconfig->SetConfigData( "SWISHE_IndexFile", $index); }

	# From here on use the effective settings (config file merged with the
	# command line) so that VERBOSE/DEBUG set in sman.conf are honoured
	# everywhere, not only in some of the messages below.
	my $be_verbose = $smanconfig->GetConfigData("VERBOSE");
	my $be_debug   = $smanconfig->GetConfigData("DEBUG");

	# Either the single test page or everything FindManFiles() reports.
	my @files = $testfile ? ($testfile) : Sman::Man::Find::FindManFiles();

	# An unset, empty or 'AUTOCONFIG' MANCMD means "pick one for me".
	my $mancmd = $smanconfig->GetConfigData("MANCMD");
	if (!defined($mancmd) || $mancmd =~ /(^AUTOCONFIG$)|(^$)/) {
		print "Autoconfiguring MANCMD...\n" if $be_verbose;
		my $newmancmd = Sman::Autoconfig::GetBestManCommand($smanconfig, \@files);
		print "MANCMD autoconfigured to '$newmancmd'\n" if $be_verbose;
		$smanconfig->SetConfigData("MANCMD", $newmancmd);
	}

	print $smanconfig->Dump() if $be_verbose;

	# set environment variables. Affects children from here forward.
	# (Return value is unused; the call is kept in list context as before.)
	my @envs_set = $smanconfig->SetEnvironmentVariablesFromConfig();

	my $converter = Sman::Man::Convert->new($smanconfig);
	if ($clearcache) {
		print "Clearing Sman cache...\n";
		$converter->ClearCache();
		exit(0);
	}

	my $smanswishe = Sman::Swishe->new($smanconfig);

	$swisheconfigfile = $smanswishe->WriteConfigFile();

	if ($be_verbose) {
		print "SWISHE CONFIG FILE:\n";
		print "=======================\n";
		print Sman::Util::ReadFile($swisheconfigfile);
		print "=======================\n";
	}

	# Same 'swish-e' fallback as the --VERSION branch above.
	my $swishecmd = $smanconfig->GetConfigData("SWISHECMD") || 'swish-e';
	# Kept as one string on purpose: SWISHECMD may carry its own options,
	# so the command line is still handed to the shell as a whole.
	my $cmd = "$swishecmd -S prog -c $swisheconfigfile -i stdin";
	print "Running '| $cmd'\n" if $be_debug;

	my $swishe_fh;
	unless ($dryrun) {
		# Three-argument pipe open with a lexical handle; the mode can no
		# longer be altered by the contents of $cmd.
		open($swishe_fh, '|-', $cmd) || die "couldn't open '| $cmd': $!";
	}
	print scalar @files, " man pages to index...\n" if $be_verbose;
	for my $f (@files) {
		warn "** working on $f\n" if $be_debug;
		my ($type, $outputref) =
			$converter->ConvertManfile($f);

		# Guard against a failed conversion; dereferencing a non-reference
		# would abort the whole run under 'use strict'.
		unless (ref $outputref) {
			warn "sman-update: no converted output for $f, skipping\n";
			next;
		}

		# next two lines are from swish-e 2.4.0's 'spider.pl':
		# 'ugly and maybe expensive, but perhaps more portable than "use bytes"'
		my $bytecount = length pack 'C0a*', $$outputref;

		unless ($dryrun) {
			print {$swishe_fh} "Path-Name: $f\n",
				"Document-Type: $type\n",
				"Content-Length: $bytecount\n\n", $$outputref
				or die "Error writing $f to '$swishecmd': $!";
		}
		if ($smanconfig->GetConfigData("DEBUGXML")) {
			print "**==== BEGIN XML of $f =========\n" .
					$$outputref .
					" **====  END  XML of $f =========\n\n";
		}
	}
	unless ($dryrun) {
		# close() on a pipe also fails when the child exits non-zero; in
		# that case $! is 0 and the status is in $?.
		close($swishe_fh)
			|| die "Failure closing pipe to $swishecmd: "
				. ($! ? $! : "exit status " . ($? >> 8));
	}
	# note that the swisheconfig file is in Sman::Swishe for now, so on
	# success we do not unlink it here; clearing the global also stops the
	# END block from removing it.
	$swisheconfigfile = "";
}
# Usage()
#   Returns (does not print) the multi-line help text for sman-update.
#   Takes no arguments. The options listed are the ones main() passes to
#   GetOptions(); entries that GetOptions() never accepted (--man, --col)
#   are no longer advertised.
sub Usage {
	return "sman-update: [--help] [--config=s] [--rman=s] [--zcat=s] [--swishe=s]\n" .
			 "             [--(no)verbose] [--(no)warn] [--(no)debug] [--(no)debugxml]\n" .
			 "             [--index=s] [--testfile=s] [--clearcache] [--dryrun] [--VERSION]\n" .
			 "Builds index for sman.\n" .
			 "  --config=/file/sman.conf     config file to read\n" .
			 "  --rman='/path/to/rman -opt'  path to prog like 'rman -f XML'\n" .
			 "  --zcat='/path/to/zcat -f'    path to prog like 'zcat -f'\n" .
			 "  --swishe=/path/to/swish-e    path to prog like 'swish-e'\n" .
			 "  --index=/path/to/index       index file to use\n" .
			 "  --verbose/--noverbose        verbosity, default off\n" .
			 "  --warn   /--nowarn           warnings from children, default off\n" .
			 "  --debug  /--nodebug          debug output, default off\n" .
			 "  --debugxml/--nodebugxml      print the XML produced for each page\n" .
			 "  --clearcache                 clear the cache of converted pages\n" .
			 "  --testfile=/path/to/page     just one file, for testing\n" .
			 "  --dryrun  (or -n)            don't write anything to the index.\n" .
			 "  --VERSION                    print version information and exit\n" .
			 "  --help: this text. For more info, see 'perldoc sman-update'\n";
}
END {
	# Last-chance cleanup: if main() recorded a swish-e config file and it
	# is still on disk when the program ends, remove it. main() empties
	# $swisheconfigfile once it has finished, which makes this a no-op.
	my $leftover = $swisheconfigfile;
	if ($leftover and -e $leftover) {
		unless (unlink $leftover) {
			warn "Couldn't delete $swisheconfigfile: $!";
		}
		$swisheconfigfile = "";
	}
}

__END__

=head1 NAME

  sman-update - Perl tool for building the man page index searched by sman

=head1 SYNOPSIS

  % sman-update --conf=/my/dir/sman.conf --verbose

or just

  % sman-update

=head1 ABSTRACT

 sman-update: [--help] [--config=s] [--rman=s] [--zcat=s] [--swishe=s]
              [--(no)verbose] [--(no)warn] [--(no)debug] [--(no)debugxml]
              [--index=s] [--testfile=s] [--clearcache] [--dryrun] [--VERSION]

 Builds index for sman.
  --config=/file/sman.conf     config file to read
  --rman='/path/to/rman -opt'  path to prog like 'rman -f XML'
  --zcat='/path/to/zcat -f'    path to prog like 'zcat -f'
  --swishe=/path/to/swish-e    path to prog like 'swish-e'
  --index=/path/to/index       index file to use
  --verbose/--noverbose        verbosity, default off
  --warn   /--nowarn           warnings from children, default off
  --debug  /--nodebug          debug output, default off
  --debugxml/--nodebugxml      print the XML produced for each page
  --clearcache                 clear the cache of converted pages
  --testfile=/path/to/page     just one file, for testing
  --dryrun  (or -n)            don't write anything to the index.
  --VERSION                    print version information and exit
  --help: this text. For more info, see 'perldoc sman-update'

=head1 DESCRIPTION

Sman-update creates the index for Sman, the Searcher for Man pages. 
By default the index is stored in /var/lib/sman.

Sman-update should be run periodically to keep your sman index in sync
with your system's man pages.

Both sman and sman-update search for the first configuration file
named sman.conf in /etc, /usr/local/etc/, $HOME, and the directory
with sman. If no sman.conf file is found (and none is specified through
the --config option of sman or sman-update), then the default
configuration in /usr/local/etc/sman-defaults.conf will be used.

In all cases command line options take precedence over directives read from
configuration files.

=head1 AUTHOR

Josh Rabinowitz

=head1 SEE ALSO

L<sman>, L<sman-update>, L<sman.conf>

=cut

