#!/usr/bin/env perl

use strict;
use warnings;

use lib "$ENV{HOME}/sw_projects/doc-experiment/p5-Quiver/lib";
use FindBin qw($Bin);
use Path::Class;
use JSON::MaybeXS;
use Set::Scalar;
use UnderscoreJS;
use List::UtilsBy qw(sort_by);
use IO::String;
use Devel::Assert -all;
use List::AllUtils qw(first any);
use Regexp::Common qw(comment);



my $top = dir($Bin)->parent;
my $json_f = $top->file('doc.json');

my $src_dir = $top->subdir( qw(.. leptonica-1.70 src));
my $all_headers_f = $src_dir->file('allheaders.h');
my $leptonica_h_f = $top
	->subdir( qw(Image-Leptonica-0.03 lib Image Leptonica))
	#->subdir( qw(.. .build latest lib Image Leptonica))
	->file('leptonica.h');

my $json_data = decode_json( $json_f->slurp );

# Find all functions that are in all_headers.h.
my ($func_to_sig,$all_fn) = get_functions(~~ $all_headers_f->slurp);
# Find all functions in leptonica.h
my $lep_fn = get_functions(~~ $leptonica_h_f->slurp);
# Mark the difference as NOT IMPLEMENTED
my $not_implemented = $all_fn->difference( $lep_fn );

# only make docs for functions in leptonica.h
my $json_data_lep = [ grep { $lep_fn->has( $_->{function} ) } @$json_data ];
$json_data_lep = clean_up($json_data_lep);

# the number of functions in leptonica.h must match the number of functions in
# the comment data structure
assert( $lep_fn->size == ~~ @$json_data_lep );

# DEBUG: if the functions occur more than once {{{
## find all functions that occur more than once
#my $grouped_by_func = _->group_by($json_data_lep => sub { $_[0]->{function} } );
#my @multiple_entries = grep { @{ $grouped_by_func->{$_} } > 1 } keys %$grouped_by_func;
## there should not be any functions that occur more than once [same meaning as the previous test]
#assert( @multiple_entries == 0 );
#my @multiple_slice = @$grouped_by_func{ @multiple_entries };
#use DDP; p @multiple_slice;
# DEBUG }}}

# group the functions by their filename, sorted by name
my $grouped_by_file = _->group_by($json_data_lep => sub { $_[0]->{file} } );
my @files = sort keys %$grouped_by_file;

my $merge = {
	'utils.c' => [
		{ functions => [ qw( startTimerNested stopTimerNested ) ], merge => 1 },
		{ functions => [ qw( startTimer stopTimer ) ], merge => 1 },
		{ functions => [ qw( convertOnLittleEnd32 convertOnBigEnd32 ) ], merge => 1 }, # text?
		{ functions => [ qw( convertOnBigEnd16 convertOnLittleEnd16 ) ], merge => 1 }, # text?
	]
};

# collect all functions that are not documented properly while iterating
my @func_not_doc;
# mark some functions as "to document" (make a list) because some are not
# documented properly
my $not_documented = Set::Scalar->new(qw(
	boxChangeRefcount boxGetRefcount nextOnPixelInRasterLow dpixChangeRefcount
	dpixCopyResolution dpixGetData dpixGetRefcount dpixGetResolution dpixGetWpl
	dpixSetData dpixSetResolution dpixSetWpl fpixChangeRefcount fpixCopyResolution
	fpixGetData fpixGetRefcount fpixGetResolution fpixGetWpl fpixSetData
	fpixSetResolution fpixSetWpl fpixFlipLR pixChangeRefcount pixCopyInputFormat
	pixCopyResolution pixCopyText pixGetColormap pixGetDepth pixGetHeight
	pixGetInputFormat pixGetRefcount pixGetSpp pixGetWidth pixGetWpl pixGetXRes
	pixGetYRes pixScaleResolution pixSetDepth pixSetHeight pixSetInputFormat
	pixSetWidth pixSetWpl pixSetXRes pixSetYRes l_psWriteBoundingBox
	ptaChangeRefcount ptaGetRefcount rotateAMColorCornerLow rotateAMColorLow
	rotateAMGrayCornerLow rotateAMGrayLow l_chooseDisplayProg
));
# these are exceptions because they are auto-generated
my $header_exception = Set::Scalar->new(qw(
dwacomb.2.c
dwacomblow.2.c
fhmtgen.1.c
fhmtgenlow.1.c
fmorphgen.1.c
fmorphgenlow.1.c
));

for my $file (@files) {
	my $doc_output = IO::String->new;
	my $file_func = $grouped_by_file->{$file};
	my ($pm_file, $pm_info) = get_pm_info($file);

	# get the indexes out of each .c file
	my $c_file = $src_dir->file( $file );
	die unless -f $c_file;
	my $c_file_text = $c_file->slurp();
	my $header_idx;
	while($c_file_text =~ /$RE{comment}{'C++'}{-keep}/g) {
		$header_idx = $1;
		last if $header_idx !~ /Copyright/si;
	}
	die "header not found in $file: $header_idx"
		unless $header_idx =~ /\Q$file\E/s
			or $header_exception->has($file);
	# this would mean something has changed
	die "autogenerated weirdness in $file: $header_idx"
		if $header_exception->has($file)
			and $header_idx !~ /auto-generated/s;
	my $header_text = comment_inner_text( $header_idx );

	# print out top of file
	print $doc_output "package $pm_info;\n\n";
	print $doc_output "=head1 C<$file>\n\n";
	print $doc_output "$header_text\n\n";
	print $doc_output "=head1 FUNCTIONS\n\n";

	# print out POD for each function
	my $file_func_sort = [ sort_by { $_->{function} } @$file_func ];
	for my $func (@$file_func_sort) {
		my $text = comment_inner_text( $func->{comment} );

		# NOTE this can be more efficient
		my $in_merge = 0;
		my $merge_file = $merge->{$file};
		for my $func_info (@$merge_file) {
			if( any { $_ eq $func->{function} } @{$func_info->{functions}} ) {
				$in_merge = 1;
				last;
			}

		}
		next if $in_merge; # skipping these for now

		# need to make sure the comment begins with the function name
		if($text =~ /^(\n|\s|\*)*$func->{function}/s) {
			#use DDP; p $func;
			my $func_sig = $func_to_sig->{$func->{function}};
			die unless $func_sig;
			print $doc_output "=head2 $func->{function}\n\n";
			print $doc_output "$func_sig\n\n";
			print $doc_output $text;
			print $doc_output "\n\n";
		} else {
			push @func_not_doc, $func unless $in_merge;
			#print "====== something is wrong\n"
		}

	}
	print $doc_output "=cut\n\n";

	print $doc_output "1;\n";

	# Write out each set of functions to an appropriate .pm file
	$pm_file->spew( ${$doc_output->string_ref} );
}
assert( $not_documented->size == @func_not_doc );
#use DDP; p @func_not_doc;
#my $s = [ map { $_->{function} } @func_not_doc ]; use DDP; p $s;


sub get_functions {
	my ($code) = @_;
	my $text = $code;
	$text =~ s{  /\*   .*?    \*/}{}xgsm;
	my @list = $text =~ /^
		(             # wrap entire line to get signature (signature group)
		[^(]*?        # no parens
		(\w+)         # and identifier (identifier group)
		\s*           # whitespace [opt]
		\(            # begin argument list
		.*            # anything
		)             # end of (signature group)
		\s*;          # semicolon at end
		/xgm;
	my $func_to_signature = { reverse @list };
	s/^LEPT_DLL extern // for values %$func_to_signature; # get rid of prefix before return type
	($func_to_signature, Set::Scalar->new(keys %$func_to_signature) );
}

sub comment_inner_text {
	my ($comment) = @_;

	my $text = $comment;

	$text =~ s,
		^/\* # start of comment
		!?
		\s*  # whitespace [opt]
		\**  # any number of stars
		,,gsx;
	$text =~ s,
		\**  # any number of stars
		\s*  # whitespace [opt]
		\*/$ # end of comment
		,,gsx;
	$text =~ s,^\s*\*+,,gms;
	$text;
}

sub clean_up {
	my ($fn) = @_;
	# get rid of little-endian versions of same functions
	my $little_endian_f = Set::Scalar->new(qw(convertOnLittleEnd16 convertOnLittleEnd32 convertOnBigEnd16 convertOnBigEnd32));
	# if a function is in $little_endian_f and has L_LITTLE_ENDIAN in its comment, it will be removed
	my $cleaned = [ grep { not $little_endian_f->has($_->{function}) && $_->{comment} =~ /L_LITTLE_ENDIAN/ } @$fn ];

	# these functions have versions that are under a WIN32 ifdef but are
	# documented under Unix version too with a comment that begins with
	# "/*!"
	my $need_bang_comment = Set::Scalar->new(qw(startTimer getFilenamesInDirectory stopTimerNested l_getCurrentTime startTimerNested stopTimer));
	# if a function is in $need_bang_comment and does not have /*! in its comment, it will be removed
	$cleaned = [ grep { not $need_bang_comment->has($_->{function}) && $_->{comment} !~ m,/\*!, } @$cleaned ];

	# find pixWriteStreamWebP in webpio.c
	my $pixWriteStreamWebP = first { $_->{function} eq 'pixWriteStreamWebP' && $_->{file} eq 'webpio.c' } @$cleaned;
	assert( defined $pixWriteStreamWebP );
	$pixWriteStreamWebP->{comment} =~ s/\QpixWriteStreampWebP()\E/pixWriteStreamWebP()/; # spelling fix

	for my $captext (qw(sarrayGetRefcount sarrayChangeRefcount l_dnaChangeRefcount l_dnaGetRefcount numaChangeRefcount numaGetRefcount)) {
		my $cap_f = first { $_->{function} eq $captext } @$cleaned;
		assert( defined $cap_f );
		$cap_f->{comment} =~ s/\Q$captext()\E/$captext()/i; # capitalisation fix
	}

	$cleaned;
}

sub get_pm_info {
	my ($c_file_name) = @_;
	my $base = $c_file_name =~ s/\.[ch]$//r;
	$base =~ s/[.]//; # dot not allowed in identifier
	my @seg = qw(Image Leptonica Func);
	my $pm_file = $top->subdir('lib', @seg,)->file("$base.pm");
	$pm_file->dir->mkpath;
	$pm_file->touch;

	my $pm_name = join "::", (@seg, $base);
	($pm_file, $pm_name);
}


