package Zim::Repository::Files;

use strict;
use vars qw/$CODESET/;
use POSIX qw(strftime);
use Encode;
use File::Spec;
use File::MimeInfo;
use Zim::File;
use Zim::Repository::Base;
use Zim::Page::Text;

our $VERSION = '0.17';
our @ISA = qw/Zim::Repository::Base/;

my $case_tolerant_fs = File::Spec->case_tolerant();

*CODESET = \$Zim::CODESET;
$CODESET ||= 'utf8';

=head1 NAME

Zim::Repository::Files - A file system based repository

=head1 DESCRIPTION

This module implements a file system based repository for zim.
See L<Zim::Repository> for the interface documentation.

=head1 METHODS

=over 4

=item C<new(NAMESPACE, DIR)>

Simple constructor. DIR is the root directory of the repository.
NAMESPACE is the namespace that maps to that directory.

=cut

sub init { # called by new
	my ($self, $dir, $format) = @_;
	
	$dir = Zim::File->abs_path($dir);
	$self->{dir} = $dir;
	$self->{format} = $format || 'wiki';
	$self->{ext} = ($self->{format} eq 'wiki') ? 'txt' : 'html';
		# FIXME HACK FIXME - this belongs in a Formats.pm
	
	$self->{cache} = Zim::File->new($dir, '.zim.cache');
	
	# Check version of cache
	if (-w $self->{cache}->dir) {
		my $line = '';
		if ($self->{cache}->exists) {
			my $fh = $self->{cache}->open();
			$line = <$fh>;
			$fh->close;
		}
		$self->{cache}->write("zim: version $VERSION\n")
			unless $line =~ m/zim: version $VERSION/;
	}
	
	return $self;
}

=item C<has_pages(NAMESPACE)>

Returns boolean whether NAMESPACE exists.

=item C<list_pages(NAMESPACE)>

Returns a list of pages. If possible it uses a cache.

=cut

# Directory structure:
#
# a.txt
# a/b.txt
# a/c.txt
# a/c/d.txt
#
# Page structure:
#
# a
# |__ b
# |__ c
#     |_d
#
# Cache:
#
# a: mtime /
# a:b mtime > links
# a:c: mtime /
# a:c: mtime > links
# a:c:d mtime > links

sub has_pages {
	my ($self, $namespace) = @_;

	my $dir = $self->dir($namespace);
	return -d $dir;
}

sub list_pages {
	my ($self, $namespace) = @_;
	
	my $dir = $self->dir($namespace);
	return () unless -d $dir;

	my $mtime = (stat $dir)[9];
	my ($cache_mtime, @pages);
	for ($self->{cache}->read) {
		/^\Q$namespace\E(?:([^:\s]+:?) \d+ >| (\d+) \/)/ or next;
		if (defined $1 and length $1) { push @pages, $1 }
		else { # namespace itself - check index time
			$cache_mtime = $2;
			#warn "Found cache mtime $cache_mtime for $namespace (mtime is $mtime)\n";
			return $self->_cache_dir($namespace, $dir)
				unless $cache_mtime == $mtime ;
		}
	}
	#warn "Did not find cache mtime for $namespace\n" unless $cache_mtime;
	return $self->_cache_dir($namespace, $dir) unless $cache_mtime;
	return @pages;
}

sub _flush_cache {
	my $self = shift;
	$self->{cache}->remove if $self->{cache}->exists;
}

sub _cache_dir {
	my ($self, $namespace, $dir) = @_;
	warn "# Indexing $namespace\n";
	
	my @pages =
		grep defined($_),
		map {
			my $item = "$dir/$_";
			s/[^:\w\.\-\(\)]/_/g;
			(-d $item)		? [$_.':' => $item] :
			(s/\.$$self{ext}$//)	? [$_ => $item]     : undef ;
		} 
		grep /^\w/, Zim::File->list_dir($dir);
	#use Data::Dumper; warn Dumper \@pages;
	
	@pages = sort {lc($$a[0]) cmp lc($$b[0])} @pages;
	for (0 .. $#pages-1) { # cut doubles due to directories
		$pages[$_] = undef if $pages[$_+1][0] eq $pages[$_][0].':' ;
	}
	@pages = grep defined($_), @pages;
	#use Data::Dumper; warn Dumper \@pages;

	return map {$$_[0]} @pages if $self->{parent}{config}{read_only};
	
	my %items = ();
	my $index = '';
	for ($self->{cache}->read) {
		if (/^\Q$namespace\E(?:([^:\s]+:?) \d+ >| (\d+) \/)/) {
			$items{$1} = $_ if defined $1 and length $1;
			#warn "Item: $_\n";
		}
		else { $index .= $_ }
	}
	#use Data::Dumper; warn Dumper \%items;

	$index .= $namespace.' '.(stat $dir)[9]." /\n"; # cache mtime
	for my $p (@pages) {
		my ($name, $file) = @$p;
		#warn "Page: >>$$p[1]<< >>$$p[0]<<\n";
		if (exists $items{$name}) {
			$items{$name} =~ / (\d+) /;
			if ($1 == (stat $file)[9]) {
				$index .= $items{$name};
				next;
			}
		}
		#warn "Indexing page: $namespace$name\n";
		$index .= $self->_cache_string($namespace.$name);
	}
	$self->{cache}->write( $index );
	
	return map {$$_[0]} @pages;
}

sub _cache_page {
	my ($self, $page) = @_;
	my $name = $page->name;
	my ($index, $is_dir);
	for ($self->{cache}->read) {
		if (/^\Q$name\E(:?) \d+ >/) { $is_dir = $1 }
		else { $index .= $_ }
	}
	$self->{cache}->write($index, $self->_cache_string($page, $is_dir))
}

sub _cache_string {
	my ($self, $page, $is_dir) = @_;
	unless (ref $page) {
		$is_dir ||= ($page =~ /:$/);
		$page = $self->get_page($page);
	}
	my $mtime = (stat $page->{source}->path)[9] || '0';
	my @links = eval{ $page->_list_links };
	my $key = $page->name;
	$key .= ':' if $is_dir;
	return $key . ' ' . $mtime . ' > ' . join(' ', @links) . "\n" ;
}

sub _list_backlinks {
	my ($self, $page) = @_;
	my $name = $page->name;
	my @links;
	for ($self->{cache}->read) {
		/^(:\S+) \d+ >.*?\s\Q$name\E\s/ or next;
		my $l = $1;
		$l =~ s/:$//;
		push @links, $l;
	}
	return @links;
}

sub _search { # query is a hash ref with options etc
	my ($self, $query, $callback, $ns) = @_;
	$ns ||= $self->{namespace};
	warn "# Searching: $ns\n";
	
	my $reg = $$query{regex};
	unless ($reg) {
		$reg = quotemeta $$query{string};
		$reg = "\\b".$reg."\\b" if $$query{word};
		$reg = "(?i)".$reg unless $$query{case};
		$reg = qr/$reg/;
		#warn $reg;
		$$query{regex} = $reg;
	}
	
	for ($self->list_pages($ns)) {
		my $p = $ns.$_;
		my $is_dir = ($p =~ s/:$//);
		my $match = $self->file($p)->grep($reg);
		#warn "$1 matches $match times\n" if $match;
		$callback->($match ? [$p, $match] : ());
		$self->_search($query, $callback, $p.':') if $is_dir; # recurs
	}
}

sub _match_word {
	my ($self, $page, $word) = @_;
	my $namespace = $page->namespace;
	$word =~ s/[^\w\.\:\-]/_/g;
	my $seen = 0;
	#warn "looking up \"$word\" in $namespace\n";
	for ($self->{cache}->read) {
		next unless /^\s*\Q$namespace\E(?i)\Q$word\E(_|:?\s)/;
		if ($1 eq '_') { return 2 }
		elsif ($seen) { return 2 }
		else { $seen = 1 }
	}
	return $seen;
}

=item C<get_page(PAGE_NAME)>

Returns an object of the type L<Zim::Page::Text>.

=cut

sub get_page {
	my ($self, $name, $source) = @_; # source is a private argument

	my $page = Zim::Page::Text->new($self, $name);
	$source ||= $self->file($name); # case sensitive lookup
	$page->set_source($source);
	$page->set_format($self->{format});
	$page->properties->{base} = $source->dir;
	
	unless ($source->exists) {
		$page->{parse_tree} = $self->_template($page);
		$page->status('new');
	}

	return $page;
}

=item C<resolve_page(NAME)>

Like C<get_page()> but with case in-sensitive lookup for a page name.

=cut

sub resolve_page {
	my ($self, $name) = @_;
	my $source = $self->file($name, 1); # case tolerant lookup
	$name = $self->pagename($source->path);
	#warn "Resolved page: $name => $file => $page\n";
	$self->get_page($name, $source);
}

sub _template {
	# FIXME make template configurable
	my ($self, $page) = @_;
	$page->name =~ /([^:]+):*$/;
	my $title = ucfirst($1);
	$title =~ s/_/ /g;
	my $format = $self->root->{date_string} || '%A %d/%m/%Y';
	my $date = Encode::decode($CODESET,
		strftime($format, localtime)   );
	return	['Document', $page->properties,
			['head1', {}, $title],
			['Para',  {empty_lines => 1}, "Created $date\n"]
		];
}

=item C<copy_page(SOURCE, TARGET)>

=cut

sub copy_page {
	my ($self, $old, $new) = @_;
	my $source = $self->file($old);
	my $target = $self->file($new);
	Zim::File->copy($source, $target);
	@$new{'status', 'parse_tree'} = ('', undef);
}

=item C<move_page(SOURCE, TARGET)>

=cut

sub move_page {
	my ($self, $old, $new) = @_;
	
	# Move file
	my $source = $self->file($old);
	my $target = $self->file($new);

	die "No such page: $source\n" unless $source->exists;
	if ($old->equals($new)) { # maybe user wants to change case
		$target = $target->{case_sensitive}
			if defined $target->{case_sensitive};
		$new->{name} = $self->pagename($target);
		#warn "set name: $new->{name}\n";
	}
	#warn "Moving $source to $target\n";
	Zim::File->move($source, $target);

	# Move tree below file
	# FIXME make this optional (?) - think boutupdating links here
	#my $old_tree = $self->dir($old);
	#my $new_tree = $self->dir($new);
	#Zim::File->move($old_tree, $new_tree) if -d $old_tree;
	
	# update objects
	@$old{'status', 'parse_tree'} = ('deleted', undef);
	@$new{'status', 'parse_tree'} = ('', undef);
}

=item C<delete_page(PAGE)>

=cut

sub delete_page {
	my ($self, $page) = @_;

	my $file = $self->file($page);
	my $dir = $file->dir;
	if ($file->exists) { $file->remove }
	else { # border case where empty dir was left for some reason
		$dir = $self->dir($page);
		Zim::File->remove_dir($dir);
	}
	
	@$page{'status', 'parse_tree'} = ('deleted', undef) if ref $page;
}

=item C<search()>

TODO

=cut

sub search {
	my ($self, $page, $query) = @_;
	
}

=back

=head2 Private methods

=over 4

=item C<file(PAGE, NOCASE)>

Returns a L<Zim::File> object for a page name.

NOCASE is a boolean that triggers a case in-sensitive lookup when true.

=item C<dir(PAGE, NOCASE)>

Returns a dir for a page name. This dir maps to the namespace below this page.

NOCASE is a boolean that triggers a case in-sensitive lookup when true.

=cut

sub file {
	my ($self, $page, $case_tolerant) = @_;
	#warn "Looking up filename for: $page\n";

	if (ref $page) {
		return $page->{source} if defined $page->{source};
		$page = $page->name;
	}

	$page =~ s/^:*$self->{namespace}:*//i;
	my @parts = grep length($_), split /:+/, $page;

	my $file = $case_tolerant
		? Zim::File->resolve_file({ext => $$self{ext}}, $$self{dir}, @parts)
		: join('/', $$self{dir}, @parts).'.'.$$self{ext} ;

	#warn "\t=> $file\n";
	$file = Zim::File->new($file);
	return $file;
}

sub dir {
	my ($self, $page, $case_tolerant) = @_;

	if (ref $page) { $page = $page->name } # looking for dir _below_

	$page =~ s/^:*$self->{namespace}:*//i;
	my @parts = grep length($_), split /:+/, $page;

	my $dir = $case_tolerant
		? Zim::File->resolve_file({ext => $$self{ext}, is_dir => 1}, $$self{dir}, @parts)
		: join('/', $$self{dir}, @parts)  ;

	return $dir;
}

=item C<pagename(FILE)>

Returns the page name corresponding to FILE. FILE does not actually
need to exist and can be a directory as well as a file.

=cut

sub pagename {
	my ($self, $file) = @_;
	#warn "looking up pagename for: $file\n";
	$file = File::Spec->abs2rel($file, $self->{dir})
		if File::Spec->file_name_is_absolute($file);
	my @parts = grep length($_), File::Spec->splitdir($file);
	$parts[-1] =~ s/\.$$self{ext}$//;
	return $self->{namespace} . join ':', @parts;
}

=item C<get_source(SOURCE, MODE)>

Returns an IO::File object or undef.

=cut

sub get_source {
	my ($self, $source, $mode) = @_;
	
	my $file = $source->path;
	my $cb = undef;
	if ($mode eq 'r') {
		return undef unless -e $file;
		die "File not readable: $file\n" unless -r _;
		$source->{mtime} = $source->stat->{mtime};
	}
	elsif ($mode eq 'w') {
		die "File not writable: $file\n" if -e $file && ! -w _;
		die "File has changed on disk since reading\n"
			if  defined $source->{mtime}
			and $source->{mtime} < $source->stat->{mtime} ;
		$source->make_dir;
		$cb = sub { $source->{mtime} = $source->stat->{mtime} };
	}
	else { die "unknown mode: $mode" }
	
	return $source->open($mode, $cb);
}

1;

__END__

=back

=head1 BUGS

Please mail the author if you find any bugs.

=head1 AUTHOR

Jaap Karssenberg || Pardus [Larus] E<lt>pardus@cpan.orgE<gt>

Copyright (c) 2005 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

L<Zim>, L<Zim::Page>

=cut
