#!/usr/bin/perl -w

use strict;
use utf8;


=head1 NAME

dl10n-spider -- crawl translator mailing lists (and BTS) for status updates

=head1 SYNOPSIS

dl10n-spider [options] lang+

=head1 DESCRIPTION

This script parses the debian-l10n-E<lt>languageE<gt> mailing list
archives. It looks for emails whose title follows a specific format
indicating what the author intends to translate, or the current status of
their work on this translation.

This information is saved to a dl10n database, which can then be used to
build an l10n coordination page or any other useless statistics.

=cut

use Getopt::Long; #to parse the args
use LWP::UserAgent;
use Debian::L10n::Html;
use File::Path;
use POSIX qw(strftime);


# Name of this program as shown in diagnostics: $0 with any leading
# directory components removed.
my $progname = $0;
   # The quantifier must sit inside the capture group; with it outside,
   # $1 would only hold the last character of the path.
   $progname = $1 if $progname =~ m,([^/]+)$,;

# External version number, and the text banner that embeds it.
my $VERSION = '4.0';
my $BANNER  = "Debian l10n infrastructure -- mailing list spider v$VERSION";

# Values taken from the command line; all of them start out undefined.
my ($cmdline_year, $cmdline_month, $cmdline_msg, $cmdline_file);

# Maps a language code given on the command line to the language name used
# in "debian-l10n-<name>" and in the per-language output directory.
# 'pt' (portuguese) is intentionally left out of the table.
my %Language = (
	'ar'    => 'arabic',    'ca'    => 'catalan',
	'cs'    => 'czech',     'de'    => 'german',
	'en'    => 'english',   'es'    => 'spanish',
	'fr'    => 'french',    'gl'    => 'galician',
	'nl'    => 'dutch',     'pt_BR' => 'brazilian',
	'ro'    => 'romanian',  'ru'    => 'russian',
	'sk'    => 'slovak',    'sv'    => 'swedish',
	'tr'    => 'turkish',   'all'   => 'all',
);


=head1 Command line option parsing

=over

=item General options:

=over

=item -h, --help

display short help text

=item -V, --version

display version and exit

=back

=item Begin point of the crawling:

=over

=item --year=YYYY

=item --month=MM

=item --message=msg

=back

if not specified, will crawl for new messages.

=item Database to fill:

=over

=item --sdb=STATUS_FILE

use STATUS_FILE as status file (instead of $STATUS_FILE)

=back

=back

=cut

# Option parsing lives in its own block so that %opthash stays private to
# it. (The named subs declared here are still ordinary package-level subs.)
{
	# Print the usage text, then exit.
	#   syntax_msg()          banner + usage on STDOUT, exit status 0
	#   syntax_msg($message)  "$progname: $message" + usage on STDERR,
	#                         exit status 1
	sub syntax_msg {
		my $message = shift;
		my $failed  = defined $message;

		# Diagnostics belong on STDERR; requested help goes to STDOUT.
		my $out = $failed ? \*STDERR : \*STDOUT;

		if ($failed) {
		        print $out "$progname: $message\n";
		} else {
		        print $out "$BANNER\n";
		}
		print $out <<EOF;
Syntax: $0 [options] [lang]+
General options:
    -h, --help                display short help text
    -V, --version             display version and exit

Database to fill:
    --sdb=STATUS_FILE         use STATUS_FILE as status file
EOF

		exit($failed ? 1 : 0);
	}


	# Display Version Banner, then exit with status 0.
	# Options: -V|--version, --print-version
	# When the first argument is 'print-version' only the bare version
	# number is printed; otherwise the full banner is.
	sub banner {
		# Guard against being called without arguments (undef eq ...).
		if (defined $_[0] and $_[0] eq 'print-version') {
			print "$VERSION\n";
		} else {
			print "$BANNER\n";
		}
		exit 0;
	}

	# Hash used to process commandline options
	# NOTE(review): the POD documents --year, --month and --message, but
	# they are not registered here, so GetOptions rejects them.
	my %opthash = (
		# ------------------ general options
		# Getopt::Long calls option handlers with (name, value); wrap
		# syntax_msg so the option name is not mistaken for an error
		# message (which would print "help" and exit with status 1).
		"help|h"    => sub { syntax_msg() },
		"version|V" => \&banner,

		# ------------------ configuration options
		"sdb=s"     => \$cmdline_file,
	);


	# init commandline parser
	Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');

	# process commandline options
	GetOptions(%opthash)
		or syntax_msg("error parsing options");
}


# The first remaining argument selects the language to build pages for.
# Only this first argument is used; any further arguments are ignored.
my $lang = $ARGV[0];
syntax_msg("missing language argument") unless defined $lang;

# Reject codes that are not in %Language: without a language name the page
# titles and the output directory below could not be built.
syntax_msg("unknown language '$lang'") unless exists $Language{$lang};
my $language = $Language{$lang};

# NOTE(review): Html::html is presumably provided by Debian::L10n::Html; what
# it does with the status file and language is not visible here. The status
# file is undef when --sdb was not given -- confirm the callee accepts that.
Html::html($cmdline_file, $lang);

# Build the static pages for the selected language: every file named
# "include/$lang.*.inc" is copied to html/include/ and is also wrapped in a
# common XHTML header and footer as "html/$language/<name>.html".
{
	my $head = <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="$lang" lang="$lang">

<head>
  <title>Coordination of debian-l10n-$language</title>
  <link href="../l10n.css" rel="stylesheet" />
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
  <meta name="Copyright" content="Copyright (C) 2004 Nicolas Bertolissio" />
</head>

<body>

<p style="text-align:center;">
  <a href="http://www.debian.org/"><img src="http://www.debian.org/logos/openlogo-nd-50.png" style="border:0; width:50px; height:61px;" alt="" /></a>
  <a href="http://www.debian.org/"><img src="http://www.debian.org/Pics/debian.jpg" style="border:0; width:179px; height:61px;" alt="Debian Project" /></a>
</p>

<h1>Coordination of debian-l10n-$language</h1>

<p>
This page is made to aid the coordination of translating debian related text to
$language. As documented <a href='http://i18n.debian.net/debian-l10n/docs/robot/pseudo-urls.html'>here</a>, translators and
reviewers use pseudo-urls in the subject of e-mails to the debian-l10n-$language
list for coordination. 
</p>

<p>
A program parses these pseudo-urls and collects the relevant data, which are
then displayed below.
</p>

EOF

	# The timestamp is in UTC, so the offset is spelled out literally:
	# strftime's %z would report the local zone's offset, not gmtime's.
	my $date = strftime('%a, %d %b %Y %H:%M:%S +0000', gmtime);
	my $tail = <<EOF;

<hr />

<p>
<small>Comments:
<a href='mailto:debian-l10n-devel\@lists.alioth.debian.org'>Debian L10N
Development Team</a></small>
</p>
<p>
<small>Generated on $date</small>
</p>

</body>
</html>
EOF

	opendir my $dh, './include'	or die "Cannot open ./include: $!";
	my @files = readdir $dh;
	closedir $dh;

	# In mkpath's positional interface the second argument is the
	# "verbose" flag and the third is the mode, so the mode has to be
	# passed in third position to take effect.
	foreach my $dir ('html/include', "html/$language") {
		next if -d $dir;
		mkpath($dir, 0, 02775);
		-d $dir or die "Cannot create $dir directory\n";
	}

	# \Q..\E keeps regex metacharacters in the language code literal.
	foreach my $file (grep { /^\Q$lang\E\./ } @files) {
		next unless $file =~ /\.inc\z/;
		(my $base = $file) =~ s/\.inc\z//;

		my $src  = "include/$base.inc";
		my $copy = "html/include/$base.inc";
		my $page = "html/$language/$base.html";

		open my $in, '<', $src	or die "Cannot open $src: $!";
		my @inc = <$in>;
		close $in;

		# Verbatim copy of the include file.
		open my $out, '>', $copy	or die "Cannot open $copy: $!";
		print $out @inc;
		# Buffered write errors only surface when the handle is closed.
		close $out			or die "Cannot write $copy: $!";

		# Standalone page: header + include content + footer.
		open my $html, '>', $page	or die "Cannot open $page: $!";
		print $html $head;
		print $html @inc;
		print $html $tail;
		close $html			or die "Cannot write $page: $!";
	}
}


=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

=head1 COPYRIGHT (C)

 2003,2004 Tim Dijkstra
 2004 Nicolas Bertolissio
 2004 Martin Quinson

=cut

1;
