#! /usr/bin/perl -w
use lib '/usr/lib/perl'; use INN::Config;

##  Submit Path header field statistics based on ninpaths.
##
##  A rewrite of the sendinpaths shell script in Perl, based on Mohan Kokal's
##  initial work.  Improved by Julien Elie.
##
##  Prerequisites:
##    ninpaths dump files are in the ${INN::Config::pathlog}/path directory.
##    ninpaths dump filenames begin with "inpaths.".
##    ninpaths program is installed in ${INN::Config::pathbin}.

use strict;
use Getopt::Std;

my $pathhost = "$INN::Config::pathhost";
my $ninpaths = "$INN::Config::pathbin/ninpaths";
my $ninpathsdir = "$INN::Config::pathlog/path";
my $alsoto = "$INN::Config::newsmaster";
my $sm = "$INN::Config::mta";

# Define variables Getopt::Std uses for --help and --version.
$Getopt::Std::STANDARD_HELP_VERSION = 1;
our $VERSION = $INN::Config::version;
$VERSION =~ s/INN //;

##  Default to report up to 32 days (ideal for monthly statistics).  It works
##  fine for daily stats too because already processed dump files are deleted
##  by default (0 day of kept articles).
my $reportdays = 32;
my $keepdays = 0;

my @emailto = ('top1000@anthologeek.net');
my $emaildef = join(' ', @emailto);
my $debug = 0;

my %opt;

$0 =~ s!.*/!!;

my $usage = "Usage:
  $0 [-cdhn] [-k keep-days] [-r report-days] [address [address ...]]

  If called without any arguments, reports are generated and auto
  submitted to the inpaths accumulation site.

Options:
  -c               sends a copy of the submitted mail to \"$alsoto\"
                                                      (newsmaster's address)
  -d               enables debug messages
  -h               prints this help message
  -k keep-days     specifies how many days to keep processed dump files
  -n               nomail: gathers stats, but does not auto-submit e-mails
  -r report-days   specifies how many days of dump files should be processed

  Current default submit address: [$emaildef].
  The optional arguments [address [address ...]] may be used to override
  this default.
";

sub HELP_MESSAGE {
    print $usage;
    exit(0);
}

sub main {
    my (@files, @validfiles, @oldfiles, @appendargs, @cmd);
    my $pid;
    my $sendout = '';

    getopts('cdhk:nr:', \%opt) || die $usage;
    HELP_MESSAGE() if defined $opt{'h'};

    # If we took an e-mail argument, override the default submission addresses.
    @emailto = @ARGV if ($#ARGV >= 0);
    push(@emailto, $alsoto) if defined $opt{'c'};

    # Override default parameters.
    $keepdays = $opt{'k'} if defined $opt{'k'};
    die $usage if $keepdays !~ /\d+/;

    $reportdays = $opt{'r'} if defined $opt{'r'};
    die $usage if $reportdays !~ /\d+/;

    # Set debug.
    $debug = 1 if defined $opt{'d'};

    # Scan the ninpaths directory.
    opendir(my $DIR, $ninpathsdir) || die "cannot open $ninpathsdir: $!\n";
    @files = readdir($DIR);
    closedir($DIR);

    chdir($ninpathsdir) || die "cannot chdir $ninpathsdir: $!\n";

    foreach my $file (@files) {
        # Process only inpaths files.
        next if (!-f $file);
        next if ($file !~ /^inpaths\./);

        # Get a listing of all the wanted files to process.
        # -s for nonzero size, -M for last modification age in days.
        if (-s $file && int(-M $file) <= $reportdays) {
            push @validfiles, $file;
        }

        # Now get the listing of all the files that will be removed.
        if (int(-M $file) >= $keepdays) {
            push @oldfiles, $file;
        }
    }

    if ($#validfiles < 0) {
        print
          "No data has been collected since the last run of this script!\n";
        return;
    }

    # Process each dump file.
    foreach my $file (@validfiles) {
        @cmd = ($ninpaths, '-u', $file, '-r', $pathhost);

        printf("exec'ing %s\n", join(' ', @cmd)) if $debug;

        $pid = open(my $NINPATHS, '-|');
        die "cannot fork: $!\n" if $pid < 0;
        if ($pid == 0) {
            exec(@cmd) || die "cannot exec ninpaths: $!\n";
        } else {
            while (<$NINPATHS>) {
                ;
            }
            close($NINPATHS) || next;
        }

        if ($? == 0) {
            push(@appendargs, ('-u', $file));
        }
    }

    if ($#appendargs < 0) {
        print "No valid data has been collected "
          . "since the last run of this script!\n";
        return;
    }

    # Prepare to send reports, and purge old entries from disk.
    @cmd = ($ninpaths, @appendargs, '-r', $pathhost);

    printf("exec'ing %s\n", join(' ', @cmd)) if $debug;

    $pid = open(my $NINPATHS, '-|');
    die "cannot fork: $!\n" if $pid < 0;
    if ($pid == 0) {
        exec(@cmd) || die "cannot exec ninpaths: $!\n";
    } else {
        while (<$NINPATHS>) {
            $sendout .= $_;
        }
        close($NINPATHS) || die "execution of ninpaths failed: $!\n";
    }

    if (defined $opt{'n'}) {
        # We are not sending this report anywhere, but to stdout.
        print $sendout;
    } else {
        if ($sm =~ /%s/) {
            $sm = sprintf($sm, join(' ', @emailto));
        } else {
            $sm .= ' ' . join(' ', @emailto);
        }

        print "exec'ing $sm\n" if $debug;

        # Fork and spawn the MTA without using the shell.
        $pid = open(my $MTA, '|-');
        die "cannot fork: $!\n" if $pid < 0;
        if ($pid == 0) {
            exec(split(/\s+/, $sm)) || die "cannot exec $sm: $!";
        } else {

            print $MTA "To: " . join(",\n\t", @emailto) . "\n";
            print $MTA "Subject: inpaths $pathhost\n";
            print $MTA "Auto-Submitted: auto-generated\n";
            print $MTA "\n";
            print $MTA $sendout;
            print $MTA "\n";
            close($MTA) || die "execution of $sm failed: $!\n";
        }

        # Remove old dumps.
        foreach my $file (@oldfiles) {
            print "removing $file\n" if $debug;
            unlink($file);
        }
    }

    return;
}

main();

exit 0;
