#!/usr/bin/perl -w

use strict;
use Pod::Usage;
use PDF::Reuse;  
use Getopt::Std;

# Option values: Getopt::Std::getopts() stores -s and -j in these package
# variables.
our ($opt_s, $opt_j);

our $VERSION = 0.2;

# Report a command line error: print the message in the script's "** ..."
# style, show the usage text and stop.
sub usage_error {
    my ($message) = @_;

    print "** $message\n";
    usage();
    exit 1;    # safety net in case usage() ever returns
}

###########
# Main program
#

# getopts() returns false when it meets an option it does not know;
# stop with the usage text instead of carrying on with a half-parsed
# command line.
unless ( getopts("s:j:") ) {
    usage();
    exit 1;
}

usage_error("You must specify a PDF document as first argument")
    unless $ARGV[0] && -f $ARGV[0];
my $pdfinfile = $ARGV[0];

#
# If a second arg. is present, it must be the number of the first page.
# A missing (or "0") argument leaves the default of page 1 in place.
#
my $first = 1;
if ( $ARGV[1] ) {
    # Anchored, so that values such as "3x" or "-3" are rejected too.
    usage_error("Second argument must be a page number")
        unless $ARGV[1] =~ m/\A\d+\z/;
    $first = $ARGV[1];
}

#
# If a third arg. is present, it must be the number of the last page and
# may not be smaller than the first page. Default: same as first page.
#
my $last = $first;
if ( $ARGV[2] ) {
    usage_error("Third argument must be a page number")
        unless $ARGV[2] =~ m/\A\d+\z/;
    $last = $ARGV[2];
    usage_error("Last page number must be equal to or greater than first page number")
        unless $last >= $first;
}

prDocDir(".");

# -s: number of pages per output file. Checked whenever the option was
# given at all, so that "-s0" is reported instead of silently ignored.
if ( defined $opt_s ) {
    usage_error("Split argument must be a number > 0")
        unless $opt_s =~ m/\A\d+\z/ && $opt_s > 0;
}

# -j: number of leading pages to leave out of every split group. Only
# meaningful together with -s and must be smaller than the group size.
# "-j0" is the same as not giving the option.
my $jump = 0;
if ( $opt_j ) {
    usage_error("Jump can only be used with split option")
        unless $opt_s;
    usage_error("Jump argument must be a number > 0")
        unless $opt_j =~ m/\A\d+\z/ && $opt_j > 0;
    $jump = $opt_j;
    usage_error("Jump number must be smaller than split count")
        unless $jump < $opt_s;
}

if ( $opt_s ) {
    # Split: one output file per group of $opt_s pages, each group starting
    # $jump pages into the group. Stop at the first group for which
    # dump_pages() reports that nothing could be extracted.
    # NOTE(review): the end of the final group is not clipped to $last, so
    # it can reach past the requested last page when the range is not a
    # multiple of the split count -- confirm whether that is intended.
    for ( my $pg = $first; $pg <= $last; $pg += $opt_s ) {
        last unless dump_pages( $pdfinfile, $pg + $jump, $pg + $opt_s - 1 );
    }
}
else {
    # No split: the whole range goes to a single output file.
    dump_pages( $pdfinfile, $first, $last );
}

1;


#######################
#
# Subs
#


# Extract pages $first..$last of $pdfinfile into a new PDF file whose name
# is built by get_out_file_name().
#
# The value returned by prDoc() is used as the number of pages actually
# copied. When that is fewer than requested, the output file is renamed so
# that its name shows the real last page.
#
# Returns the number of pages extracted, or nothing (empty list / undef)
# when no page could be extracted; in that case the output file is removed.
sub dump_pages {
    my ($pdfinfile, $first, $last) = @_;

    my $pdfoutfile = get_out_file_name($pdfinfile, $first, $last);

    prFile($pdfoutfile);
    my $pages = prDoc($pdfinfile, $first, $last);

    unless ($pages) {
        print "Page $first is not found in $pdfinfile\n";
        unlink $pdfoutfile;
        return;
    }

    prEnd();

    my $actlast = $first + $pages - 1;
    if ($actlast != $last) {
        my $newpdfoutfile = get_out_file_name($pdfinfile, $first, $actlast);

        # Use the builtin rename instead of shelling out to mv(1): no shell
        # is involved, so file names with spaces or shell metacharacters are
        # safe, and a failure can be detected. Both names are derived from
        # the same input file name, so they share a directory.
        if (rename $pdfoutfile, $newpdfoutfile) {
            $pdfoutfile = $newpdfoutfile;
        }
        else {
            # Keep reporting the name the file really has.
            warn "** Could not rename $pdfoutfile to $newpdfoutfile: $!\n";
        }
    }

    print "$pages pages extracted from $pdfinfile and dumped to $pdfoutfile\n";
    return $pages;
}

# Print the usage text taken from this file's POD through Pod::Usage,
# requesting exit status 1. pod2usage() normally ends the script itself
# with that status (see the Pod::Usage documentation).
sub usage {
    return pod2usage( -exitval => 1 );
}

# Build the name of an output file from the input file name and a page
# range: "<infile>-pg<first>_<last>.pdf". A false $first (undef, 0, "")
# leaves out the first page number; a false $last leaves out the "_<last>"
# part.
sub get_out_file_name {
    my ($pdfinfile, $first, $last) = @_;

    my @pieces = ($pdfinfile, "-pg");
    push @pieces, $first     if $first;
    push @pieces, "_", $last if $last;
    push @pieces, ".pdf";

    return join "", @pieces;
}

############### DOCS ####################

=pod

=head1 NAME

pdfxtract.pl - PDF Page Extractor Utility

=head1 SYNOPSIS

pdfxtract.pl [-sX [-jY]] pdffile [firstpage [lastpage]]

Mandatory
  pdffile: PDF file name to extract pages from

When run without page arguments, the first page is extracted.

Optional:
 -sX      : Split, i.e. save each group of X pages to a separate file,
            else save is done to one target file.
 -jY      : Jump, skip the first Y pages of each group of X pages;
            requires -s, Y must be smaller than X, Y=0 default

 firstpage: page no of first page, default is 1
 lastpage : page no of last page, default is firstpage

 Run perldoc(1) on this file for additional documentation,
 or use a text viewer on the file and look at the DOCS section

=head1 PREREQUISITES

This script requires the C<PDF::Reuse> module

=head1 EXAMPLES

Example:
 'pdfxtract.pl 2005-08-01.pdf 2 5' will extract pages 2 to 5 
     to a single new file.
 
 'pdfxtract.pl -s2 2005-08-01.pdf 3 10' will extract pages 3,4; 5,6; 7,8 and 9,10 
     to four separate new files, each consisting of two pages.

 'pdfxtract.pl -s2 -j1 2005-08-01.pdf 3 10' will extract pages 4; 6; 8 and 10
     to four separate new files, each consisting of one page being the last
     of the two pages specified by X.

Tip: Use 'pdfinfo [pdffile]' to see overall PDF-file info.

=head1 DESCRIPTION

A utility to crop (extract) one or more pages from a PDF file and dump to separate file or files.

=head1 AUTHOR

Trond Haugen, C<trond.haugen@start.no>

=head1 COPYRIGHT

Copyright 2006 Trond Haugen. All rights reserved.

This module is free software. You can redistribute it and/or modify
it under the same terms as perl itself.


=head1 HISTORY

 Revision 0.1, 2006-12-06: Initial

=pod OSNAMES

any

=pod SCRIPT CATEGORIES

CPAN/Administrative
Fun/Educational

=pod README

A utility to crop (extract) one or more pages from a PDF file and dump to separate file or files.

=cut