################################################################################
#
# Example "peg_ini.pl".
#
# This is user specific code - customize it to your personal preferences :-)
#
################################################################################

use strict;
use warnings;

our %Env;		# An undocumented peg feature...
			#  $Env{x} = 'y'  acts like  $ENV{x} ||= 'y'
			#  ie. the user can override behaviour from the shell.

our %Peg_longopt;

sub Warn {
    my $msg = join '', @_;
    print STDERR "peg_ini: $msg\n";
}

sub Die {
    Warn @_;
    exit(2);
}

################################################################################
#
# Define some 'long options':
#

# Find files matching a given PERLEXPR/ALIAS.
# eg% peg -find /foo/
#
$Peg_longopt{find} = sub {
    my $argv_ref = shift;
    @$argv_ref or die "expected EXTENSION or /PATTERN/ argument";
    my $p_arg = shift @$argv_ref;
    if ($p_arg =~ /^[\w\.\-]{2,}/) {
	$p_arg = "m," . quotemeta($p_arg) . "\[^/]*\$,i";
    }
    Warn "-find magic: $p_arg";
    unshift @$argv_ref, '-Y,p', '+1', '-dlnp', $p_arg;
};

# Pipe output thro a pager.
#
$Peg_longopt{pager} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-Y,#', '-##';
    $! = $? = 0;
    open(PAGER_OUT, '|-', "C:/cygwin/bin/less.exe -mR") && !$! && !$?
	or die "unable to pipe STDOUT via less\n";
    *STDOUT = \*PAGER_OUT;
    $@ and die $@;
};

# Option to comment out -pager on cmdline
#
$Peg_longopt{pagerx} = sub {};

# Option to open files matche by the last run of peg in your editor.
# This is _very_ user specific!
# Here's mine on Win32 which invokes Crimson Editor.
# eg% peg -edit 22
#
$Peg_longopt{edit} = sub {
    my $argv_ref = shift;
    @$argv_ref or die "expected NUM... arguments";
    my @matches = last_matches(1);
    my $cedt = 'C:/Program Files/Crimson Editor/cedt.exe';
    my %done;
    foreach my $n (@$argv_ref) {
	if ($n =~ /^(\d+)-(\d+)$/) { # RANGE
	    push @$argv_ref, $1..$2;
	    next;
	}
	$n =~ /^\d+$/ or die "bad integer: $n";
	$n = @matches if $n > @matches;
	next if $done{$n}++;
	my $file = $matches[$n-1];
	my $size = -s $file;
	if ($size > 10_000_000) {
	    Warn "file $n too large $file: $size";
	    next;
	}
	print "= $file\n";
	$file =~ s|/|\\|g;
	system "\"$cedt\" \"$file\"";
    }
    exit;
};

# Determine total file size of cwd or the given directory.
#
$Peg_longopt{dirsize} = sub {
    my $argv_ref = shift;
    Warn "dirsize!";
    unshift @$argv_ref, (
	'-dPP', q[ $Z += -s $_; return; ],
	'-PPPP', q[
	    if ($Z > 1024*1024) {
		print +int($Z / (1024*1024)), " Mb";
	    } elsif ($Z > 1024) {
		print +int($Z / 1024), " Kb";
	    } else {
		print "$Z b";
	    }
	],
	'die("should not see this")',
    );
};

# Get full #if context.
# eg% peg -ifdef WHATEVER foobar.h
#
$Peg_longopt{ifdef} = $Peg_longopt{define} = sub {
    my $argv_ref = shift;
    # Turn on both context matchers, but don't match.
    # We then set the #ifdef context into $Context_line2 using -P code.
    unshift @$argv_ref, "-z", "+0", "-zz", "+0";
    $Env{PEG_CONTEXT_FORMAT2} = '$_';
    $Env{PEG_Z_INDEPENDENT} = 1;
    unshift @$argv_ref, "-P", <<'EOT';
	# NB. some compilers allow whitespace preceding
	#  the '#' in preprocessor lines.
	if (/^\s*\#/) {
	    my $new_cxt = 1;
	    if (/^\s*\#\s*if(n?def)?\b/) {
		push @cxt, [$_, $.];
	    }
	    elsif (/^\s*\#\s*elif\b/) {
		$cxt[$#cxt] = [$_, $.];
	    }
	    elsif (/^(\s*\#\s*else)\b/) {
		my $else_line = $1;
		my $if_line = $cxt[$#cxt]->[0];
		if ($if_line !~ /^\s*\#\s*elif/) {
		    $if_line =~ s/[\n\r\t ]+\z//;
		    $else_line = "$else_line  /* $if_line */$::Newline";
		} else {
		    $else_line = $_;
		}
		$cxt[$#cxt] = [$else_line, $.];
	    }
	    elsif (/^\s*\#\s*endif\b/) {
		pop @cxt;
	    }
	    else {
		$new_cxt = 0;
	    }
	    if ($new_cxt) {
		if (@cxt) {
		    $Context_line2 = '';
		    foreach my $cxt (@cxt) {
			my ($line, $lineno) = @$cxt;
			$Context_line2 .= "($lineno)\t$line";
		    }
		} elsif ($Printed_Context_line2) {
		    $Context_line2 = '*none*' . $::Newline;
		}
		if (defined $Printed_Context_line2
				and
			$Context_line2 eq $Printed_Context_line2)
		{
		    # Ensure we don't reprint the same context.
		    undef $Context_line2;
		}
	    }
	}

EOT
};

# Option to open a file in the "vim" editor.
# eg% peg -vim 22
#
$Peg_longopt{vim} = sub {
    my $argv_ref = shift;
    my $n = shift @$argv_ref or die;
    my @matches = last_matches();
    $n = @matches if $n > @matches;
    my $file = $matches[$n-1];
    system "vim \"$file\"";
    exit;
};

# Option to ignore files within the specified directory.
# eg% peg -idir CVS whatever
#
$Peg_longopt{'ignore-dir'} = $Peg_longopt{'idir'} = sub {
    my $argv_ref = shift;
    my $dir_name = quotemeta shift @$argv_ref or die;
    unshift @$argv_ref, "-p", qq{ \$File !~ m:(^|/)$dir_name/: };
};

################################################################################
#
# General peg configuration variables:
#

# This is the key to getting good performance for recusive finds on Win32:
my $qfind = $::Bin_dir . "qfind.exe";
$Env{'PEG_R_CMD'} = $qfind if -f $qfind;

# This looks good on a black background:
$Env{'PEG_COLOR'} = 'f=dg,c=dy,l=dc,b=dm,n=dw,m=dr,z=wob,y=dyor';

# Default options:
$Env{'PEG_OPTIONS'} = '-JJJss#+_';

################################################################################
#
# Define some -p ALIASes:
#

$Env{'PEG_P_C'} = '/\.(?:c|cpp|h|hpp|xs)$/i';
$Env{'PEG_P_H'} = '/\.(?:h|hpp)$/i';
$Env{'PEG_P_P'} = '/\.(?:pm|pl|t)$/i';

################################################################################
#
# Define some -z ALIASes:
#

# C functions/struct/template/#define context.
#
$Env{'PEG_Z_C'} = <<'EOT';
# PEG_FAST_Z_CONTEXT
	(/^\w[\w\s\*\&:~]*\(/ and not /^(?:if|for|switch|while)\b/
	    and (($L = $_), ($L =~ s/\/\*.*?\*\/|\/[\*\/].*//g), ($L !~ /[!^%;"]/)))
    or
	(/^typedef\s+struct\s*(?:\{[^\}]*)?$/ and do {{
	    # Read forward to find the struct name! Do the entire file in one pass.
	    unless ($::Last_file eq $File) {
		$::Last_file = $File;
		%::Typedef_struct = ();
		my $start_pos = tell(F);
		my $start_line = $.;
		my $typedef_struct_line = $.;
		my $inside = 1;
		while (<F>) {
		    if ($inside) {
			if (/^\}\s+(\w+)/) {
			    $::Typedef_struct{$typedef_struct_line} = $1;
			    $inside = undef;
			}
		    } elsif (/^typedef\s+struct\s*(?:\{[^\}]*)?$/) {
			$typedef_struct_line = $.;
			$inside = 1;
		    }
		}
		# Restore IO position.
		$. = $start_line;
		seek F, $start_pos, 0
		    or die "PEG_Z_C: cannot seek back in $File: $!\n";
	    }
	    my $found;
	    if (exists $::Typedef_struct{$.}) {
		$_ = "typedef struct " . $::Typedef_struct{$.} . " {" . $::Newline;
		$found = 1;
	    }
	    $found;
	}})
    or
	(/^(?:typedef\s+struct|struct|template)\s+\w+/ and not /[,;\)]/)
    or
	(/^\#\s*define\s+\w+.*\\$/)
EOT

#$Env{'PEG_ZZ_C'} = '/^class\s+\w+/ and not /;/';

$Env{'PEG_Z_P'} = '/^(?:\s*sub\s+\w|=head|__(?:END|DATA)__)/';

$Env{'PEG_Z_T'} = '/^\s*(?:proc|namespace)\b/';

################################################################################
#
# Some -P code ALIASes:
# NB. if these contain a comment matching "/# -(P+)/", then this is used
#   as the -P option. (This feature is not documented).
#

# Process backslashed lines as one.
$Env{'PEG_CODE_BS'} = <<'EOT';

    # -P
    # PEG_SAFE_BEFORE_CONTEXT
    if (defined $orign) { $. = 1 + $orign; $orign = undef }
    if (/\\$/) { $startn = $. unless defined $l; $l .= $_; next }
    if (defined $l) { $_ = $l . $_; $orign = $.; $. = $startn; $l = undef }
EOT

# Ignore Perl 'block' comments
$Env{'PEG_CODE_IPC'} = <<'EOT';

    # -P
    last if /^__END__/;
    next if /^=head/ .. /^=cut/;
EOT

# Ignore C comments
$Env{'PEG_CODE_ICC'} = <<'EOT';

    # -P
    # BEGIN { $Opts{W} = 1 }; # restore the comments in the output
    s|/\*.*?\*/||g; # /* ... */
    s|/\*.*$||;     # /* ...
    s|//.*$||;      # // ...
    s|^\s*\*.*$||;  # * ...
EOT

# -PP code to print checksums
$Env{'PEG_CODE_CKSUM'} = <<'EOT';

    # -PP
    print $Col{filename}, $File, $Col_Reset, "\t= ", cksum($Filepath), "\n";
    push @Matched_files, $File;
    return;
EOT

# -P code to shrink whitespace
$Env{'PEG_CODE_SHRINK'} = <<'EOT';

    # -P
    s/^[ \t]+//;
    s/[ \t]+$//;
    s/[ \t]+/ /g;
EOT


################################################################################
#
# -S code.
#
# Relies on the availability of the following external programs:
#   tar, unzip, gzip & pdftotext.
#

%::Peg_S = (
    'pdf'     => \&process_pdf,
    '*gz'     => \&process_gz,
    '*tar'    => \&process_tar,
    '*tar.gz' => \&process_targz,
    '*zip'    => \&process_zip,
);

# The routines below do 'quick' scans _unless_ the -pp option is specified,
#  in which case each file within each archive is individually processed.

sub process_tar {
    return process_tar_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the tar file"
	    unless $::Done_process_archive_warning++;
    return process_tar_fast(@_);

} # process_tar


sub process_targz {
    return process_targz_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the tar.gz file"
	    unless $::Done_process_archive_warning++;
    return process_targz_fast(@_);

} # process_targz


sub process_zip {
    return process_zip_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the zip file"
	    unless $::Done_process_archive_warning++;
    return process_zip_fast(@_);

} # process_zip


sub process_tar_slow {
    my ($file, $fullpath) = @_;
    my $cmd = "tar -tf \"$file\"";
    Warn "running $cmd" if $::Verbose;
    my @filelist = `$cmd`;
    if ($?
	    # Heuristic - seen "tar -tf" give correct results AND error code!
	    and @filelist < 3
    ) {
	Warn "failed to get file list from $fullpath: $?\n", @filelist;
	return 0; # signal to process the file as usual
    }
    foreach my $f (@filelist) {
	$f =~ s/\015?\012\z//;
	next if $f =~ m|/$|; # skip directory names
	next unless pp($f);
	$cmd = qq(tar -xOf "$file" "$f");
	Warn "running $cmd" if $::Verbose;
	open(my $fh, "$cmd|")
	    or Die "can't extract $f from $fullpath: $!";
	S($fh, "$fullpath # $f", 1);
	close $fh;
    }
    return 1;

} # process_tar_slow


sub process_tar_fast {
    my ($file, $fullpath) = @_;
    my $cmd = "tar -xOf \"$file\"";
    my $fh;
    Warn "running $cmd" if $::Verbose;
    if (!open($fh, "$cmd|")) {
	Warn "can't extract $fullpath: $!";
	return 0;
    }
    S($fh, $fullpath);
    close $fh;
    return 1;

} # process_tar_fast


# Process the contents of a .tar.gz file by file.
sub process_targz_slow {
    require File::Temp;
    my ($file, $fullpath) = @_;
    my ($fh, $tempfile) = File::Temp::tempfile("peg-targz-XXXXX", SUFFIX => '.tar', UNLINK => 1);
    close $fh;
    my $cmd = qq(gzip -dc "$file" > "$tempfile");
    Warn "running $cmd" if $::Verbose;
    system $cmd and Die "error: $cmd: $?";
    process_tar_slow($tempfile, $fullpath);
    unlink $tempfile;
    return 1;

} # process_targz_slow


# Process the contents of a .tar.gz as one entity.
sub process_targz_fast {
    my ($file, $fullpath) = @_;
    my $cmd = qq(gzip -dc "$file" | tar -xOf -);
    Warn "running $cmd" if $::Verbose;
    my $fh;
    if (!open($fh, "$cmd|")) {
	Warn "can't extract $fullpath: $!";
	return 0;
    }
    S($fh, $fullpath);
    close $fh;
    return 1;

} # process_targz_fast


# Process each individual file within a ".zip" file.
sub process_zip_slow {
    my ($file, $fullpath) = @_;
    my $cmd = "unzip -Z1 \"$file\" 2>&1";
    Warn "running $cmd" if $::Verbose;
    my @filelist = `$cmd`;
    if ($?) {
	Warn "unzip failed with $fullpath: $?\n", @filelist;
	return 0; # signal to process the file as usual
    }
    Warn "zip contains @{[ scalar @filelist ]} files" if $::Verbose;
    foreach my $f (@filelist) {
	$f =~ s/\015?\012\z//;
	next unless pp($f);
	my $cmd = qq(unzip -p "$file" "$f");
	Warn "running $cmd" if $::Verbose;
	open(my $fh, "$cmd|")
	    or Die "can't extract $f from $fullpath: $!";
	S($fh, "$fullpath # $f", 1);
	close $fh;
    }
    return 1;

} # process_zip_slow


# Process the entire contents inside a ".zip" file as one.
sub process_zip_fast {
    my ($file, $fullpath) = @_;
    my $cmd = qq(unzip -p "$file");
    Warn "running $cmd" if $::Verbose;
    open(my $fh, "$cmd|")
	or Die "can't unzip $fullpath: $!";
    S($fh, $fullpath);
    close $fh;
    return 1;

} # process_zip_fast


sub process_gz {
    my ($file, $fullpath) = @_;
    my $cmd = qq(gzip -dc "$file");
    Warn "running $cmd" if $::Verbose;
    open(my $fh, "$cmd|")
	or Die "error: $cmd: $!";
    S($fh, $fullpath);
    close $fh;
    return 1;

} # process_gz


sub process_pdf {
    require File::Temp;
    my ($file, $fullpath) = @_;
    my ($fh, $tempfile) = File::Temp::tempfile("peg-pdf-XXXXX", SUFFIX => '.pdf', UNLINK => 1);
    close $fh;
    my $cmd = "pdftotext \"$file\" $tempfile";
    Warn "running $cmd" if $::Verbose;
    system $cmd;
    if ($?) {
	Warn "pdftotext failed: $?";
	unlink $tempfile;
	return 0;
    }
    unless (open($fh, "<", $tempfile)) {
	Warn "could not open $tempfile: $!";
	unlink $tempfile;
	return 0;
    }
    S($fh, $fullpath);
    close $fh;
    unlink $tempfile;
    return 1;

} # process_pdf

################################################################################
#
# Misc functions available to PERLEXPR:
#

sub say {
    my $msg = join '', @_;
    $msg =~ s/[\012\015]+\z//;
    print $msg, $::Newline;

} # say


sub mv {
    @_ == 2 or die "Usage: mv(SRC, DEST)\n";
    require File::Copy;
    File::Copy::move(@_);

} # mv


sub cp {
    @_ == 2 or die "Usage: cp(SRC, DEST)\n";
    require File::Copy;
    File::Copy::copy(@_);

} # cp


# Provide a checksum subroutine:
sub cksum {
    require Digest;
    my $file = shift;
    open my $fin, "<", $file
	or return "cksum: can't open $file: $!";
    binmode $fin;
    my $ctx = Digest->new("MD5");
    $ctx->addfile($fin);
    return $ctx->b64digest() . "-" . sprintf('%x', -s($file));

} # cksum



# Avoid "used only once" warnings.
1 or ($::Newline, $::Bin_dir, %::Peg_S, %::Peg_S);


1;