#!/usr/bin/perl -w use strict; use vars qw($VERSION); require 5.005; $VERSION = sprintf "%d.%d", q$Revision: 1.24 $ =~ /(\d+)/g; # jhi@iki.fi use Getopt::Long; my $Rules = 0; my $Stdout = 1; my $Out = 1; my $Format = "%o --> %t"; my $Help = 0; my $Version = 0; sub usage { die <<__EO__; $0: Usage: $0 [--nostdout] [--nofile] [--format=s] [--rules] [--help] [--version] [ lexicon_file rules_file ] The options can be abbreviated and one dash used instead of two. If no lexicon and rules files are specified, they will be asked interactively. $0: Sound change applier (C) 2003 Jarkko Hietaniemi $0: Based on Mark Rosenfelder's C version, $0: see http://www.zompist.com/sounds.htm __EO__ exit(1); } usage() unless GetOptions( 'Rules' => \$Rules, 'Stdout!' => \$Stdout, 'Out!' => \$Out, 'Format=s' => \$Format, 'Help' => \$Help, 'Version' => \$Version, ); usage() if $Help; if ($Version) { print $VERSION, "\n"; exit(1); } my ($LX, $SC); sub do_ask { my ($type) = @_; print "Enter the name of the $type file: "; my $name = ; if (defined $name) { chomp $name } unless (defined $name && length $name) { die "$0: \u$type file undefined\n"; } return $name; } if (@ARGV == 2) { ($LX, $SC) = @ARGV; } elsif (@ARGV == 0) { $LX = do_ask("lexicon"); $SC = do_ask("rules"); } else { usage(); } sub do_ext { my ($self, $type, $ext) = @_; unless (-f $_[0]) { if (-f "$_[0]$ext") { $_[0] = "$_[0]$ext"; } else { die "$0: No $type file '$_[0]'\n"; } } } do_ext($LX, "lexicon", ".lex"); do_ext($SC, "rules", ".sc"); my %Cat; my @Rule; sub hasCat { my $Cat = join('', keys %Cat); $_[0] =~ /[$Cat]/; } sub expandCat { if (hasCat($_[0])) { $_[0] =~ s/(.)/exists $Cat{$1} ? $Cat{$1} : $1/eg; } } sub addRule { my ($from, $to, $env) = @_; my (@From, @To); my $i = 1; # print "from=$from to=$to env=$env\n"; while ($env =~ /(?:\((.)\)|(_)|(\#)|(.))/g) { my ($opt, $foc, $anc, $els) = ($1, $2, $3, $4); # print "opt=$opt foc=$foc anc=$anc els=$els\n"; my ($f, $t); if (defined $opt) { # Optional. $els = $opt; $opt = 1; } if (defined $foc) { # The focus. $f = $from; my $fHasCat = hasCat($f); if ($fHasCat) { expandCat($f); # Delay [] wrapping. } $t = $to; if (hasCat($t)) { expandCat($t); if ($fHasCat) { # Both sides have variables. $t = qq{substr("$t", index("$f", \$$i), 1)}; } } else { $t = qq{"$t"}; } $f = "[$f]" if $fHasCat; # Do the [] wrapping now. $i++; } elsif (defined $anc) { # An anchor. if (pos($env)) { push @From, "\$"; } else { push @From, "\^"; } next; # Next token, please. } elsif (defined $els) { # Something else. my $s = $els; if (hasCat($s)) { expandCat($s); $f = "[$s]"; } else { $f = $els; } $f .= '?' if $opt; $t = "\$$i"; $i++; } else { die "$0: Error: opt=$opt foc=$foc anc=$anc els=$els\n"; } push @From, "($f)"; push @To, $t; } my $From = join('', @From); my $To = join('.', @To); eval qq{sub { my \$o = \$_; return "\$env\t$from -> $to\t\$o -> \$_" if s/$From/$To/ge }}; die "From = $From, To = $To: $@" if $@; push @Rule, eval qq{sub { my \$o = \$_; return "\$env\t$from -> $to\t\$o -> \$_" if s/$From/$To/ge && \$_ ne \$o }}; } sub parseRule { return if /^\s*(\*.*)?$/; if (m{^(.)=(.+)}) { my ($var, $val) = ($1, $2); $Cat{$var} = quotemeta $val; } elsif (m{^([^/]+)/([^/]*)/(.+)$}) { my ($from, $to, $env) = ($1, $2, $3); unless ($env =~ /^[^_]*_[^_]*$/) { die "$SC:$.: must have exactly one _ in environment\n"; } elsif ($env =~ /\#/) { unless ($env =~ m{^\#([^\#]*)\#?$} || $env =~ m{^([^\#]*)\#$}) { die "$SC:$.: \# must be at the beginning or at the end\n"; } } addRule(quotemeta $from, quotemeta $to, $env); } else { die "$SC:$.: unknown syntax\n"; } } if (open(SC, $SC)) { while () { parseRule() } close(SC); } else { die "$0: Failed to open rules file '$SC' for reading: $!\n"; } my $out; my $OUT = $SC; $OUT =~ s/\.sc$/.out/; if ($Out) { unless (open($out, ">$OUT")) { die "$0: Failed to open output file '$OUT' for writing: $!\n"; } } sub do_print { print @_ if $Stdout; print $out @_ if $Out; } if (open(LX, $LX)) { while () { return if /^\s*(\*.*)?$/; s/^\s+//; s/\s+$//; my $o = $_; my @v; for my $r (@Rule) { while (my $v = &{ $r }) { push @v, $v if defined $v } } my $out = $Format; $out =~ s/%o/$o/g; $out =~ s/%t/$_/g; do_print $out, "\n"; if ($Rules) { for (@v) { do_print "\t$_\n"; } } } close(LX); close $out if defined $out; } else { die "$0: Failed to open lexicon file '$LX' for reading: $!\n"; } exit(0); __END__ =head1 NAME sounds - apply sound changes =head1 SYNOPIS sounds.pl [--nostdout] [--noout] [--format=s] [--rules] [--help] [--version] [lexicon_file rules_file] =head1 DESCRIPTION The F converts the B using the B and displays the result. =head2 The Lexicon File The lexicon file contains words, one per line. The lexicon file may contain empty lines, and also comment lines that have (optional leading whitespace followed by) a "*". Anything on that line following the "*" is ignored. If no lexicon file by the specified name does not exist, also the name with F<.lex> appended will be tried. =head2 The Rules Language The F contains rules of the forms category=characters from/to/environment With the first form, called category definitions, you can define classes or groups of characters, for example V=aeiou You can use these definitions in the rules of the second form, called transformation rules. For example: z/s/_ to change 'z' into 's' everywhere. The '_' stands for the I part. Another example is m/n/_# which changes word-final 'm' into 'n'. The '#' stands for either the end of the word or the beginning of the word, depending on the context. Further example using category definitions: F=ie c/i/F_t meaning that c changes to 'i' after the vowels 'i' and 'e' and before a 't'. Both parts can have category definitions, too: S=ptc Z=bdg S/Z/V_V meaning that the letters 'ptc' change to their voiced equivalents 'bdg' between vowels. If the lengths of I and I are not equal, weird results will ensue. Optional elements are indicated by enclosing them in parenthesis: s(s) means one or two esses. Similarly to the lexicon file, the rules file may contain empty lines, and also comment lines that have (optional leading whitespace followed by) a "*", and anything on the line following the "*" is ignored. If no rules file by the specified name does not exist, also the name with F<.sc> appended will be tried. =head2 Options --help Display the basic help text. --nostdout Do not display the results in screen. --noout Do not produce the output file. The default is to create F from F. --format=s The default format is C<< %o --> %t >> which means the original followed by an "arrow" and the transformed result. --rules Display the individual rules being applied. --version Show version of the F script. All the options can be shortened to their unique prefixes, and also the leading C<--> be shortened to a single C<->. =head1 ACKNOWLEDGEMENTS Heavy intellectual debt to Mark Rosenfelder, see http://www.zompist.com/sounds.htm The user interface is not exactly the same. =head1 PREREQUISITES Getopt::Long strict vars =head1 SCRIPT CATEGORIES Linguistics =head1 README Apply sound changes. Hopefully amusing for linguists, hobby or serious. =head1 SEE ALSO http://www.zompist.com/sounds.htm =head1 COPYRIGHT (C) 2003 by Jarkko Hietaniemi All rights reserved. You may distribute this code under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. =cut