%% This OTP, part of the Makor2 package for typesetting Hebrew with
%% Omega, is responsible for the minimal contextual analysis that
%% Hebrew demands.  That is, where appropriate, consonants at the ends
%% of words are converted to final forms.
%% This OTP also handles the special case of BENT lamed or lamed-dagesh
%% followed by the holam dot.
%% Also, this OTP converts the pattern `_a' at a word end to `a'.
%%
%% NOTE: `%' starts a comment that runs to the end of the line, so every
%% declaration and every rule below must start on a line of its own;
%% joining lines silently comments out whatever follows the `%'.

%% Both the input and the output stream use one byte per character.
input: 1;
output: 1;

aliases:

%% Here is a subset of the gutturals: het and ayin.
GUTTERAL = (103 | 114) ;

%% Underscore character (suppresses a final form, see below).
USCORE = 95 ;

%% Circumflex character (forces a final form, see below).
CIRCUM = 94 ;

%% Consonants that possess a final form:
%%   107, 110, 112, 116, 118  -- khaf, mem, nun, phe, tsadi
%%   155, 158, 160, 164       -- dagesh forms
%% The rules below produce the final form by emitting (code - 1).
FINAL = (107|110|112|116|118|155|158|160|164) ;

PATAH = 71 ;

%% All vowels EXCEPT sheva.
VOWEL = (65-73|75-77|182-187) ;
SHEVA = 74 ;
HOLAM = 85 ;
ALLVWL = ({SHEVA}|{VOWEL}) ;

%% Cantorial trope.
TROPE = (209-225|227-239) ;

LAMED = 140 ;
LAMEDDOT = 172 ;
HOLAMDOT = 73 ;
TSADI = 118 ;
RQUOTE = 39 ;

%% Here we identify the characters which are valid parts of words.
%% INVALID is the complement of exactly the same set as VALID.
VALID = (85|96-122|128-135|144-173|176-181|192-199|140|189) ;
INVALID = ^(85|96-122|128-135|144-173|176-181|192-199|140|189) ;
SPACE = 32 ;

%% Period or colon.
DOTORCOLON = (46|58) ;

expressions:

%% Hebrew has no word-initial forms, so we can get rid of the
%% start-of-word markers (254 followed by an opening parenthesis)
%% right away...
254 `(' => ;

%% Let's get the LAMED stuff out of the way: lamed (or lamed with dot)
%% followed by the holam dot collapses to a single output code.
{LAMED} {HOLAMDOT} => 173 ;
{LAMEDDOT} {HOLAMDOT} => 189 ;

%% Here is the convention for guttural+patah at the word end: the
%% underscore is dropped and the guttural (\3) is emitted before the
%% patah (\2).
{USCORE}{PATAH}{GUTTERAL} => \3 \2 ;

%% There are two `kinds' of vowels---regular vowels, and trope.  The trope
%% may precede or follow the vowels.  Usually, there's one trope (at most)
%% per syllable, but it's possible for there to be two.

%% First, we handle the final-suppressing machinery: a trailing
%% underscore keeps the consonant in its non-final form and is removed.
{FINAL}{USCORE} => \1 ;

%% Now for the enforced finals: a trailing circumflex is removed and the
%% consonant is replaced by its final form (code - 1).
{FINAL}{CIRCUM} => #(\1 - 1) ;

%% Now, the finals.  But first, another special case:
%% tsadi at the end of a word followed by a
%% single right quote.
%% Tsadi + {SHEVA} directly before the end-of-word marker (a closing
%% parenthesis followed by 254): emit the final tsadi (code - 1) followed
%% by character 39 (the right quote); the marker itself is consumed.
%% NOTE(review): the pattern tests {SHEVA} (74) rather than {RQUOTE} (39);
%% presumably the quote has already been read as a sheva by the time this
%% OTP runs -- confirm against the preceding stage before changing it.
{TSADI}{SHEVA} `)' 254 => #(\1 - 1) 39 ;

%% Because of some bug or other in Omega1.15, it's not possible for
%% m2contest.otp to be as general as it should be.  Consequently,
%% we need the following rule, which can hopefully be removed when
%% Omega is fixed...
%% A FINAL consonant, then an optional trope, an optional vowel (sheva
%% included) and another optional trope, then a period or colon.
%% Output: the final form of the consonant, then the rest of the match
%% with its first character skipped (`* + 1'), punctuation included.
{FINAL}{TROPE}<0,1>{ALLVWL}<0,1>{TROPE}<0,1>{DOTORCOLON} =>
    #(\1 - 1) \(* + 1 ) ;

%% Words can be ended by invalids, spaces, or the end
%% of input..
%% Same shape as the rule above, but terminated by the end-of-word
%% marker.  Output: the final form of the consonant, then the match
%% with its first character and its last two characters (the marker)
%% skipped (`* + 1 - 2').
{FINAL}{TROPE}<0,1>{ALLVWL}<0,1>{TROPE}<0,1>`)' 254 =>
    #(\1 - 1 ) \(* + 1 - 2) ;

%% Finally, let's strip off any remaining end-of-word markers...
`)' 254 => ;