|
- #
- # load the irregular verb table from data/rules/irregular_verbs.txt
- #
- # Each line is split on whitespace into five fields: base, past, participle,
- # plural and present. A field may list several comma-separated variants.
- #
- # Returns a hash with the keys base, past, participle, plural and present. Each
- # of those maps a word form to the record of the verb it belongs to. A record
- # holds the base form plus, for every other field, the variant that sorts first
- # when ordered by descending Pword().
- #
- sub loadVerbData
- {
-     local('$handle $text $base $past $participle $plural $present $data $temp $_past $_participle $_plural $_present @pasts @participles @plurals @presents');
-     $handle = openf("data/rules/irregular_verbs.txt");
-
-     $data = %(past => %(), base => %(), participle => %(), plural => %(), present => %());
-
-     while $text (readln($handle))
-     {
-         ($base, $_past, $_participle, $_plural, $_present) = split('[\s\t]+', $text);
-
-         # split each variant list once; the same arrays feed the ranking below and
-         # the index loops further down (indexing does not care about their order)
-         @pasts       = split(',', $_past);
-         @participles = split(',', $_participle);
-         @plurals     = split(',', $_plural);
-         @presents    = split(',', $_present);
-
-         $temp = %( base => $base,
-                    past => sort({ return Pword($2) <=> Pword($1); }, @pasts)[0],
-                    participle => sort({ return Pword($2) <=> Pword($1); }, @participles)[0],
-                    plural => sort({ return Pword($2) <=> Pword($1); }, @plurals)[0],
-                    present => sort({ return Pword($2) <=> Pword($1); }, @presents)[0]
-                  );
-
-         $data['base'][$base] = $temp;
-
-         # every variant of every form points at the same record
-         foreach $past (@pasts)
-         {
-             $data['past'][$past] = $temp;
-         }
-
-         foreach $participle (@participles)
-         {
-             $data['participle'][$participle] = $temp;
-         }
-
-         foreach $plural (@plurals)
-         {
-             $data['plural'][$plural] = $temp;
-         }
-
-         foreach $present (@presents)
-         {
-             $data['present'][$present] = $temp;
-         }
-     }
-
-     # release the file handle once the table is read
-     closef($handle);
-
-     return $data;
- }
-
- #
- # map a negative word to a positive counterpart
- #
- # $1 - the word to convert
- #
- # "unusual" and "until" have fixed replacements. Any other word longer than two
- # characters that starts with "un" loses the prefix, provided the remainder is
- # in $dictionary. Everything else is returned unchanged.
- #
- sub positiveWord
- {
-     local('$stem');
-
-     # fixed replacements
-     if ($1 eq "unusual") { return "common"; }
-     if ($1 eq "until") { return "after"; }
-
-     # nothing to strip: too short, or no "un" prefix
-     if (strlen($1) <= 2 || substr($1, 0, 2) ne "un")
-     {
-         return $1;
-     }
-
-     $stem = substr($1, 2);
-     if ($stem in $dictionary)
-     {
-         return $stem;
-     }
-
-     return $1;
- }
-
- #
- # pick a determiner for a word
- #
- # $1 - the word the determiner is scored against
- # $2 - index into the ranked candidate list
- #
- # The fixed candidate list is ordered by descending Pbigram2(candidate, $1) and
- # the entry at index $2 is returned.
- # NOTE(review): determiner-u calls this with one argument, so an omitted $2
- # presumably indexes like 0 (the best candidate) - confirm.
- #
- sub determiner
- {
-     local('@choices $byScore');
-
-     @choices = @('a', 'an', 'either', 'every', 'his', 'her', 'its', 'my', 'neither', 'one', 'our', 'that', 'the', 'their', 'this', 'your');
-
-     # the comparator captures the target word; $1 and $2 inside it are the two candidates
-     $byScore = lambda({ return Pbigram2($2, $word) <=> Pbigram2($1, $word); }, $word => $1);
-
-     @choices = sort($byScore, @choices);
-     return @choices[$2];
- }
-
- #
- # determiner for a word, with its first letter uppercased
- #
- # $1 - the word the determiner is chosen for. Its lowercase form is passed to
- #      determiner() when that form is in $dictionary, otherwise $1 is passed as is.
- #
- sub determiner-u
- {
-     local('$lower $target $det');
-
-     $target = $1;
-     $lower  = lc($1);
-     if ($lower in $dictionary)
-     {
-         $target = $lower;
-     }
-
-     $det = determiner($target);
-
-     # one-letter determiners are uppercased whole
-     if (strlen($det) == 1)
-     {
-         return uc($det);
-     }
-
-     return uc(charAt($det, 0)) . substr($det, 1);
- }
-
- #
- # convert a verb to its base form
- #
- # $1 - the verb, in any inflection
- #
- # Irregular forms are resolved through $verbs. Otherwise the ending decides:
- #   -ing  always stripped; a trailing "e" is restored when that spelling is in
- #         $dictionary and the stem does not end in "o" or "y"
- #   -ed   stripped only when the result is in $dictionary ("ied" may become "y")
- #   -es   same approach as -ed ("ies" may become "y")
- #   -s    delegated to pluralToSingular()
- # $1 is returned unchanged when no rule produces a known word.
- #
- # NOTE: doubled consonants ("stopped", "stopping") are not undone here.
- #
- sub baseVerb
- {
-     if ($1 in $verbs['base'])
-     {
-         return $1;
-     }
-     else if ($1 in $verbs['past'])
-     {
-         return $verbs['past'][$1]['base'];
-     }
-     else if ($1 in $verbs['participle'])
-     {
-         return $verbs['participle'][$1]['base'];
-     }
-     else if ($1 in $verbs['present'])
-     {
-         return $verbs['present'][$1]['base'];
-     }
-     else if ($1 in $verbs['plural'])
-     {
-         return $verbs['plural'][$1]['base'];
-     }
-     else if ([$1 endsWith: "ing"])
-     {
-         local('$base');
-         $base = left($1, -3);
-         if (right($base, 1) !isin "oy" && "$base $+ e" in $dictionary)
-         {
-             return "$base $+ e";
-         }
-         else
-         {
-             return $base;
-         }
-     }
-     else if ([$1 endsWith: "ed"])
-     {
-         # words that merely end in "ed" and are already base forms
-         if ($1 ismatch "deed|exceed|heed|need|seed|speed|succeed|unheed|unneed|weed")
-         {
-             return $1;
-         }
-         else if ($1 eq "created")
-         {
-             return left($1, -1);
-         }
-         else if (left($1, -2) in $dictionary)
-         {
-             return left($1, -2);
-         }
-         # undo the y => ied spelling that simplePastVerb applies ("tried" => "try");
-         # without this, "tried" comes back unchanged and is later inflected to "triedd"
-         else if ([$1 endsWith: "ied"] && (left($1, -3) . 'y') in $dictionary)
-         {
-             return left($1, -3) . 'y';
-         }
-         else if (left($1, -1) in $dictionary)
-         {
-             return left($1, -1);
-         }
-     }
-     else if ([$1 endsWith: "es"])
-     {
-         # kept for compatibility: these reduce to the "-e" form before the
-         # two-letter strip is tried ("uses" must not become "us")
-         if ($1 in @('uses', 'changes', 'continues'))
-         {
-             return left($1, -1);
-         }
-         else if (left($1, -2) in $dictionary)
-         {
-             return left($1, -2);
-         }
-         # "tries" => "try"
-         else if ([$1 endsWith: "ies"] && (left($1, -3) . 'y') in $dictionary)
-         {
-             return left($1, -3) . 'y';
-         }
-         # same one-letter fallback the -ed branch has ("likes" => "like")
-         else if (left($1, -1) in $dictionary)
-         {
-             return left($1, -1);
-         }
-     }
-     else if ([$1 endsWith: "s"])
-     {
-         return pluralToSingular($1);
-     }
-
-     return $1;
- }
-
- #
- # convert a verb to its past participle form
- #
- # $1 - the verb, in any inflection
- #
- # Verbs listed in $verbs['base'] use their recorded participle. All other verbs
- # get whatever simplePastVerb() produces for their base form.
- #
- sub pastParticipleVerb
- {
-     local('$stem');
-     $stem = baseVerb($1);
-
-     # regular verb: the participle is the simple past
-     if ($stem !in $verbs['base'])
-     {
-         return simplePastVerb($stem);
-     }
-
-     return $verbs['base'][$stem]['participle'];
- }
-
- #
- # convert a verb to its simple past form
- #
- # $1 - the verb, in any inflection (it is reduced with baseVerb() first)
- #
- # Verbs listed in $verbs['base'] use their recorded past form. For other verbs
- # the first spelling found in $dictionary wins, tried in this order:
- #   y => ied, base + "ed", base + "d", doubled final letter + "ed".
- # With no dictionary hit the result is base + "d" for bases ending in "e" and
- # base + "ed" otherwise.
- #
- sub simplePastVerb
- {
-     local('$base $doubled');
-
-     $base = baseVerb($1);
-     if ($base in $verbs['base'])
-     {
-         return $verbs['base'][$base]['past'];
-     }
-
-     if ([$base endsWith: "y"])
-     {
-         if ((left($base, -1) . 'ied') in $dictionary)
-         {
-             return left($base, -1) . 'ied';
-         }
-     }
-
-     if ("$base $+ ed" in $dictionary)
-     {
-         return "$base $+ ed";
-     }
-
-     # a bare "d" is only right after a final "e" ("like" => "liked"), or when the
-     # dictionary confirms it; appending it blindly produced words like "stopd"
-     if ([$base endsWith: "e"] || "$base $+ d" in $dictionary)
-     {
-         return "$base $+ d";
-     }
-
-     # "stop" => "stopped"
-     $doubled = $base . right($base, 1) . "ed";
-     if ($doubled in $dictionary)
-     {
-         return $doubled;
-     }
-
-     return "$base $+ ed";
- }
-
- #
- # convert a verb to its present participle form
- #
- # $1 - the verb, in any inflection
- #
- # Verbs listed in $verbs['base'] use their recorded present form. Otherwise:
- #   - a word ending in "ed" whose stem + "ing" is in $dictionary returns that
- #     ("stopped" => "stopping")
- #   - a base ending in "ee" keeps its "e" ("agree" => "agreeing")
- #   - a base ending in "ie" turns into "ying" ("die" => "dying")
- #   - any other base ending in "e", except "be", drops the "e"
- #   - everything else just gets "ing" appended
- #
- sub presentParticipleVerb
- {
-     local('$base $stem');
-
-     $base = baseVerb($1);
-
-     # irregular verbs are checked first so short forms such as "bred" are not
-     # caught by the -ed shortcut below
-     if ($base in $verbs['base'])
-     {
-         return $verbs['base'][$base]['present'];
-     }
-
-     if ([$1 endsWith: "ed"])
-     {
-         # left() drops the ending; substr($1, -2) would select the "ed" itself
-         $stem = left($1, -2);
-         if ("$stem $+ ing" in $dictionary)
-         {
-             return "$stem $+ ing";
-         }
-     }
-
-     if ([$base endsWith: "ee"])
-     {
-         return "$base $+ ing";
-     }
-
-     if ([$base endsWith: "ie"])
-     {
-         return left($base, -2) . "ying";
-     }
-
-     if ([$base endsWith: "e"] && $base ne "be")
-     {
-         return substr($base, 0, -1) . "ing";
-     }
-
-     return "$base $+ ing";
- }
-
- #
- # convert a singular to its plural form
- #
- # $1 - the singular word
- #
- # Lookup order: the inverted word table from getWordMappings(), the plural
- # recorded in $verbs['base'], the inverted suffix rules (a rewritten form must
- # be in $dictionary), and finally $1 + "s" when $1 does not already end in "s"
- # and that spelling is in $dictionary. $1 is returned unchanged otherwise.
- #
- sub singularToPlural
- {
-     this('$mappings $words');
-     local('$ending $swap $guess');
-
-     # the inverted tables live in this() variables; build them while still unset
-     if ($mappings is $null)
-     {
-         ($mappings, $words) = getWordMappings();
-
-         # plural => singular becomes singular => plural
-         $words = putAll(%(), values($words), keys($words));
-
-         # work on a copy so the shared suffix table keeps its 's' rule
-         # (presumably blanked so its empty replacement never becomes a key)
-         $mappings = copy($mappings);
-         $mappings['s'] = $null;
-         $mappings = putAll(%(), values($mappings), keys($mappings));
-     }
-
-     if ($1 in $words)
-     {
-         return $words[$1];
-     }
-
-     if ($1 in $verbs['base'])
-     {
-         return $verbs['base'][$1]["plural"];
-     }
-
-     foreach $ending => $swap ($mappings)
-     {
-         if ([$1 endsWith: $ending])
-         {
-             $guess = substr($1, 0, -1 * strlen($ending)) . $swap;
-
-             if ($guess in $dictionary)
-             {
-                 return $guess;
-             }
-         }
-     }
-
-     if (right($1, 1) ne "s" && "$1 $+ s" in $dictionary)
-     {
-         return "$1 $+ s";
-     }
-
-     return $1;
- }
-
- #
- # plural => singular lookup tables
- #
- # Returns @($mappings, $words):
- #   $words    - hash of whole words, plural => singular
- #   $mappings - ordered hash of suffix rules, plural ending => singular ending
- # Both are built on the first call and kept in this() variables, so callers get
- # the same objects every time and must copy before modifying them.
- #
- sub getWordMappings
- {
-     this('$mappings $words');
-     if ($mappings is $null)
-     {
-         # every key appears once; the table used to repeat many keys and the
-         # later value silently replaced the earlier one
-         $words = %(
-             addenda => 'addendum',
-             algae => 'alga',
-             alumnae => 'alumna',
-             alumni => 'alumnus',
-             analyses => 'analysis',
-             antennas => 'antenna',
-             apparatuses => 'apparatus',
-             appendices => 'appendix',
-             axes => 'axis',
-             bacilli => 'bacillus',
-             bases => 'basis',
-             beaux => 'beau',
-             bison => 'bison',
-             buffalos => 'buffalo',
-             bureaus => 'bureau',
-             busses => 'bus',
-             cactuses => 'cactus',
-             calves => 'calf',
-             corps => 'corps',
-             crises => 'crisis',
-             dice => 'die',
-             dwarfs => 'dwarf',
-             diagnoses => 'diagnosis',
-             echoes => 'echo',
-             elves => 'elf',
-             ellipses => 'ellipsis',
-             embargoes => 'embargo',
-             emphases => 'emphasis',
-             errata => 'erratum',
-             firemen => 'fireman',
-             focuses => 'focus',
-             formulas => 'formula',
-             fungi => 'fungus',
-             halves => 'half',
-             heroes => 'hero',
-             hippopotami => 'hippopotamus',
-             hoofs => 'hoof',
-             hypotheses => 'hypothesis',
-             indices => 'index',
-             knives => 'knife',
-             leaves => 'leaf',
-             lives => 'life',
-             loaves => 'loaf',
-             lice => 'louse',
-             matrices => 'matrix',
-             millenniums => 'millennium',
-             moose => 'moose',
-             mosquitoes => 'mosquito',
-             mice => 'mouse',
-             nebulae => 'nebula',
-             neuroses => 'neurosis',
-             nuclei => 'nucleus',
-             oases => 'oasis',
-             octopi => 'octopus',
-             ova => 'ovum',
-             paralyses => 'paralysis',
-             parentheses => 'parenthesis',
-             potatoes => 'potato',
-             prices => 'price',
-             radii => 'radius',
-             scarfs => 'scarf',
-             selves => 'self',
-             shelves => 'shelf',
-             stimuli => 'stimulus',
-             syllabi => 'syllabus',
-             symposia => 'symposium',
-             syntheses => 'synthesis',
-             synopses => 'synopsis',
-             tableaux => 'tableau',
-             those => 'that',
-             theses => 'thesis',
-             thieves => 'thief',
-             these => 'this',
-             tomatoes => 'tomato',
-             torpedoes => 'torpedo',
-             vertebrae => 'vertebra',
-             vetoes => 'veto',
-             vitae => 'vita',
-             watches => 'watch',
-             wives => 'wife',
-             wolves => 'wolf',
-             zeros => 'zero',
-
-             # words that we're not going to guess with our super elite c0de
-             children => 'child',
-             men => 'man',
-             geese => 'goose',
-             oxen => 'ox',
-             women => 'woman',
-             feet => 'foot',
-             teeth => 'tooth',
-             people => 'person',
-
-             # weird endings I was too lazy to devise a scheme for
-             bacteria => 'bacterium',
-             corpora => 'corpus',
-             criteria => 'criterion',
-             curricula => 'curriculum',
-             genera => 'genus',
-             memoranda => 'memorandum',
-             phenomena => 'phenomenon',
-             strata => 'stratum',
-
-             # words that are the same whether plural or singular
-             deer => 'deer',
-             sheep => 'sheep',
-             species => 'species',
-             means => 'means',
-             offspring => 'offspring',
-             series => 'series',
-             fish => 'fish',
-             media => 'media', # this is debatable but I'd rather avoid the heart ache.
-             data => 'data',   # deliberately not 'datum', for the same reason as media
-             bachelors => 'bachelors',
-             masters => 'masters',
-             tuna => 'tuna',
-
-             # words that are always plural
-             none => 'none',
-             pants => 'pants',
-             shorts => 'shorts',
-             police => 'police',
-             jeans => 'jeans',
-             clippers => 'clippers',
-             scissors => 'scissors',
-             binoculars => 'binoculars',
-             i => 'I',
-             thermos => 'thermos',
-             English => 'English',
-             physics => 'physics',
-             economics => 'economics',
-             selfishness => 'selfishness',
-             blues => 'blues'
-         );
-
-         # ordered: rules are tried top to bottom, so 's' has to stay last.
-         # There is no mouse/mice suffix rule: "mice" and "lice" are whole-word
-         # entries above, and a suffix rule turns "spouse" into "spice" or "rice"
-         # into "rouse", whichever way round it is written.
-         $mappings = ohash(
-             men => 'man',
-             es => 'is',
-             ices => 'ix',
-             ies => 'y',
-             eaux => 'eau',
-             ae => 'a',
-             i => 'us',
-             s => '');
-     }
-
-     return @($mappings, $words);
- }
-
- #
- # kill suffix
- #
- # $1 - the word to strip
- #
- # Only words ending in "able" or "ible" are touched. The four-letter ending is
- # removed and the stem is retried with "ated", "e", "y" and finally with no
- # ending at all. The first spelling found in $dictionary is returned, and $1
- # comes back unchanged when none is.
- #
- sub noSuffix
- {
-     local('$stem $ending $candidate');
-
-     if ([$1 endsWith: "able"] || [$1 endsWith: "ible"])
-     {
-         $stem = left($1, -4);
-
-         # the order matters: the first dictionary hit is returned
-         foreach $ending (@("ated", "e", "y", ""))
-         {
-             $candidate = $stem . $ending;
-
-             if ($candidate in $dictionary)
-             {
-                 return $candidate;
-             }
-         }
-     }
-
-     return $1;
- }
-
- #
- # convert a plural noun to a singular noun
- #
- # $1 - the plural word
- #
- # Lookup order: the whole-word table from getWordMappings(), then the base form
- # recorded in $verbs['plural'], then the suffix rules in their table order (a
- # rewritten form must be in $dictionary). $1 is returned unchanged when nothing
- # matches.
- #
- sub pluralToSingular
- {
-     local('$words $mappings $ending $swap $guess');
-     ($mappings, $words) = getWordMappings();
-
-     if ($1 in $words)
-     {
-         return $words[$1];
-     }
-
-     if ($1 in $verbs['plural'])
-     {
-         return $verbs['plural'][$1]['base'];
-     }
-
-     foreach $ending => $swap ($mappings)
-     {
-         if ([$1 endsWith: $ending])
-         {
-             # swap the plural ending for its singular counterpart
-             $guess = substr($1, 0, -1 * strlen($ending)) . $swap;
-
-             if ($guess in $dictionary)
-             {
-                 return $guess;
-             }
-         }
-     }
-
-     return $1;
- }
|