Skip to content

Commit

Permalink
wip: need to decline gerunds
Browse files Browse the repository at this point in the history
  • Loading branch information
noomorph committed Dec 15, 2023
1 parent 786f5cc commit b4ad5b6
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 453 deletions.
1 change: 1 addition & 0 deletions scripts/dictionary.txt

Large diffs are not rendered by default.

51 changes: 38 additions & 13 deletions scripts/generate-rule-exceptions.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import fs from "node:fs";
import utils from '../dist/index.js';
import _ from 'lodash';

function* extractWords(str) {
// include letters and combining marks
Expand All @@ -18,20 +19,27 @@ function* extractWordsFromFile(filePath) {
yield* extractWords(raw);
}

function* allWords() {
function* readAllWords() {
yield* extractWordsFromFile('src/adjective/testCases.json');
yield* extractWordsFromFile('src/noun/__snapshots__/declensionNoun.test.ts.snap');
yield* extractWordsFromFile('src/numeral/testCases.json');
yield* extractWordsFromFile('src/pronoun/testCases.json');
yield* extractWordsFromFile('src/verb/testCases.json');
yield* extractWordsFromFile('scripts/misc-nj-exceptions.txt');
yield* extractWordsFromFile('scripts/dictionary.txt');
}

console.log('Reading all words...');
// Collect every extracted word once (duplicates are dropped before
// lowercasing) and pair its lowercased form with the result of
// transliterating that form to 'art-Latn-x-interslv'.
const allWords = _.uniq(Array.from(readAllWords())).map((rawWord) => {
  const lower = rawWord.toLowerCase();
  return [lower, utils.transliterate(lower, 'art-Latn-x-interslv')];
});

function buildExceptionList(predicate) {
const set = new Set();
for (const word of allWords()) {
if (predicate(word)) {
set.add(utils.transliterate(word.toLowerCase(), 'art-Latn-x-interslv'));
for (const [lower, standard] of allWords) {
if (predicate(lower, standard)) {
set.add(standard);
}
}
return [...set].sort();
Expand Down Expand Up @@ -80,25 +88,43 @@ function containsLjj(word) {
return word.includes('ľj');
}

function endsWithNj(word) {
/**
 * Tells whether `word` contains the `ńj` sequence, while rejecting words
 * whose `standard` form is matched by `endsWithNonTypicalNje` or ends
 * with `nju`.
 *
 * @param {string} word - the word searched for `ńj`
 * @param {string} standard - the second form of the same word supplied by
 *   the caller (presumably its transliterated spelling — verify at call site)
 * @returns {boolean} `true` only when `ńj` is present and neither exclusion applies
 */
function containsNjj(word, standard) {
  if (!word.includes('ńj')) {
    return false;
  }
  return !(endsWithNonTypicalNje(standard) || standard.endsWith('nju'));
}

function endsWithNonTypicalNje(word) {
return word.endsWith('nja')
|| word.endsWith('njah')
|| word.endsWith('njam')
|| word.endsWith('njami')
|| word.endsWith('nje')
|| word.endsWith('njem')
|| word.endsWith('nju')
|| word.endsWith('njų');
|| word.endsWith('njem');
}

/**
 * Tells whether `word` ends with one of the suffixes `ńju` or `ńjų`.
 *
 * @param {string} word - the word to inspect
 * @returns {boolean} `true` when the word ends with either suffix
 */
function endsWithNonTypicalNjju(word) {
  const suffixes = ['ńju', 'ńjų'];
  return suffixes.some((suffix) => word.endsWith(suffix));
}

console.log('Generating fixtures...');

fs.writeFileSync(
'src/transliterate/lj-nj/exceptions-lj.json',
'src/transliterate/lj-nj/list-ljj.json',
generateRuleExceptions(containsLjj)
);

fs.writeFileSync(
'src/transliterate/lj-nj/exceptions-nj.json',
generateRuleExceptions(endsWithNj)
'src/transliterate/lj-nj/list-njj.json',
generateRuleExceptions(containsNjj)
);

fs.writeFileSync(
'src/transliterate/lj-nj/exceptions-nje.json',
generateRuleExceptions(endsWithNonTypicalNje)
);

fs.writeFileSync(
'src/transliterate/lj-nj/exceptions-njju.json',
generateRuleExceptions(endsWithNonTypicalNjju)
);

fs.writeFileSync(
Expand All @@ -110,6 +136,5 @@ fs.writeFileSync(
'njami%',
'nje%',
'njem%',
'nju%',
]),
);
Loading

0 comments on commit b4ad5b6

Please sign in to comment.