Skip to content

Commit

Permalink
feat: helper function to convert aksara to latin
Browse files Browse the repository at this point in the history
  • Loading branch information
DedeKurnn committed Oct 28, 2024
1 parent 4701ee1 commit df9c810
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 31 deletions.
10 changes: 9 additions & 1 deletion deno.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 55 additions & 3 deletions src/mod.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { syllabify, getAksaraBySyllables } from "./utils.ts";

import { regexAnakHurufMap, regexIndukHurufMap } from "./regex.ts";
/**
* Converts a given Latin script string into its corresponding Aksara Lampung script.
*
Expand Down Expand Up @@ -32,7 +32,59 @@ export function getAksara(latin: string): string {
* @returns {string} The converted string in Latin script.
*/
export function getLatin(aksara: string): string {
  let latin = aksara;

  // Induk huruf (base letters): every match of a pattern is replaced by the
  // syllable it is keyed under in regexIndukHurufMap. The order is significant
  // because each replacement inserts plain Latin letters that other patterns
  // also match; e.g. "ha" must run before "gha", otherwise the "h" inserted by
  // "gha" would be expanded a second time.
  const indukHurufOrder = [
    "a",
    "ka",
    "ga",
    "pa",
    "ba",
    "ma",
    "ta",
    "da",
    "na",
    "ca",
    "ja",
    "ya",
    "la",
    "ra",
    "sa",
    "wa",
    "ha",
    "gha",
    "nga",
    "nya",
    "kha",
  ] as const;
  for (const syllable of indukHurufOrder) {
    latin = latin.replace(regexIndukHurufMap[syllable], syllable);
  }

  // Anak huruf that rewrite the vowel: each pattern matches the inherent "a"
  // added above plus one mark character. The last two characters of the match
  // are dropped and the vowel the mark stands for is appended (nothing at all
  // for nengen, which removes the inherent vowel).
  const vowelMarks: ReadonlyArray<readonly [RegExp, string]> = [
    [regexAnakHurufMap.nengen, ""],
    [regexAnakHurufMap.ulan_i, "i"],
    // An alternative spelling for the letter "é" may need to be considered.
    [regexAnakHurufMap.ulan_e, "é"],
    [regexAnakHurufMap.bicek_e, "e"],
    [regexAnakHurufMap.bitan_o, "o"],
    [regexAnakHurufMap.bitan_u, "u"],
  ];
  for (const [pattern, vowel] of vowelMarks) {
    latin = latin.replace(pattern, (match) => match.slice(0, -2) + vowel);
  }

  // Anak huruf that stand for a closing sound: replaced one-for-one.
  const finalMarks: ReadonlyArray<readonly [RegExp, string]> = [
    [regexAnakHurufMap.tekelubang, "ng"],
    [regexAnakHurufMap.rejunjung, "r"],
    [regexAnakHurufMap.datasan, "n"],
    [regexAnakHurufMap.tekelungau, "u"],
    [regexAnakHurufMap.tekelingai, "i"],
    [regexAnakHurufMap.keleniah, "h"],
  ];
  for (const [pattern, sound] of finalMarks) {
    latin = latin.replace(pattern, sound);
  }

  // Strip punctuation. The `g` flag is required: without it `replace` removes
  // only the first punctuation character and leaves the rest in place.
  const punctuation = /[,!@.*+?$^\/\\;:'"[\]{}()%#=_-]/g;
  return latin.replace(punctuation, "");
}
64 changes: 64 additions & 0 deletions src/regex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/**
 * Patterns for the induk huruf (base letters), keyed by the Latin syllable
 * that `getLatin` in ./mod.ts substitutes for every match.
 *
 * Each pattern is a single-character class with the `g` flag, so one input
 * character is expanded to one syllable everywhere it occurs. Several keys
 * accept more than one input character (e.g. `pa` accepts P, p, F, f, v, V).
 *
 * NOTE(review): `kha` and `gha` use the same pattern (/[H]/g). `getLatin`
 * applies `gha` before `kha`, so "H" always becomes "gha" and `kha` can never
 * match — confirm which input character is meant to stand for `kha`.
 */
export const regexIndukHurufMap = {
ka: /[KkQq]/g,
ga: /[g]/g, // lowercase only; uppercase G is `nga`
nga: /[G]/g,
pa: /[PpFfvV]/g,
ba: /[Bb]/g,
ma: /[Mm]/g,
ta: /[Tt]/g,
da: /[Dd]/g,
na: /[n]/g, // lowercase only; uppercase N is `nya`
ca: /[Cc]/g,
ja: /[JjZz]/g,
nya: /[N]/g,
ya: /[y]/g,
a: /[a]/g,
la: /[Ll]/g,
ra: /[r]/g,
sa: /[Ss]/g,
wa: /[w]/g,
ha: /[h]/g, // lowercase only; uppercase H is `kha`/`gha` below
kha: /[H]/g,
gha: /[H]/g,
};

/**
 * Patterns for the anak huruf (marks attached to a base letter), consumed by
 * `getLatin` in ./mod.ts. All patterns carry the `g` flag.
 *
 * The first six match a literal "a" followed by one mark character; `getLatin`
 * replaces that pair with a vowel, or with nothing for `nengen`. The remaining
 * six match a single character that `getLatin` replaces with a fixed sound.
 */
export const regexAnakHurufMap = {
nengen: /a[\/]/g, // "a/" -> "" (drops the vowel)
ulan_i: /a[i]/g, // "ai" -> "i"
ulan_e: /a[E]/g, // "aE" -> "é"
bicek_e: /a[e]/g, // "ae" -> "e"
bitan_o: /a[Oo]/g, // "aO" or "ao" -> "o"
bitan_u: /a[u]/g, // "au" -> "u"
tekelubang: /X/g, // "X" -> "ng"
rejunjung: /R/g, // "R" -> "r"
datasan: /A/g, // "A" -> "n"
tekelungau: /W/g, // "W" -> "u"
tekelingai: /I/g, // "I" -> "i"
keleniah: /x/g, // "x" -> "h"
};

/**
 * Patterns over Latin text, imported by ./utils.ts.
 *
 * The comments below describe only what each expression matches; how the
 * matches are consumed is decided by the caller.
 */
export const regexPatterns = {
// "ng" followed by a vowel.
ng: /(ng)[aiueoAIUEO]/g,
// "ny" followed by a vowel or one of the listed consonant characters.
ny: /(ny)[aiueoAIUEOkKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHh]/g,
// "gh" or "kh" followed by a vowel or one of the listed consonant characters.
gh: /(gh|kh)[aiueoAIUEOkKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHh]/g,
// Input that starts with a vowel other than a/A (no `g` flag).
vocal: /^[iIuUeEoO]/,
// A single punctuation character (no `g` flag, so only the first occurrence).
tandabaca: /[,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]/,
// Vowel + captured final sound + one consonant/punctuation character.
// NOTE(review): in the next six patterns the character class is closed by the
// first "]" after "_-"; the extra "]" that follows is then a literal, so these
// only match when that class character is itself followed by "]". This looks
// unintended — confirm before relying on these patterns.
ang: /[aiueoIUEO](ng)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
an: /[aiueoIUEO](n)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
ah: /[aiueoIUEO](h)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
ar: /[aiueoIUEO](r)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
ai: /[aiueoIUEO](i)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
au: /[aiueoIUEO](u)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
// Vowel, two consecutive consonant characters, vowel.
xawal:
/[aiueoIUEO][kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWH][kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWH][aiueoIUEO]/g,
// Vowel + one captured closing character + optional punctuation at the end of
// the input, unless the vowel is followed by R, r, u, n, h, ng, gh, kh or i.
x: /(?![aeiouAEIOU](R|r|u|n|h|ng|gh|kh|i))[aiueoIUEO]([kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWHnhriu])[,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]?$/g,
// Vowel + "ng" at the end of the input.
xng: /[aiueoIUEO]ng$/g,
// Vowel followed by n / h / r (only before a word boundary) / i / u.
anx: /[aiueoIUEO]n/g,
ahx: /[aiueoIUEO]h/g,
arx: /[aiueoIUEO]r\b/g,
aix: /[aiueoIUEO]i/g,
aux: /[aiueoIUEO]u/g,
// One of the listed consonant characters followed by lowercase "a".
a: /[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr][a]/g,
// A vowel other than a/A, or one of X, A, R, at the start of a word.
awal: /\b[iIuUeEoOXAR]/g,
// Two consecutive lowercase consonant letters, not preceded by n, g, y or h
// and not starting with ng, ny, gh or kh.
digraph: /(?<!n|g|y|h)(?!ng|ny|gh|kh)[b-df-hj-np-tv-z]{2}/g,
};
28 changes: 2 additions & 26 deletions src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { regexPatterns } from "./regex.ts";

/**
* Splits a syllable into parts based on vowel pairs, allowing specified exceptions.
*
Expand Down Expand Up @@ -127,32 +129,6 @@ export const getAksaraBySyllables = (word: string) => {
.replaceAll("x", "ks")
.replaceAll("z", "j");

// Pattern table over Latin text used by the surrounding conversion code.
// NOTE(review): this file also imports `regexPatterns` from ./regex.ts, and
// this local declaration carries the same name — it looks like a leftover copy
// of that table; confirm whether it should be removed.
// NOTE(review): in `ang`, `an`, `ah`, `ar`, `ai` and `au` the character class
// is closed by the first "]" after "_-", so the extra "]" that follows is a
// literal and must be present in the input for those patterns to match.
const regexPatterns = {
ng: /(ng)[aiueoAIUEO]/g,
ny: /(ny)[aiueoAIUEOkKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHh]/g,
gh: /(gh|kh)[aiueoAIUEOkKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHh]/g,
vocal: /^[iIuUeEoO]/,
tandabaca: /[,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]/,
ang: /[aiueoIUEO](ng)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
an: /[aiueoIUEO](n)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
ah: /[aiueoIUEO](h)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
ar: /[aiueoIUEO](r)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
ai: /[aiueoIUEO](i)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
au: /[aiueoIUEO](u)[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]]/g,
xawal:
/[aiueoIUEO][kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWH][kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWH][aiueoIUEO]/g,
x: /(?![aeiouAEIOU](R|r|u|n|h|ng|gh|kh|i))[aiueoIUEO]([kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWHnhriu])[,!@.*+?$^/\\;:'"[\]{}()%#^+=_-]?$/g,
xng: /[aiueoIUEO]ng$/g,
anx: /[aiueoIUEO]n/g,
ahx: /[aiueoIUEO]h/g,
arx: /[aiueoIUEO]r\b/g,
aix: /[aiueoIUEO]i/g,
aux: /[aiueoIUEO]u/g,
a: /[kKQqgpPfFvVbBmMtTdDcCjJzZyYlLsSwWGHhnr][a]/g,
awal: /\b[iIuUeEoOXAR]/g,
digraph: /(?<!n|g|y|h)(?!ng|ny|gh|kh)[b-df-hj-np-tv-z]{2}/g,
};

// Replace certain consonant with prefix
const patternPrefix = (aksara: string, pattern: RegExp, prefix: string) => {
if (aksara.match(/nya(?![ionuh])/gi)) {
Expand Down
9 changes: 8 additions & 1 deletion test/getAksara.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { assertEquals } from "jsr:@std/assert";
import { getAksara } from "../src/mod.ts";
import { getAksara, getLatin } from "../src/mod.ts";

Deno.test("getAksara should return correct aksara for given input", () => {
const input = "sikam haga mengan";
Expand All @@ -14,3 +14,10 @@ Deno.test("getAksara should handle empty input", () => {
const result = getAksara(input);
assertEquals(result, expectedOutput);
});

// getLatin must turn the sample aksara input back into its Latin phrase.
Deno.test("should convert aksara string back to latin", () => {
  assertEquals(getLatin("sikm/ hg meGA"), "sikam haga mengan");
});

0 comments on commit df9c810

Please sign in to comment.