-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransliterate.js
168 lines (136 loc) · 5.79 KB
/
transliterate.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/* eslint-disable
no-constructor-return,
no-use-before-define,
*/
/**
 * Prefixes every regular-expression syntax character in a string with a
 * backslash, so the result can be embedded in a RegExp pattern and match the
 * original text literally.
 * @param {String} input The text to escape
 * @returns {String} The escaped text
 */
function escapeRegExp(input) {
  // `-` is deliberately not listed: outside a character class it needs no
  // escaping, and `\-` is a syntax error in patterns compiled with the `u` flag.
  const syntaxCharacters = /[.*+?^${}()|[\]\\]/gu
  const escaped = input.replace(syntaxCharacters, `\\$&`)
  return escaped
}
/**
 * Picks a random character from the Unicode geometric shapes block.
 * The character is drawn from the 95 consecutive code points that start at
 * U+25A0 (decimal 9632).
 * @private
 * @return {String} A one-character string holding the chosen code point
 */
function getRandomCodePoint() {
  const firstCodePoint = 9632
  const rangeSize = 95
  const offset = Math.floor(Math.random() * rangeSize)
  return String.fromCodePoint(firstCodePoint + offset)
}
/**
 * Alias for {@link transliterate}: forwards both arguments unchanged and
 * returns whatever `transliterate` returns.
 * @see transliterate
 * @param {String} string The string to sanitize
 * @param {Map|Object} substitutions The substitutions to make on the string
 * @return {String} The result of calling `transliterate` with the same arguments
 */
function sanitize(string, substitutions) {
  const sanitized = transliterate(string, substitutions)
  return sanitized
}
/**
 * An alias for the {@link Transliterator} class, built on {@link sanitize}.
 *
 * Calling `new Sanitizer(substitutions)` does not produce a class instance:
 * the constructor returns a function that sanitizes any string passed to it.
 * The rules in use are exposed on that function as its `substitutions`
 * property; reassigning the property changes the rules used by later calls.
 * @see Transliterator
 * @prop {Map|Object} substitutions The substitution rules used by the returned function
 */
class Sanitizer {

  /**
   * Create a new Sanitizer
   * @param {Map|Object} substitutions The substitution rules to apply
   * @return {Function} A function that accepts a string and returns it with the substitutions made
   */
  constructor(substitutions) {

    // The instance (`this`) is discarded because a function is returned, so
    // the rules are stored on the returned function where callers can reach them.
    const sanitizer = string => sanitize(string, sanitizer.substitutions)

    sanitizer.substitutions = substitutions

    return sanitizer

  }

}
/**
 * Makes a series of substitutions on a string. Can be used to convert a string from one writing system to another (a process known as "transliteration") or to remove unwanted characters or sequences of characters from a string (a process known as "sanitization").
 *
 * Substitutions are applied one after another, longest input first, so that a longer sequence is replaced before any shorter sequence it contains. When the replacement for one rule is itself the input of another rule (a "feeding" problem, e.g. `a → b` together with `b → a`), a temporary placeholder character is inserted instead and swapped for the real replacement once every rule has run. Replacement text is always inserted literally: sequences such as `$&` or `$1` have no special meaning.
 * @param {String} [string=``] The string to transliterate or sanitize.
 * @param {Map|Object} [subs=new Map] A hash of substitutions to make on the string. Each key should be a string of characters you want to replace, and the value for that key should be the new string of characters to replace it with. For example, setting `"s": "z"` will replace all `s` characters with `z`. To sanitize a string, provide each unwanted character or sequence of characters as a key, and set the value of that key to an empty string. For example, setting `"ts": ""` will remove all sequences of `ts` from the string (but leave individual instances of `t` and `s` that do not appear in sequence).
 * @return {String} Returns a new string with all substitutions made.
 * @throws {TypeError} If `string` is not a string, if `subs` is not a Map or Object, or if any replacement is not a string.
 * @throws {Error} If a temporary placeholder is needed but no unused placeholder character can be found.
 * @example {@lang javascript}
 * const substitutions = {
 *   tʼ: `d`,
 *   ts: `c`,
 * };
 *
 * const input  = `tsatʼ`;
 * const output = transliterate(input, substitutions);
 * console.log(output); // --> "cad"
 */
function transliterate(string = ``, subs = new Map) {

  // Type Checking

  if (typeof string !== `string`) {
    throw new TypeError(`The first argument passed to the transliterate function must be a string.`)
  }

  // `typeof null` is "object", so null has to be rejected explicitly
  if (subs === null || !(subs instanceof Map || typeof subs === `object`)) {
    throw new TypeError(`The substitutions object must be a Map or Object.`)
  }

  // Work on a private list of [input, replacement] pairs; the caller's Map or Object is never modified
  const rules = subs instanceof Map ? Array.from(subs.entries()) : Object.entries(subs)

  if (!rules.every(([, replacement]) => typeof replacement === `string`)) {
    throw new TypeError(`Replacements must all be strings.`)
  }

  // Variables

  // Placeholders are drawn at random from a small pool, so cap the number of
  // retries instead of looping forever when every candidate is already taken
  const maxPlaceholderAttempts = 10000

  const inputs = new Set(rules.map(([input]) => input)) // Every sequence that some rule replaces
  const temps  = new Map                                // Tracks temporary placeholders and the text they stand for
  let str      = string                                 // The string to manipulate

  // Text a placeholder must never collide with: a placeholder that already
  // occurs in the input, in a key, or in a replacement would be indistinguishable
  // from real content when the placeholders are swapped back out
  const reserved = string + rules.map(([input, replacement]) => input + replacement).join(``)

  // Transliteration Steps

  // Sort the substitutions by length of the input (avoids partial replacements)
  rules.sort(([a], [b]) => b.length - a.length)

  // Make each substitution on the string, using temporary placeholders if needed
  for (const [input, replacement] of rules) {

    let output = replacement

    // Check for feeding problems. `has` rather than a truthiness test on the
    // looked-up value, because a rule that maps to the empty string is still a rule.
    if (inputs.has(replacement)) {

      let attempts = 0
      let temp     = getRandomCodePoint()

      // Make sure the placeholder is unused everywhere, and generate a new one if not
      while (temps.has(temp) || reserved.includes(temp)) {

        attempts += 1

        if (attempts > maxPlaceholderAttempts) {
          throw new Error(`Unable to find an unused temporary placeholder character.`)
        }

        temp = getRandomCodePoint()

      }

      temps.set(temp, replacement)
      output = temp

    }

    // Escape regexp special characters in the input
    const regexp = new RegExp(escapeRegExp(input), `gu`)

    // A replacer function inserts the text verbatim; a replacement *string*
    // would have `$&`, `$1`, `$$`, etc. interpreted as special patterns
    str = str.replace(regexp, () => output)

  }

  // Replace the temporary placeholders with their original values
  for (const [temp, replacement] of temps) {
    const regexp = new RegExp(escapeRegExp(temp), `gu`)
    str = str.replace(regexp, () => replacement)
  }

  // Return the transliterated string
  return str

}
/**
 * A Transliterator class that saves a set of transliteration rules for repeated use.
 *
 * Calling `new Transliterator(substitutions)` does not produce a class instance: the constructor returns a function that transliterates any string passed to it. The rules in use are exposed on that function as its `substitutions` property.
 * @prop {Map|Object} substitutions The set of substitution rules for this Transliterator. You can update the substitution rules used by this Transliterator at any time by modifying this object, or by assigning a new object to this property. See the {@link transliterate} method for documentation on how this substitutions object should be formatted.
 * @example {@lang javascript}
 * const substitutions = {
 *   tʼ: `d`,
 *   ts: `c`,
 * };
 *
 * const transliterate = new Transliterator(substitutions);
 * const input         = `tsatʼ`;
 * const output        = transliterate(input);
 * console.log(output); // --> "cad"
 */
class Transliterator {

  /**
   * Create a new Transliterator
   * @param {Map|Object} substitutions The set of substitution rules that this Transliterator should use. See the {@link transliterate} method for documentation on how this substitutions object should be formatted.
   * @return {Function} Returns a transliterate function that accepts a string and makes the substitutions provided in the `substitutions` argument.
   */
  constructor(substitutions) {

    // The instance (`this`) is discarded because a function is returned, so
    // the rules are stored on the returned function where callers can reach them.
    const transliterator = string => transliterate(string, transliterator.substitutions)

    transliterator.substitutions = substitutions

    return transliterator

  }

}
// Exports
export {
sanitize,
Sanitizer,
transliterate,
Transliterator,
}