-
Notifications
You must be signed in to change notification settings - Fork 0
/
translate.ts
executable file
·69 lines (58 loc) · 2.03 KB
/
translate.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env -S deno run --allow-net=api.openai.com --allow-write --allow-read --allow-env
// NOTE: `env -S` splits the rest of the shebang line into separate arguments.
// Without it, Linux hands "deno run --allow-net=..." to env as one program name.
import { gpt } from './gpt.ts';
import { read, write } from './utils.ts';
// Number of lines from the previous chunk that are re-sent at the start of
// every chunk after the first; their translations are dropped from the result.
const REDUNDANCY = 10;
// Number of new caption lines the chunk loop advances by on each request.
const LINES_PER_REQUEST = 100;
// Script entry point. The function is declared below (declarations are hoisted);
// the promise it returns is not awaited or caught here.
traslateSrtFile('captions.srt');
/**
 * Translates the caption text of an SRT file in chunks.
 *
 * The file content is reduced to its caption lines with `parseSrtFile`, then
 * sent to `translate` in windows of `LINES_PER_REQUEST` lines. Every window
 * after the first is prefixed with up to `REDUNDANCY` preceding lines so the
 * model sees some context; the translations of those repeated lines are
 * discarded. The parsed lines are passed to `write` as 'input.json', the
 * translated lines as 'output.json', and the result is also logged.
 *
 * @param filename Name of the SRT file handed to the `read` helper.
 */
async function traslateSrtFile(filename: string) {
  const content = await read(filename);
  const lines = parseSrtFile(content);
  const result: string[] = [];
  // Awaited so a failing write surfaces here instead of as a floating promise
  // (harmless if `write` turns out to be synchronous).
  await write('input.json', lines);

  for (let start = 0; start < lines.length; start += LINES_PER_REQUEST) {
    const end = Math.min(start + LINES_PER_REQUEST, lines.length);
    // Never reach back past the beginning of the file: a negative slice start
    // would silently count from the end of the array instead.
    const overlap = Math.min(REDUNDANCY, start);
    const chunk = lines.slice(start - overlap, end);

    console.warn('Translating lines', start, 'to', end);
    const response = (await translate(chunk.join('\n'))).split('\n');

    // The model is only asked to keep the line count; it is not guaranteed.
    // Best effort: report the mismatch and carry on rather than abort the run.
    if (response.length !== chunk.length) {
      console.warn(
        'Expected',
        chunk.length,
        'translated lines but received',
        response.length,
        '- output may be misaligned from line',
        start
      );
    }

    // Drop the translations of the repeated context lines.
    result.push(...response.slice(overlap));
  }

  await write('output.json', result);
  console.log(result);
}
/**
 * Extracts the caption text lines from the content of an SRT file.
 *
 * Each line is trimmed, then the following are discarded:
 * - empty lines,
 * - timing lines (any line containing '-->'),
 * - cue index lines: digit-only lines directly followed by a timing line.
 *
 * A digit-only line that is NOT followed by a timing line is caption text
 * (e.g. a countdown "3", "2", "1") and is kept.
 *
 * @param content Raw text of the SRT file.
 * @returns The caption text lines in their original order.
 */
function parseSrtFile(content: string): string[] {
  // Trimming also strips the '\r' left over from CRLF line endings.
  const lines = content.split(/\n/).map((line) => line.trim());
  const digitsOnly = /^\d+$/;

  return lines.filter((line, index) => {
    if (line === '') {
      return false;
    }
    if (line.includes('-->')) {
      return false;
    }
    // Only treat a numeric line as a cue index when the timing line follows it.
    const next = lines[index + 1];
    const isCueIndex =
      digitsOnly.test(line) && next !== undefined && next.includes('-->');
    return !isCueIndex;
  });
}
/**
 * Sends `text` to the `gpt` helper (model 'gpt-4') with a system prompt asking
 * for a line-preserving translation into `lang`, followed by the user text and
 * a pre-written assistant message.
 *
 * @param text Newline-separated lines to translate.
 * @param lang Target language named in the system prompt; defaults to 'Spanish'.
 * @returns The `message.content` of the first choice in the response.
 */
async function translate(text: string, lang = 'Spanish') {
  const systemPrompt = [
    `You translate video scripts to ${lang}.`,
    'Keep the line breaks, the result should have exactly as many lines as the input.',
  ].join('\n');
  const assistantLeadIn =
    "Okay, here is the caption file to Spanish. I've kept the line breaks so the result has the same number of line breaks as the original.";

  const completion = await gpt(
    'gpt-4',
    { role: 'system', content: systemPrompt },
    { role: 'user', content: text },
    { role: 'assistant', content: assistantLeadIn }
  );

  const firstChoice = completion.choices[0];
  return firstChoice.message.content;
}