Skip to content

Commit

Permalink
Fix build for bun
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvNC committed Jul 11, 2024
1 parent 16bce41 commit 37d7c09
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 15 deletions.
Binary file modified bun.lockb
Binary file not shown.
19 changes: 19 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,22 @@ div.gloss-sc-div[data-sc-wikipedia='term-specifier'] {
## License

The code in this repository is licensed under the MIT license.

## Dev

This project uses bun.

To download the abstracts for a language, run:

```sh
./src/downloadDump.sh ja 2022.12.01
```

To build a dictionary, run:

```sh
bun run start ja 2022-12-01
```

where `ja` is the language code and the second argument is the date of the dump
(the December 1, 2022 dump is the most recent; there are no newer DBpedia
versions).
38 changes: 23 additions & 15 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import fs from 'fs';
import path from 'path';
import readline from 'readline';
import { file } from 'bun';
import { Dictionary, TermEntry } from 'yomichan-dict-builder';

import { parseLine } from './parse/parseLine';
Expand All @@ -27,23 +25,34 @@ const shortAbstractFile = (lang: string) =>
console.log(`Converting ${lang} Wikipedia dump from ${date}...`);

const filePath = shortAbstractFile(lang);
const fileStream = fs.createReadStream(filePath);
const lineReader = readline.createInterface({
input: fileStream,
crlfDelay: Infinity,
});
// Open the input through Bun's `file` helper and read it as a stream.
// NOTE(review): despite its name, `lineReader` is a stream reader whose
// `read()` yields raw chunks, not lines; the read loop further down decodes
// each chunk and splits the text on '\n' itself.
const fileHandle = file(filePath);
const fileReader = fileHandle.stream();
const lineReader = fileReader.getReader();

const dict = new Dictionary({
// @ts-ignore
fileName: outputZipName(lang, date, version),
});

let processedLines = 0;
for await (const line of lineReader) {
await processLine(line, dict, lang);
processedLines++;
if (processedLines % 1000 === 0) {
console.log(`Processed ${processedLines} lines`);
// Use ONE decoder in streaming mode for the whole file. A multi-byte UTF-8
// character can be split across two chunks; decoding each chunk with a fresh
// decoder would turn both halves into U+FFFD replacement characters.
const utf8Decoder = new TextDecoder();

// Text received so far that does not yet end in a newline (a partial line).
let buffer = '';

/** Processes one complete line and logs progress every 1000 lines. */
const handleLine = async (rawLine: string) => {
  await processLine(rawLine.trim(), dict, lang);
  processedLines++;

  if (processedLines % 1000 === 0) {
    console.log(`Processed ${processedLines} lines`);
  }
};

while (true) {
  const { done, value } = await lineReader.read();
  if (done) break;

  buffer += utf8Decoder.decode(value, { stream: true });

  // Everything before the last '\n' is a complete line; the trailing piece
  // (possibly empty) is a partial line carried over to the next chunk.
  const completeLines = buffer.split('\n');
  buffer = completeLines.pop() ?? '';

  for (const line of completeLines) {
    await handleLine(line);
  }
}

// Flush any bytes the decoder is still holding, then process the final line
// if the file did not end with a newline (otherwise it would be dropped).
buffer += utf8Decoder.decode();
if (buffer.length > 0) {
  await handleLine(buffer);
}

Expand Down Expand Up @@ -154,8 +163,7 @@ function processLine(line: string, dict: Dictionary, lang: string) {

function readArgs() {
// Read arguments: node convertWikipedia.js [language] [date of dump]
const langInput =
process.argv[2].toLowerCase() as keyof typeof languages;
const langInput = process.argv[2].toLowerCase() as keyof typeof languages;
// Assert language is valid
if (!languages[langInput]) {
throw new Error(
Expand Down

0 comments on commit 37d7c09

Please sign in to comment.