Skip to content

Commit

Permalink
use debug and revise aot using lowdb
Browse files Browse the repository at this point in the history
  • Loading branch information
sergeyt committed May 30, 2021
1 parent c65b110 commit 6bfcad2
Show file tree
Hide file tree
Showing 8 changed files with 277 additions and 30 deletions.
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"scripts": {
"build": "tsdx build --format cjs",
"ci": "yarn test && yarn build",
"test": "tsdx test"
"test": "tsdx test",
"aot": "cd tools/aot && yarn start ../../db.json"
},
"husky": {
"hooks": {
Expand All @@ -19,6 +20,7 @@
"dependencies": {
"cheerio": "^1.0.0-rc.9",
"compromise": "^13.11.2",
"debug": "^4.3.1",
"fetch-ponyfill": "^7.1.0",
"js-base64": "^3.6.1",
"lodash": "^4.17.21"
Expand Down
8 changes: 6 additions & 2 deletions src/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import {
} from "./types";
import { makeEngine } from "./factory";

// Logger from the `debug` package, created under the "lingua-scraper" namespace.
// NOTE(review): per the `debug` package convention, output is presumably only
// emitted when the DEBUG environment variable matches this namespace — confirm.
const debug = require("debug")("lingua-scraper");

export const sources: Source[] = [
unsplash,
wordnik,
Expand Down Expand Up @@ -249,14 +251,16 @@ export function makeParser(source: Source) {
url = source.url + url;
}

debug(`proc ${source.name} ${url}`);

if (source.getData) {
try {
const data = await source.getData(url, query);
const result = { source: takeMeta(source), data };
result.source.url = url;
return [result];
} catch (error) {
console.log("error", source.name, error);
debug("error", source.name, url, error);
return [{ source: takeMeta(source), error }];
}
}
Expand All @@ -265,7 +269,7 @@ export function makeParser(source: Source) {
const result = await processUrl(url, source.plan, source);
return [result];
} catch (error) {
console.log("error", source.name, error);
debug("error", source.name, url, error);
return [{ source: takeMeta(source), error }];
}
};
Expand Down
27 changes: 0 additions & 27 deletions tools/aot.ts

This file was deleted.

18 changes: 18 additions & 0 deletions tools/aot/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "aot",
"version": "1.0.0",
"type": "module",
"exports": "./main.js",
"engines": {
"node": ">=14.0.0"
},
"dependencies": {
"lingua-scraper": "^0.17.3",
"lowdb": "^2.1.0",
"p-series": "^2.1.0"
},
"scripts": {
"build": "tsc",
"start": "yarn build && node dist/main.js"
}
}
39 changes: 39 additions & 0 deletions tools/aot/src/main.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import _ from "lodash";
import { Low, JSONFile } from "lowdb";
import pSeries from "p-series";
import { fetchData, ogden, dolch } from "lingua-scraper";

/**
 * Fetches scraper data for every unique word in the `ogden` and `dolch`
 * collections and stores it in a lowdb JSON file, keyed by the word text.
 *
 * The target file is the first CLI argument and defaults to `./db.json`.
 * Words are processed strictly one after another via `pSeries`. The database
 * is flushed to disk after every 10th word and once more when all words are
 * done. A word whose fetch fails is logged and skipped.
 */
async function main() {
  const words = _.uniq(
    ogden.categories
      .flatMap((t) => t.words)
      .concat(dolch.categories.flatMap((t) => t.words))
      .map((t) => t.text)
  );

  // `words` is a dictionary: word text -> fetched data for that word.
  type DbShape = {
    words: Record<string, unknown>;
  };

  const dbFile = process.argv[2] || "./db.json";
  const db = new Low(new JSONFile<DbShape>(dbFile));
  await db.read();

  // `db.data` is null when the file does not exist yet. Keep a non-null local
  // reference so the closures below do not dereference a nullable property.
  const data = (db.data ||= { words: {} });
  // An existing file may lack the `words` key (e.g. `{}`); without this every
  // assignment below would throw and be misreported as a fetch failure.
  data.words ??= {};
  const store = data.words;

  const tasks = words.map((text, index) => async () => {
    try {
      store[text] = await fetchData({ text });
    } catch (err) {
      console.log(`fetch '${text}' fail:`, err);
      return;
    }
    // Periodic checkpoint so an interrupted run keeps most of its progress.
    if ((index + 1) % 10 === 0) {
      try {
        await db.write();
      } catch (err) {
        // Best effort: keep going, the final write below reports a
        // persistent problem.
        console.log(`write '${dbFile}' fail:`, err);
      }
    }
  });

  await pSeries(tasks);
  await db.write();
  // Explicit exit; presumably something keeps the event loop alive after the
  // work is done — TODO confirm.
  process.exit();
}

// Entry point: report an unexpected failure and make the exit status reflect
// it, so callers (e.g. `yarn aot`) do not see a failed run as a success.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
12 changes: 12 additions & 0 deletions tools/aot/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"compilerOptions": {
"outDir": "dist",
"target": "es5",
"sourceMap": true,
"moduleResolution": "node",
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"lib": ["ESNext", "DOM"],
"module": "ES2020"
}
}
192 changes: 192 additions & 0 deletions tools/aot/yarn.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
# yarn lockfile v1


"@sindresorhus/is@^0.15.0":
version "0.15.0"
resolved "https://registry.yarnpkg.com/@sindresorhus/is/-/is-0.15.0.tgz#96915baa05e6a6a1d137badf4984d3fc05820bb6"
integrity sha512-lu8BpxjAtRCAo5ifytTpCPCj99LF7o/2Myn+NXyNCBqvPYn7Pjd76AMmUB5l7XF1U6t0hcWrlEM5ESufW7wAeA==

boolbase@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e"
integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24=

cheerio-select@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-1.4.0.tgz#3a16f21e37a2ef0f211d6d1aa4eff054bb22cdc9"
integrity sha512-sobR3Yqz27L553Qa7cK6rtJlMDbiKPdNywtR95Sj/YgfpLfy0u6CGJuaBKe5YE/vTc23SCRKxWSdlon/w6I/Ew==
dependencies:
css-select "^4.1.2"
css-what "^5.0.0"
domelementtype "^2.2.0"
domhandler "^4.2.0"
domutils "^2.6.0"

cheerio@^1.0.0-rc.5:
version "1.0.0-rc.9"
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.9.tgz#a3ae6b7ce7af80675302ff836f628e7cb786a67f"
integrity sha512-QF6XVdrLONO6DXRF5iaolY+odmhj2CLj+xzNod7INPWMi/x9X4SOylH0S/vaPpX+AUU6t04s34SQNh7DbkuCng==
dependencies:
cheerio-select "^1.4.0"
dom-serializer "^1.3.1"
domhandler "^4.2.0"
htmlparser2 "^6.1.0"
parse5 "^6.0.1"
parse5-htmlparser2-tree-adapter "^6.0.1"
tslib "^2.2.0"

compromise@^13.10.4:
version "13.11.2"
resolved "https://registry.yarnpkg.com/compromise/-/compromise-13.11.2.tgz#4bc2dbe615fbc6d5c9f3cafe61327c61f427e10a"
integrity sha512-sAASylAtghacJm3tTIF2AyekjxNMGyWaIsQ3sDdJT1UcIa6mC7VbNLQBI3eSBitf+LwrvZTgrY1nWOetY4SScw==
dependencies:
efrt-unpack "2.2.0"

css-select@^4.1.2:
version "4.1.2"
resolved "https://registry.yarnpkg.com/css-select/-/css-select-4.1.2.tgz#8b52b6714ed3a80d8221ec971c543f3b12653286"
integrity sha512-nu5ye2Hg/4ISq4XqdLY2bEatAcLIdt3OYGFc9Tm9n7VSlFBcfRv0gBNksHRgSdUDQGtN3XrZ94ztW+NfzkFSUw==
dependencies:
boolbase "^1.0.0"
css-what "^5.0.0"
domhandler "^4.2.0"
domutils "^2.6.0"
nth-check "^2.0.0"

css-what@^5.0.0:
version "5.0.1"
resolved "https://registry.yarnpkg.com/css-what/-/css-what-5.0.1.tgz#3efa820131f4669a8ac2408f9c32e7c7de9f4cad"
integrity sha512-FYDTSHb/7KXsWICVsxdmiExPjCfRC4qRFBdVwv7Ax9hMnvMmEjP9RfxTEZ3qPZGmADDn2vAKSo9UcN1jKVYscg==

dom-serializer@^1.0.1, dom-serializer@^1.3.1:
version "1.3.2"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-1.3.2.tgz#6206437d32ceefaec7161803230c7a20bc1b4d91"
integrity sha512-5c54Bk5Dw4qAxNOI1pFEizPSjVsx5+bpJKmL2kPn8JhBUq2q09tTCa3mjijun2NfK78NMouDYNMBkOrPZiS+ig==
dependencies:
domelementtype "^2.0.1"
domhandler "^4.2.0"
entities "^2.0.0"

domelementtype@^2.0.1, domelementtype@^2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.2.0.tgz#9a0b6c2782ed6a1c7323d42267183df9bd8b1d57"
integrity sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==

domhandler@^4.0.0, domhandler@^4.2.0:
version "4.2.0"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.2.0.tgz#f9768a5f034be60a89a27c2e4d0f74eba0d8b059"
integrity sha512-zk7sgt970kzPks2Bf+dwT/PLzghLnsivb9CcxkvR8Mzr66Olr0Ofd8neSbglHJHaHa2MadfoSdNlKYAaafmWfA==
dependencies:
domelementtype "^2.2.0"

domutils@^2.5.2, domutils@^2.6.0:
version "2.6.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.6.0.tgz#2e15c04185d43fb16ae7057cb76433c6edb938b7"
integrity sha512-y0BezHuy4MDYxh6OvolXYsH+1EMGmFbwv5FKW7ovwMG6zTPWqNPq3WF9ayZssFq+UlKdffGLbOEaghNdaOm1WA==
dependencies:
dom-serializer "^1.0.1"
domelementtype "^2.2.0"
domhandler "^4.2.0"

[email protected]:
version "2.2.0"
resolved "https://registry.yarnpkg.com/efrt-unpack/-/efrt-unpack-2.2.0.tgz#b05dbec0fb8cb346a27840e00c969df9c72fee52"
integrity sha512-9xUSSj7qcUxz+0r4X3+bwUNttEfGfK5AH+LVa1aTpqdAfrN5VhROYCfcF+up4hp5OL7IUKcZJJrzAGipQRDoiQ==

entities@^2.0.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==

fetch-ponyfill@^7.1.0:
version "7.1.0"
resolved "https://registry.yarnpkg.com/fetch-ponyfill/-/fetch-ponyfill-7.1.0.tgz#4266ed48b4e64663a50ab7f7fcb8e76f990526d0"
integrity sha512-FhbbL55dj/qdVO3YNK7ZEkshvj3eQ7EuIGV2I6ic/2YiocvyWv+7jg2s4AyS0wdRU75s3tA8ZxI/xPigb0v5Aw==
dependencies:
node-fetch "~2.6.1"

htmlparser2@^6.1.0:
version "6.1.0"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-6.1.0.tgz#c4d762b6c3371a05dbe65e94ae43a9f845fb8fb7"
integrity sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==
dependencies:
domelementtype "^2.0.1"
domhandler "^4.0.0"
domutils "^2.5.2"
entities "^2.0.0"

js-base64@^3.6.0:
version "3.6.1"
resolved "https://registry.yarnpkg.com/js-base64/-/js-base64-3.6.1.tgz#555aae398b74694b4037af1f8a5a6209d170efbe"
integrity sha512-Frdq2+tRRGLQUIQOgsIGSCd1VePCS2fsddTG5dTCqR0JHgltXWfsxnY0gIXPoMeRmdom6Oyq+UMOFg5suduOjQ==

lingua-scraper@^0.17.3:
version "0.17.3"
resolved "https://registry.yarnpkg.com/lingua-scraper/-/lingua-scraper-0.17.3.tgz#a955e02a12412cf789bfc7c69f13d958761ac6a9"
integrity sha512-Pd1Obmjyi1vLMe/TgW+Lu3SsZIRTpKxYUX7BHgveGga6bMj7Buv/ooOgCivCornB5tjIwW+Zu0GSz60EBdnI/g==
dependencies:
cheerio "^1.0.0-rc.5"
compromise "^13.10.4"
fetch-ponyfill "^7.1.0"
js-base64 "^3.6.0"
lodash "^4.17.21"

lodash@^4.17.21:
version "4.17.21"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==

lowdb@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/lowdb/-/lowdb-2.1.0.tgz#c8063e228b5ab3e082ece90e0512537ecb6e1e2a"
integrity sha512-F4Go8/V37gAidTR3c5poyjprOpZSDNSLJVOmI0ny4D4q9rC37OkBhlzX0bqj7LZlT3UIj4FchmZrrSw7qY+eGQ==
dependencies:
steno "^1.0.0"

node-fetch@~2.6.1:
version "2.6.1"
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052"
integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==

nth-check@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.0.0.tgz#1bb4f6dac70072fc313e8c9cd1417b5074c0a125"
integrity sha512-i4sc/Kj8htBrAiH1viZ0TgU8Y5XqCaV/FziYK6TBczxmeKm3AEFWqqF3195yKudrarqy7Zu80Ra5dobFjn9X/Q==
dependencies:
boolbase "^1.0.0"

p-reduce@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/p-reduce/-/p-reduce-2.1.0.tgz#09408da49507c6c274faa31f28df334bc712b64a"
integrity sha512-2USApvnsutq8uoxZBGbbWM0JIYLiEMJ9RlaN7fAzVNb9OZN0SHjjTTfIcb667XynS5Y1VhwDJVDa72TnPzAYWw==

p-series@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/p-series/-/p-series-2.1.0.tgz#7035b3a81e2644d4ba407c1ebbc21776e353fa29"
integrity sha512-vEAnkG1ikRT1kPBrKwpj7AFYQkd1hjt/oHeppxtpoPxy5gEt+OWiHZJN3tMqvFa+UJfVwO3lwHoMUpMYBLKnaQ==
dependencies:
"@sindresorhus/is" "^0.15.0"
p-reduce "^2.1.0"

parse5-htmlparser2-tree-adapter@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz#2cdf9ad823321140370d4dbf5d3e92c7c8ddc6e6"
integrity sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==
dependencies:
parse5 "^6.0.1"

parse5@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b"
integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==

steno@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/steno/-/steno-1.0.0.tgz#475e32c6066ec9760229eaaf1550601764fbecba"
integrity sha512-C/KgCvEa1yWnpHmaPjAXrz1yWxh6hs+HvhqqPa71euaQmNi1wr4+WFo57VQxjKKuFl2KqS7gtlrN0oxj2noQLw==

tslib@^2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.2.0.tgz#fb2c475977e35e241311ede2693cee1ec6698f5c"
integrity sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==
7 changes: 7 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2574,6 +2574,13 @@ debug@^2.2.0, debug@^2.3.3, debug@^2.6.9:
dependencies:
ms "2.0.0"

debug@^4.3.1:
version "4.3.1"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee"
integrity sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==
dependencies:
ms "2.1.2"

decamelize@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290"
Expand Down

0 comments on commit 6bfcad2

Please sign in to comment.