-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_kanji_flashcard
executable file
·105 lines (87 loc) · 2.77 KB
/
generate_kanji_flashcard
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
"""
Fetch entries from jisho and save them to a csv.
Meant to get a .csv containing flashcards to import into Anki.
"""
import argparse
import csv
import json
import logging
import pdb
import requests
# Configure the root logger at DEBUG level so the logging.info() progress
# messages emitted by this script are shown.
logging.basicConfig(level=logging.DEBUG)
# Built-in example input: one search keyword per line. main() falls back to
# this text when no input file is given on the command line.
WORDS = """
kangoshi
tomodachi
"""
def get_jisho_data(word, page=1, timeout=10):
    """Query the Jisho word-search API and return its list of results.

    Args:
        word: Search keyword. It is sent as a query parameter, so characters
            such as ``#``, ``&``, spaces or Japanese text are URL-encoded
            correctly (e.g. a tag search like ``#jlpt-n5``).
        page: Result page number forwarded to the API (defaults to 1).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        RuntimeError: If the HTTP status code is not 200.
        requests.RequestException: On connection errors or timeouts.
    """
    # Let requests build the query string instead of formatting it by hand:
    # a raw "#" would otherwise start a URL fragment and "&" would split the
    # keyword into a second parameter.
    response = requests.get(
        "https://jisho.org/api/v1/search/words",
        params={"keyword": word, "page": page},
        timeout=timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(
            f"Bad status code {response.status_code} for keyword {word!r}"
        )
    return response.json()["data"]
def build_entry(data, word):
    """Yield one flashcard dict per sense of every JLPT-tagged result.

    Args:
        data: Iterable of result dicts as returned by the Jisho API. The keys
            read are ``jlpt``, ``japanese`` and ``senses``.
        word: The keyword that produced ``data``. Currently unused; kept so
            existing callers keep working.

    Yields:
        Dicts with the keys ``word``, ``pronunciation``, ``meaning`` and
        ``jlpt`` (in that order). ``word`` is never empty: when a result has
        no kanji form, or the sense is usually written in kana, the reading
        is used as the word and ``pronunciation`` is left blank.
    """
    kana_only_tag = "Usually written using kana alone"
    for e in data:
        # Only results that carry at least one JLPT level are turned into cards.
        if not e.get("jlpt"):
            continue
        forms = e.get("japanese") or [{}]
        primary = forms[0]
        jap_word = primary.get("word", "")
        pronunciation = primary.get("reading", "")
        if not (jap_word or pronunciation):
            # Nothing to put on the front of the card.
            continue
        jlpt = ",".join(e["jlpt"])
        for sense in e.get("senses", []):
            meaning = ", ".join(sense.get("english_definitions", []))
            kana_only = kana_only_tag in sense.get("tags", [])
            if jap_word and pronunciation and not kana_only:
                entry_word, entry_pronunciation = jap_word, pronunciation
            else:
                # Kana-only sense, or only one written form is available:
                # show that form as the word and omit the redundant reading.
                entry_word, entry_pronunciation = pronunciation or jap_word, ""
            yield {
                "word": entry_word,
                "pronunciation": entry_pronunciation,
                "meaning": meaning,
                "jlpt": jlpt,
            }
def main(filename=None):
    """Look up every keyword and write the resulting flashcards to tmp.csv.

    Args:
        filename: Path to a text file with one keyword per line. When None,
            the hardcoded WORDS sample is used instead.

    The csv is written to ``tmp.csv`` in the current directory with ``|`` as
    the delimiter and a header row taken from the entry keys. If no entries
    were produced, a warning is logged and no file is written.
    """
    if filename is not None:
        logging.info("Reading file: %s", filename)
        # Explicit UTF-8 so Japanese keywords load the same on every platform.
        with open(filename, encoding="utf-8") as fs:
            raw_words = fs.read()
    else:
        logging.info("Reading hardcoded input.")
        raw_words = WORDS
    raw_words = raw_words.strip()
    logging.info("Using input:\nBEGIN\n%s\nEND", raw_words)

    entries = []
    for line in raw_words.splitlines():
        word = line.strip()
        # Skip blank and whitespace-only lines instead of querying for them.
        if not word:
            continue
        data = get_jisho_data(word)
        entries.extend(build_entry(data, word))

    if not entries:
        # entries[0] below would raise IndexError; report the situation instead.
        logging.warning("No entries found for the given keywords; no csv file written.")
        return

    csv_file_path = "tmp.csv"
    # UTF-8 is required here: the platform default encoding may be unable to
    # represent Japanese text and would raise UnicodeEncodeError.
    with open(csv_file_path, "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter="|")
        csv_writer.writerow(entries[0].keys())
        csv_writer.writerows(entry.values() for entry in entries)
    with open(csv_file_path, encoding="utf-8") as fs:
        logging.info("Wrote csv file:\n%s", fs.read())
if __name__ == "__main__":
    # Command-line entry point: accepts an optional path to a keyword file
    # and hands it to main(), which receives None when the path is omitted.
    cli = argparse.ArgumentParser(
        description="Build a flashcard csv from keywords."
    )
    cli.add_argument(
        "filename", nargs="?", type=str, help="Path to the file containing the keywords"
    )
    options = cli.parse_args()
    main(options.filename)