-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathcompose.py
198 lines (154 loc) · 5.85 KB
/
compose.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# 本文件用于将 poems.json 和 comprehensions.json 合并为 universal.json
import json, zhon.hanzi, string
# 屏蔽掉 comprehensions.json 有而 poems.json 无的内容
ignored_list = [
"竹里馆", "夜归鹿门歌", "菩萨蛮.其二", "阁夜",
"沁园春.长沙", "雨巷", "再别康桥", "荆轲刺秦王",
"记念刘和珍君", "秋兴八首其一", "咏怀古迹其三", "马嵬",
"寡人之于国也", "水龙吟.登建康赏心亭", "醉花阴", "廉颇蔺相如列传"
]
ignored_list = set(ignored_list)
def is_ignored(s) :
return s in ignored_list
# 标题比较需要跳过的内容
title_filter = string.punctuation + zhon.hanzi.punctuation + ' '
title_filter = set(title_filter)
# 理解性默写去重比较需要跳过的内容
comprehension_filter = string.punctuation + zhon.hanzi.punctuation + '$0123456789'
comprehension_filter = set(title_filter)
# 带有过滤器的字符串比较器
def str_cmp(a, b, filter) :
_a = ''.join(ch for ch in a if ch not in filter)
_b = ''.join(ch for ch in b if ch not in filter)
return _a == _b
# 判断标题一致
def judge_title(a, b) :
x = 'tag' in a
y = 'tag' in b
# 如果一个有 tag 一个无 tag 则他们必不匹配
if x ^ y :
return False
t = str_cmp(a['title'], b['title'], title_filter)
# 如果他们都没有 tag 的话那只需要匹配标题
if not x and not y :
return t
# 如果他们都有 tag 的话需要匹配 tag
return t and a['tag'] == b['tag']
# 判断理解性默写是否重复
def judge_duplicate(poem, comprehension) :
try :
comprehensions = poem['comprehensions']
except KeyError :
return True
for i in comprehensions :
if str_cmp(i['content'], comprehension["content"], comprehension_filter) :
return False
return True
# 检查 s 是否出现在 poem 中
def search_sentence(poem, s) :
if isinstance(poem, list) :
ret = False
for i in poem :
ret = ret or search_sentence(i, s)
return ret
elif isinstance(poem, str) :
return s == poem
else :
return False
# 检查理解性默写答案
def check_answer(poem, comprehension) :
for i in comprehension["answer"] :
if isinstance(i, str):
if not search_sentence(poem["normal"], i) :
return -1
elif isinstance(i, list) :
for j in i :
if not search_sentence(poem["content"], j) :
return -1
else :
return -2
return 0
# 合并理解性默写题目
def process_comprehension(comprehension) :
if is_ignored(comprehension["title"]) :
return 1 # 在屏蔽列表中
for i in universal_repositories :
if judge_title(i, comprehension):
if not judge_duplicate(i, comprehension) :
return -2 # 题目重复
status = check_answer(i, comprehension)
if status == -1 :
print("[警告] 理解性默写题目答案与文本不合")
print(comprehension)
if status == -2 :
print("[警告] 理解性默写题目格式错误")
print(comprehension)
return -3
if "source" not in i :
try :
i["source"] = comprehension["source"]
except KeyError :
pass
if "author" not in i :
try :
i["author"] = comprehension["author"]
except KeyError :
pass
single_record = {}
single_record["content"] = comprehension["content"]
single_record["answer"] = comprehension["answer"]
try :
comprehensions = i['comprehensions']
except KeyError :
i['comprehensions'] = []
comprehensions = i['comprehensions']
comprehensions.append(single_record)
return 0 # 正常处理
return -1 # 在 poems.json 中不存在对应的内容
# 主程序
if __name__ == "__main__":
normal_file = open("normal/poems.json", "rt", encoding='utf-8')
normal = json.loads(normal_file.read())
normal_file.close()
comprehensions_file = open("comprehensions/comprehensions.json", "rt", encoding='utf-8')
comprehensions = json.loads(comprehensions_file.read())
comprehensions_file.close()
universal = {}
universal_repositories = []
universal["$schema"] = "https://reciter.binkic.com/universal/schema.json"
universal["repositories"] = universal_repositories
for i in normal["poems"] :
single_record = {}
single_record["title"] = i["title"]
single_record["normal"] = i["content"]
try :
single_record["source"] = i["source"]
except KeyError :
pass
try :
single_record["author"] = i["author"]
except KeyError :
pass
try :
single_record["tag"] = i["tag"]
except KeyError :
pass
# 索引:title -> tag
# single_record["comprehensions"] = []
universal_repositories.append(single_record)
for comprehension in comprehensions["comprehensions"] :
status = process_comprehension(comprehension)
if status == -1 :
print(comprehension)
for i in range(0,len(universal_repositories)) :
old = universal_repositories[i]
new = {}
for j in ["title", "author", "source", "normal", "comprehensions"] :
try :
new[j] = old[j]
except KeyError :
pass
universal_repositories[i] = new
save = open("universal/universal.json", "w+", encoding='utf-8')
save.write( json.dumps(universal, indent = 4 , ensure_ascii = False ) )
save.close()