-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathworker.py
50 lines (39 loc) · 1.15 KB
/
worker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from glob import glob
from os import path
import subprocess
from re import sub
# Input folder holding the .docx sources and output folder for the
# generated Markdown files.
src = './docs/src/'
dist = './docs/ru/vol-1/'

# Ordered [pattern, replacement] pairs; each pattern is handed to re.sub,
# so every first element is interpreted as a regular expression.
repls = [
    # Drop soft hyphens (U+00AD, i.e. chr(173)).
    ['\u00ad', ''],
    # Shorten triple dashes to double dashes.
    ['---', '--'],
    # "[]{...}**<prefix>. Title**" -> second-level heading "## Title".
    [
        r'\[\]\{.+\}\*{2}.+\.\s([\w\s]+)\*{2}',
        r'## \1',
    ],
    # Media image followed (after a blank line) by an italic caption that
    # starts with "Фиг." -> <svgz> tag carrying the caption as alt text.
    [
        r'!\[\]\(media.+\)\{.+\}\n\n\*Фиг\.\s([\d\.]{3,5}.+)\*',
        r'<svgz src="vol-1/01/" alt="\1" />',
    ],
    # Any media image not consumed by the rule above -> <svgz> tag with a
    # generic alt text. NOTE(review): the original comment labelled this
    # rule "Tables with description" -- confirm the intended target.
    [
        r'!\[\]\(media.+\)\{.+\}',
        r'<svgz src="vol-1/01/" alt="img" />',
    ],
]
def beautify(text):
    """Return *text* after applying every replacement rule in ``repls``.

    The rules are applied one after another, in list order, each via
    ``re.sub`` on the output of the previous rule.
    """
    cleaned = text
    for pattern, replacement in repls:
        cleaned = sub(pattern, replacement, cleaned)
    return cleaned
def main():
    """Convert every ``.docx`` file in ``src`` to cleaned-up Markdown in ``dist``.

    For each matching document, pandoc is invoked to write a ``.md`` file
    named after the source file, and that file is then rewritten in place
    with the result of ``beautify``.

    Raises:
        subprocess.CalledProcessError: if pandoc exits with a non-zero
            status for one of the documents.
    """
    # Find files
    for docx_file in glob(path.join(src, '*.docx')):
        # Strip only the final extension: splitting on the first dot would
        # turn "ch.1.docx" into "ch" and make such files overwrite each other.
        stem = path.splitext(path.basename(docx_file))[0]
        md_file = path.join(dist, stem + '.md')

        # Convert to markdown. Fail loudly on a pandoc error instead of
        # going on to read a missing or stale output file.
        subprocess.check_call(
            ['pandoc', docx_file, '-o', md_file, '--wrap=none']
        )

        # Read and beautify
        with open(md_file, 'r', encoding='utf-8') as f:
            text = beautify(f.read())
        with open(md_file, 'w', encoding='utf-8') as f:
            f.write(text)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()