-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_docfill.py
229 lines (164 loc) · 6.83 KB
/
_docfill.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
import os
import tempfile
import time
import docx
import locale
from copy import deepcopy
from lxml.etree import ElementBase
from docx.document import Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.text.run import CT_R
from docx.oxml.table import CT_Row, CT_Tc
from docx.oxml.ns import nsmap, qn
from _str_decoder import decode_escapes
# Register the "w15" prefix in python-docx's namespace map so that XPath
# queries in this module can address w15:* elements (e.g. w15:repeatingSection).
nsmap['w15'] = "http://schemas.microsoft.com/office/word/2012/wordml"
class FieldOptions:
map_to: str
format: str | None = None
prefix: str | None = None
suffix: str | None = None
skipIfEmpty: bool | None = None
def __init__(self, map_to: str):
self.map_to = map_to
@staticmethod
def from_json(json):
if (isinstance(json, str)):
json = json.loads(json)
fieldOptions = FieldOptions(json['mapTo'])
if ('format' in json):
fieldOptions.format = json['format']
if ('prefix' in json):
fieldOptions.prefix = json['prefix']
if ('suffix' in json):
fieldOptions.prefix = json['suffix']
if ('skipIfEmpty' in json):
fieldOptions.skipIfEmpty = json['skipIfEmpty']
return fieldOptions
@staticmethod
def only_map(map_to: str):
return FieldOptions(map_to)
class PropValueParser:
    """Turn a raw data value into the text written into a template field."""

    options: FieldOptions

    def __init__(self, options: FieldOptions):
        self.options = options

    def parse(self, value):
        """Return ``value`` rendered as text according to ``self.options``.

        With the 'currency' format the process locale is switched to
        ``pt_BR`` (``LC_ALL``) and the value is formatted with two decimals
        and digit grouping. A truthy prefix/suffix is then attached, and the
        result is passed through ``decode_escapes``.
        """
        opts = self.options
        rendered = value

        if opts.format == 'currency':
            locale.setlocale(locale.LC_ALL, 'pt_BR')
            rendered = locale.format_string('%.2f', value, True)

        if opts.prefix:
            rendered = "".join([str(opts.prefix), str(rendered)])

        if opts.suffix:
            rendered = "".join([str(rendered), str(opts.suffix)])

        return decode_escapes(str(rendered))

    def canFill(self, value):
        """Tell whether a field should be filled with ``value``.

        Always true unless ``skipIfEmpty`` is truthy; in that case ``None``,
        the empty string and zero count as empty and yield false.
        """
        if not self.options.skipIfEmpty:
            return True
        is_empty = value is None or value == "" or value == 0
        return not is_empty
def find_dropdown_pr(element):
    """Return the first direct ``w:dropDownList`` child of ``element``, or None."""
    matches = element.xpath('./w:dropDownList', namespaces=nsmap)
    if not matches:
        return None
    return matches[0]
def find_text_pr(element):
    """Return the first direct ``w:text`` child of ``element``, or None."""
    matches = element.xpath('./w:text', namespaces=nsmap)
    if not matches:
        return None
    return matches[0]
def find_repeating_section_pr(element):
    """Return the first direct ``w15:repeatingSection`` child of ``element``, or None."""
    matches = element.xpath('./w15:repeatingSection', namespaces=nsmap)
    if not matches:
        return None
    return matches[0]
def find_wt(element, direct=True):
    """Return the first ``w:t`` element under ``element``, or None.

    With ``direct`` true only direct children are searched; otherwise all
    descendants are.
    """
    if direct:
        query = './w:t'
    else:
        query = './/w:t'
    matches = element.xpath(query, namespaces=nsmap)
    if not matches:
        return None
    return matches[0]
def map_dropdown(dropdown_element: list[ElementBase]):
    """Map each list item's ``w:value`` attribute to its ``w:displayText``.

    Iterates the children of ``dropdown_element`` and returns the resulting
    dict; a later item with the same value overrides an earlier one.
    """
    value_attr = qn('w:value')
    label_attr = qn('w:displayText')
    return {
        item.get(value_attr): item.get(label_attr)
        for item in dropdown_element
    }
def map_row_to_std(row, std: ElementBase, value_parser: PropValueParser):
    """Write the values of ``row`` into the table cells of one section item.

    The second child of ``std`` is taken as its content element. Only when
    that content holds exactly one child and the child is a table row
    (``CT_Row``) is anything changed: every cell keeps only the first run of
    its first paragraph, and, when ``row`` is a list, that run's text becomes
    the parsed ``row[i]`` for the i-th cell.

    Args:
        row: Values for the cells, in cell order. Non-list values leave the
            cell text unchanged (extra runs are still removed).
        std: Section item element; expected to have at least two children.
        value_parser: Parser used to render each value.

    Raises:
        IndexError: If ``std`` has fewer than two children, the content is
            empty, or ``row`` has fewer values than the row has cells.
    """
    is_list = isinstance(row, list)
    sdt_content: ElementBase = list(std)[1]

    if len(sdt_content) > 1:
        # Several content children: not a single table row, nothing to fill.
        return
    target = sdt_content[0]
    if not isinstance(target, CT_Row):
        return

    # Iterate the w:tc children only. A w:tr may also carry non-cell children
    # (e.g. w:trPr, w:tblPrEx); those have no paragraph and would otherwise
    # both crash the run lookup and shift the value index.
    cells = target.findall(qn('w:tc'))
    for index, cell in enumerate(cells):
        run = get_first_paragraph_run_and_remove_others(cell)
        if is_list:
            run.text = value_parser.parse(row[index])
def get_first_run_and_remove_others(element):
    """Return the first direct ``w:r`` child of ``element`` and remove the rest.

    Raises ``IndexError`` when ``element`` has no ``w:r`` child.
    """
    runs = element.findall(qn('w:r'))
    keeper = runs[0]
    # Every run after the first is detached from the element.
    for surplus in runs[1:]:
        element.remove(surplus)
    return keeper
def get_first_paragraph_run_and_remove_others(element):
    """Apply ``get_first_run_and_remove_others`` to the first ``w:p`` child.

    The first direct ``w:p`` child of ``element`` is looked up and handed on
    as-is (including ``None`` when there is no such child).
    """
    return get_first_run_and_remove_others(element.find(qn('w:p')))
def remove_other_than_first(children, element):
    """Remove from ``element`` every item of ``children`` except the first.

    ``children`` is walked in order; its first item is skipped and each
    following item is passed to ``element.remove``.
    """
    remaining = iter(children)
    next(remaining, None)  # skip the first item, if any
    for extra in remaining:
        element.remove(extra)
def create_document(template_path: str, data=None, field_mapping: dict[str, FieldOptions] | None = None, output_path: str = None, temp=True):
    """Fill the tagged content controls of a template and save the result.

    For every entry of ``field_mapping`` the first content control whose
    ``w:tag`` value equals the entry's key is filled with
    ``data[options.map_to]``:

    * a repeating section gets one copy of its first item per element of the
      value, each filled through ``map_row_to_std``;
    * a drop-down list shows the display text registered for ``str(value)``;
    * any other control is handled as text and receives the parsed value.

    Fields for which the parser's ``canFill(value)`` is false are skipped.

    Args:
        template_path: Path of the template document.
        data: Mapping from property name to value (empty when omitted).
        field_mapping: Mapping from content-control tag to ``FieldOptions``
            (empty when omitted).
        output_path: Destination file. When omitted a timestamped name is
            generated, inside a new temporary directory if ``temp`` is true,
            otherwise next to the template.
        temp: Selects the generated location when ``output_path`` is omitted.

    Returns:
        The path the document was saved to.

    Raises:
        KeyError: If a mapped property is missing from ``data`` or a
            drop-down value has no matching list item.
        IndexError: If no content control carries a mapped tag.
        ValueError: If a text control's content has neither ``w:p`` nor
            ``w:r`` children.
    """
    data = {} if data is None else data
    field_mapping = {} if field_mapping is None else field_mapping

    document: Document = docx.Document(template_path)

    for field, field_options in field_mapping.items():
        value_parser = PropValueParser(field_options)
        prop_value = data[field_options.map_to]
        if not value_parser.canFill(prop_value):
            continue

        form_field_tag = document.element.xpath(
            f'//w:tag[@w:val="{field}"]')[0]
        # The tag lives in the control's properties element; the element that
        # follows the properties is used as the control's content.
        form_field_pr: ElementBase = form_field_tag.getparent()
        form_field_sdtContent: ElementBase = form_field_pr.getnext()

        if find_repeating_section_pr(form_field_pr) is not None:
            # Copy the first item before clearing the content; it serves as
            # the pattern for every row.
            base_section = deepcopy(list(form_field_sdtContent)[0])
            form_field_sdtContent.clear()
            for row in prop_value:
                clone_section = deepcopy(base_section)
                # Drop the first w:id found in the copy (presumably so the
                # copies do not share one control id -- confirm).
                id_element = clone_section.xpath(
                    '//w:sdtPr/w:id', namespaces=nsmap)[0]
                id_element.getparent().remove(id_element)
                map_row_to_std(row, clone_section, value_parser)
                form_field_sdtContent.append(clone_section)
            continue

        dropdown_pr = find_dropdown_pr(form_field_pr)
        if dropdown_pr is not None:
            possible_values = map_dropdown(dropdown_pr)
            dropdown_t = find_wt(form_field_sdtContent, False)
            dropdown_t.text = possible_values[str(prop_value)]
            continue

        # Unidentified content controls are handled as text by default.
        run: CT_R = None
        for child in form_field_sdtContent:
            if isinstance(child, CT_R):
                run = child
                break
            if isinstance(child, CT_P):
                run = get_first_run_and_remove_others(child)
                break
        # Keep only the first child of the content.
        remove_other_than_first(
            list(form_field_sdtContent), form_field_sdtContent)
        if run is None:
            raise ValueError(
                f"'{field}' not have 'w:p' or 'w:r' in 'sdtContent'")
        run.text = value_parser.parse(prop_value)

    # Timestamp (dots replaced) plus the template's extension keeps generated
    # names distinct between calls.
    file_suffix = f'{str(time.time()).replace(".", "_")}{os.path.splitext(template_path)[1]}'
    if output_path is None:
        if temp:
            output_path = os.path.join(
                tempfile.mkdtemp(), f'tmp_{file_suffix}')
        else:
            output_path = os.path.join(
                os.path.dirname(template_path), f'output_{file_suffix}')

    document.save(output_path)
    return output_path