-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpptxreplace.py
112 lines (94 loc) · 4.32 KB
/
pptxreplace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""Powerpoint file replace text module"""
import logging # noqa: E402
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx.enum.shapes import PP_PLACEHOLDER
from config import replace_substring
# def remove_metadata_from_app_xml(prs):
# """There is currently no functionality for handling app.xml so
# have to find the part and then alter its blob manually
# """
# package_parts = prs.part.package.parts
# for part in package_parts:
# if part.partname.endswith('app.xml'):
# app_xml_part = part
# app_xml = app_xml_part.blob.decode('utf-8')
# tags_to_remove = ('Company', 'Manager', 'HyperlinkBase')
# for tag in tags_to_remove:
# pattern = f'<{tag}>.*<\/{tag}>'
# app_xml = re.sub(pattern, '', app_xml)
# app_xml_part.blob = bytearray(app_xml, 'utf-8')
def _substitute_all(text, data, replaced, verbose=False):
    """Apply every ``key -> value`` pair of *data* to *text*, in dict order.

    Each pair is handed to ``replace_substring``; the count it returns is
    added to ``replaced[value]`` (a missing counter starts at 0).

    Returns:
        The text after all substitutions have been applied.
    """
    for key, value in data.items():
        before = text
        text, n = replace_substring(text, key, value)
        replaced[value] = replaced.get(value, 0) + n
        if verbose:
            print(f'{before} -> {text}')
    return text


def _rewrite_chart_text(chart, data, replaced, verbose=False):
    """Substitute text in the series names and category labels of *chart*.

    python-pptx exposes series names and category labels read-only, so the
    chart is rebuilt through ``chart.replace_data()`` -- and only when a
    substitution actually changed something. Counters in *replaced* are
    updated only for changes that were really written.

    NOTE(review): the rebuild re-adds every series with the default number
    format and writes the categories as plain strings; confirm this is
    acceptable for charts with custom number formats or date categories.
    """
    # Local import: keeps this block self-contained; pptx is already a
    # dependency of the module.
    from pptx.chart.data import CategoryChartData

    if len(chart.plots) == 0:
        return

    categories = chart.plots[0].categories
    series_list = list(chart.series)

    # Count into a scratch dict first; merged into `replaced` only once the
    # new text has actually been written to the chart.
    pending = {}

    old_names = [series.name for series in series_list]
    new_names = []
    for name in old_names:
        new_names.append(_substitute_all(name, data, pending, verbose))

    old_labels = [str(label) for label in categories]
    new_labels = []
    for label in old_labels:
        new_labels.append(_substitute_all(label, data, pending, verbose))

    if new_names == old_names and new_labels == old_labels:
        return  # nothing matched; leave the chart untouched

    if categories.depth != 1:
        # depth 0: no categories (e.g. XY/bubble charts); depth > 1:
        # multi-level categories. Neither can be rebuilt from a flat
        # CategoryChartData without losing information.
        logging.warning(
            'Chart text matched but was not replaced: unsupported '
            'category layout (depth=%s)', categories.depth)
        return

    chart_data = CategoryChartData()
    chart_data.categories = new_labels
    for name, series in zip(new_names, series_list):
        chart_data.add_series(name, series.values)
    chart.replace_data(chart_data)

    for value, n in pending.items():
        replaced[value] = replaced.get(value, 0) + n


def process_shape(shape_parent, data, replaced, verbose=False):
    """Replace text in every shape of *shape_parent*, recursing into groups.

    Text is substituted in text-frame runs, table cells and chart series
    names / category labels. Footer and header placeholders are removed
    from the shape tree instead of being processed.

    Args:
        shape_parent: Object exposing a ``shapes`` collection (a slide, or a
            group shape on recursive calls).
        data: Mapping of search key to replacement value; each pair is
            passed to ``replace_substring``.
        replaced: Dict of counters keyed by replacement value; updated in
            place with the counts returned by ``replace_substring``.
        verbose: When true, print ``old -> new`` for every substitution
            attempt.
    """
    # Iterate over a snapshot: placeholders may be removed from the tree
    # while we walk it.
    for shape in list(shape_parent.shapes):
        if shape.is_placeholder:
            ph_type = shape.placeholder_format.type
            if ph_type in (PP_PLACEHOLDER.FOOTER, PP_PLACEHOLDER.HEADER):
                # Detach the placeholder via its own parent. The placeholder
                # idx is not a position in `shapes`, so it must not be used
                # to look the shape up.
                element = shape.element
                element.getparent().remove(element)
                continue

        if shape.has_text_frame:
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    old_text = run.text
                    new_text = _substitute_all(
                        old_text, data, replaced, verbose)
                    if new_text != old_text:
                        run.text = new_text

        if shape.has_table:
            # `shape` is the graphic frame holding the table; the text
            # lives on the individual cells.
            for cell in shape.table.iter_cells():
                old_text = cell.text
                new_text = _substitute_all(old_text, data, replaced, verbose)
                # Assigning cell.text rebuilds the cell's paragraphs, so
                # only do it when something really changed.
                if new_text != old_text:
                    cell.text = new_text

        if shape.has_chart:
            _rewrite_chart_text(shape.chart, data, replaced, verbose)

        if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
            process_shape(shape, data, replaced, verbose)
def replace_pptx(file_path: str, new_path: str, data: dict,
                 verbose: bool = False) -> None:
    """Replace text throughout a presentation and save it under a new path.

    For every slide, the substitutions in *data* are applied to the notes
    text (when the slide has a notes slide with a notes placeholder) and
    then to the slide's shapes via ``process_shape``.

    Args:
        file_path: Path of the presentation to open.
        new_path: Path the modified presentation is saved to.
        data: Mapping of search key to replacement value; each pair is
            passed to ``replace_substring``.
        verbose: When true, print ``old -> new`` for every substitution
            attempt. Defaults to False, matching the previous behaviour.
    """
    prs = Presentation(file_path)
    # NOTE: metadata stripping (remove_metadata_from_app_xml) is currently
    # disabled.

    # One counter per replacement value, updated as substitutions happen.
    replaced = dict.fromkeys(data.values(), 0)

    for slide in prs.slides:
        if slide.has_notes_slide:
            notes_frame = slide.notes_slide.notes_text_frame
            # notes_text_frame is None when the notes slide has no notes
            # placeholder.
            if notes_frame is not None:
                original_text = notes_frame.text
                new_text = original_text
                for key, value in data.items():
                    before = new_text
                    new_text, n = replace_substring(new_text, key, value)
                    replaced[value] += n
                    if verbose:
                        print(f'{before} -> {new_text}')
                # Assigning .text rebuilds the frame's paragraphs, so only
                # write when a substitution really changed the notes.
                if new_text != original_text:
                    notes_frame.text = new_text
        process_shape(slide, data, replaced, verbose)

    logging.info('Replacements:\n %s', replaced)
    prs.save(new_path)
    logging.info('New PPTX file saved to %s', new_path)