main
1#!/usr/bin/env python3
2"""Apply text replacements to PowerPoint presentation.
3
4Usage:
5 python replace.py <input.pptx> <replacements.json> <output.pptx>
6
7The replacements JSON should have the structure output by inventory.py.
8ALL text shapes identified by inventory.py will have their text cleared
9unless "paragraphs" is specified in the replacements for that shape.
10"""
11
12import json
13import sys
14from pathlib import Path
15from typing import Any, Dict, List
16
17from inventory import InventoryData, extract_text_inventory
18from pptx import Presentation
19from pptx.dml.color import RGBColor
20from pptx.enum.dml import MSO_THEME_COLOR
21from pptx.enum.text import PP_ALIGN
22from pptx.oxml.xmlchemy import OxmlElement
23from pptx.util import Pt
24
25
def clear_paragraph_bullets(paragraph):
    """Clear bullet formatting from a paragraph.

    Removes every bullet-type child (``a:buNone``, ``a:buAutoNum``,
    ``a:buChar``, ``a:buBlip``) as well as ``a:buFont`` from the paragraph's
    properties element, so a caller can add exactly one new bullet element
    afterwards.

    Args:
        paragraph: Paragraph object exposing ``_element.get_or_add_pPr()``.

    Returns:
        The paragraph-properties element returned by ``get_or_add_pPr()``.
    """
    pPr = paragraph._element.get_or_add_pPr()

    # a:buBlip (picture bullet) sits in the same schema choice group as
    # buNone/buAutoNum/buChar; leaving it behind would let a caller end up
    # with two bullet-type elements inside one pPr.
    bullet_suffixes = ("buChar", "buNone", "buAutoNum", "buBlip", "buFont")

    # Iterate over a snapshot: removing children from the live element while
    # iterating it would skip siblings.
    for child in list(pPr):
        if child.tag.endswith(bullet_suffixes):
            pPr.remove(child)

    return pPr
41
42
def _insert_bullet_element(pPr, bullet):
    """Insert a bullet-type element into ``pPr`` at a schema-valid position."""
    # Within a:pPr the bullet choice group may only be followed by tabLst,
    # defRPr and extLst; every other child (spacing, bullet colour/size/font)
    # has to precede it. Insert before the first such successor, else append.
    for position, child in enumerate(pPr):
        if child.tag.endswith(("}tabLst", "}defRPr", "}extLst")):
            pPr.insert(position, bullet)
            return
    pPr.append(bullet)


def apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
    """Apply text and formatting from ``para_data`` to a paragraph.

    Keys read from ``para_data`` (all optional):
        text: Text placed in the paragraph's first run (default ``""``).
        bullet: When truthy, the paragraph gets a "•" bullet with
            font-proportional indentation; otherwise indentation is zeroed
            and an explicit ``a:buNone`` is written.
        level: Indent level used for bulleted paragraphs (default 0).
        font_size: Size in points; also drives bullet indentation
            (18.0 is assumed for indentation when absent).
        alignment: One of "LEFT", "CENTER", "RIGHT", "JUSTIFY"; other values
            are ignored. Bulleted paragraphs without this key are
            left-aligned.
        space_before, space_after, line_spacing: Values passed through
            ``Pt()``.
    The remaining font keys are handled by ``apply_font_properties``.

    Args:
        paragraph: The paragraph object to modify in place.
        para_data: Replacement description for this paragraph.
    """
    # Text is written to the run further down, not to the paragraph directly.
    text = para_data.get("text", "")

    # Start from a pPr with no bullet elements so exactly one is added below.
    pPr = clear_paragraph_bullets(paragraph)

    if para_data.get("bullet", False):
        level = para_data.get("level", 0)
        paragraph.level = level

        # Font-proportional indentation; 12700 converts points to EMU.
        font_size = para_data.get("font_size", 18.0)
        level_indent_emu = int((font_size * (1.6 + level * 1.6)) * 12700)
        hanging_indent_emu = int(-font_size * 0.8 * 12700)

        pPr.attrib["marL"] = str(level_indent_emu)
        pPr.attrib["indent"] = str(hanging_indent_emu)

        buChar = OxmlElement("a:buChar")
        buChar.set("char", "•")
        # The first paragraph keeps its old pPr, which may already contain
        # tabLst/defRPr/extLst; a blind append would land after them.
        _insert_bullet_element(pPr, buChar)

        # Default to left alignment for bullets if not specified
        if "alignment" not in para_data:
            paragraph.alignment = PP_ALIGN.LEFT
    else:
        # Remove indentation for non-bullet text
        pPr.attrib["marL"] = "0"
        pPr.attrib["indent"] = "0"

        # Explicitly suppress any inherited bullet. Inserting at index 0
        # would put buNone ahead of pre-existing spacing children.
        _insert_bullet_element(pPr, OxmlElement("a:buNone"))

    # Apply alignment
    if "alignment" in para_data:
        alignment_map = {
            "LEFT": PP_ALIGN.LEFT,
            "CENTER": PP_ALIGN.CENTER,
            "RIGHT": PP_ALIGN.RIGHT,
            "JUSTIFY": PP_ALIGN.JUSTIFY,
        }
        if para_data["alignment"] in alignment_map:
            paragraph.alignment = alignment_map[para_data["alignment"]]

    # Apply spacing
    if "space_before" in para_data:
        paragraph.space_before = Pt(para_data["space_before"])
    if "space_after" in para_data:
        paragraph.space_after = Pt(para_data["space_after"])
    if "line_spacing" in para_data:
        paragraph.line_spacing = Pt(para_data["line_spacing"])

    # Reuse the first run when one exists, otherwise create it.
    existing_runs = paragraph.runs
    run = existing_runs[0] if existing_runs else paragraph.add_run()
    run.text = text

    # Apply font properties
    apply_font_properties(run, para_data)
111
112
def apply_font_properties(run, para_data: Dict[str, Any]):
    """Apply font properties to a text run.

    Keys read from ``para_data`` (each applied only when present):
    ``bold``, ``italic``, ``underline``, ``font_size`` (passed through
    ``Pt()``), ``font_name``, ``color`` and ``theme_color``.

    ``color`` takes precedence over ``theme_color``: when ``color`` is
    present, ``theme_color`` is not consulted. ``color`` is a six-digit hex
    string with an optional leading ``#``. A value of any other length is
    skipped with a printed warning, matching how unknown theme colour names
    are handled.

    Args:
        run: Run object exposing a ``font`` attribute.
        para_data: Replacement description for the paragraph owning the run.

    Raises:
        ValueError: If ``color`` has six characters that are not valid hex.
    """
    font = run.font

    if "bold" in para_data:
        font.bold = para_data["bold"]
    if "italic" in para_data:
        font.italic = para_data["italic"]
    if "underline" in para_data:
        font.underline = para_data["underline"]
    if "font_size" in para_data:
        font.size = Pt(para_data["font_size"])
    if "font_name" in para_data:
        font.name = para_data["font_name"]

    # Apply color - prefer RGB, fall back to theme_color
    if "color" in para_data:
        raw_color = para_data["color"]
        color_hex = raw_color.lstrip("#")
        if len(color_hex) == 6:
            r = int(color_hex[0:2], 16)
            g = int(color_hex[2:4], 16)
            b = int(color_hex[4:6], 16)
            font.color.rgb = RGBColor(r, g, b)
        else:
            # Previously dropped without any message, which hid typos such
            # as "#FFF" from the person writing the replacement JSON.
            print(
                f" WARNING: Ignoring color '{raw_color}' "
                "(expected 6 hex digits, e.g. 'FF0000')"
            )
    elif "theme_color" in para_data:
        # Get theme color by name (e.g., "DARK_1", "ACCENT_1")
        theme_name = para_data["theme_color"]
        try:
            font.color.theme_color = getattr(MSO_THEME_COLOR, theme_name)
        except AttributeError:
            print(f" WARNING: Unknown theme color name '{theme_name}'")
141
142
def detect_frame_overflow(inventory: InventoryData) -> Dict[str, Dict[str, float]]:
    """Collect the bottom overflow of every shape that reports one.

    Walks the inventory (slide key -> shape key -> shape data) and keeps the
    ``frame_overflow_bottom`` value of each shape where it is not ``None``.

    Returns a dict of slide_key -> shape_key -> overflow_inches. Slides with
    no overflowing shape do not appear in the result.
    """
    found: Dict[str, Dict[str, float]] = {}

    for slide_id, slide_shapes in inventory.items():
        for shape_id, info in slide_shapes.items():
            amount = info.frame_overflow_bottom
            if amount is None:
                # No overflow recorded for this shape.
                continue
            # Create the per-slide bucket lazily so empty slides stay absent.
            found.setdefault(slide_id, {})[shape_id] = amount

    return found
160
161
def _shape_preview(shape_key, shape_data):
    """Return ``shape_key``, followed by a short text preview when the shape has text."""
    paragraphs = shape_data.paragraphs
    if paragraphs and paragraphs[0].text:
        full_text = paragraphs[0].text
        first_text = full_text[:50]
        if len(full_text) > 50:
            first_text += "..."
        return f"{shape_key} ('{first_text}')"
    return shape_key


def validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
    """Validate that all shapes in replacements exist in inventory.

    Top-level keys that do not start with ``"slide-"`` are ignored. For every
    remaining slide key the slide must exist in ``inventory``, its value must
    be a dict keyed by shape, and each shape key must exist on that slide.
    An unknown shape produces an error that also lists the slide's shapes
    that have no replacement entry, to help spot a mistyped key.

    Args:
        inventory: Mapping of slide key -> shape key -> shape data, where
            shape data exposes ``paragraphs``.
        replacements: Parsed replacement JSON.

    Returns:
        List of error messages; empty when everything is valid.
    """
    errors = []

    for slide_key, shapes_data in replacements.items():
        if not slide_key.startswith("slide-"):
            continue

        # Check if slide exists
        if slide_key not in inventory:
            errors.append(f"Slide '{slide_key}' not found in inventory")
            continue

        # A list/string/null here used to crash on ``.keys()``; report it as
        # an ordinary validation error instead.
        if not isinstance(shapes_data, dict):
            errors.append(
                f"Slide '{slide_key}' must map shape keys to replacement objects, "
                f"got {type(shapes_data).__name__}"
            )
            continue

        slide_shapes = inventory[slide_key]
        missing = [key for key in shapes_data if key not in slide_shapes]
        if not missing:
            continue

        # The hint depends only on the slide, so build it once rather than
        # once per missing shape.
        unused_with_content = sorted(
            _shape_preview(key, shape)
            for key, shape in slide_shapes.items()
            if key not in shapes_data
        )
        hint = ", ".join(unused_with_content) if unused_with_content else "none"

        for shape_key in missing:
            errors.append(
                f"Shape '{shape_key}' not found on '{slide_key}'. "
                f"Shapes without replacements: {hint}"
            )

    return errors
202
203
def check_duplicate_keys(pairs):
    """Build a dict from JSON key/value pairs, rejecting repeated keys.

    Intended as an ``object_pairs_hook`` for ``json.load``.

    Raises ValueError on the first key that appears twice.
    """
    seen = {}
    for name, payload in pairs:
        if name not in seen:
            seen[name] = payload
            continue
        # Second occurrence of the same key within one JSON object.
        raise ValueError(f"Duplicate key found in JSON: '{name}'")
    return seen
212
213
def apply_replacements(pptx_file: str, json_file: str, output_file: str):
    """Apply text replacements from JSON to PowerPoint presentation.

    Every shape in the text inventory has its text frame cleared; shapes
    whose replacement entry contains ``"paragraphs"`` are then refilled from
    it. The result is checked by saving it to a temporary file and taking
    the inventory again, and it is written to ``output_file`` only if no
    shape's overflow grew and no shape reports warnings.

    Args:
        pptx_file: Path of the presentation to read.
        json_file: Path of the replacements JSON (read as UTF-8).
        output_file: Path the updated presentation is saved to.

    Raises:
        ValueError: If the JSON contains duplicate keys, references slides
            or shapes missing from the inventory, or if the result has
            worsened overflow or formatting warnings.
    """
    import tempfile

    # Load presentation
    prs = Presentation(pptx_file)

    # Get inventory of all text shapes (returns ShapeData objects)
    # Pass prs to use same Presentation instance
    inventory = extract_text_inventory(Path(pptx_file), prs)

    # Detect text overflow in original presentation
    original_overflow = detect_frame_overflow(inventory)

    # Load replacement data with duplicate key detection. JSON is UTF-8 by
    # specification; the locale default would garble non-ASCII replacement
    # text on platforms where that default is a different encoding.
    with open(json_file, "r", encoding="utf-8") as f:
        replacements = json.load(f, object_pairs_hook=check_duplicate_keys)

    # Validate replacements
    errors = validate_replacements(inventory, replacements)
    if errors:
        print("ERROR: Invalid shapes in replacement JSON:")
        for error in errors:
            print(f" - {error}")
        print("\nPlease check the inventory and update your replacement JSON.")
        print(
            "You can regenerate the inventory with: python inventory.py <input.pptx> <output.json>"
        )
        raise ValueError(f"Found {len(errors)} validation error(s)")

    # Track statistics
    shapes_processed = 0
    shapes_cleared = 0
    shapes_replaced = 0

    # Process each slide from inventory
    for slide_key, shapes_dict in inventory.items():
        if not slide_key.startswith("slide-"):
            continue

        slide_index = int(slide_key.split("-")[1])

        if slide_index >= len(prs.slides):
            print(f"Warning: Slide {slide_index} not found")
            continue

        # Process each shape from inventory
        for shape_key, shape_data in shapes_dict.items():
            shapes_processed += 1

            # Get the shape directly from ShapeData
            shape = shape_data.shape
            if not shape:
                print(f"Warning: {shape_key} has no shape reference")
                continue

            # ShapeData already validates text_frame in __init__
            text_frame = shape.text_frame  # type: ignore

            # Every inventoried shape is cleared, whether or not it has a
            # replacement entry.
            text_frame.clear()  # type: ignore
            shapes_cleared += 1

            # Check for replacement paragraphs
            replacement_shape_data = replacements.get(slide_key, {}).get(shape_key, {})
            if "paragraphs" not in replacement_shape_data:
                continue

            shapes_replaced += 1

            # Add replacement paragraphs; the first one reuses the paragraph
            # that is still present after clear().
            for i, para_data in enumerate(replacement_shape_data["paragraphs"]):
                if i == 0:
                    p = text_frame.paragraphs[0]  # type: ignore
                else:
                    p = text_frame.add_paragraph()  # type: ignore

                apply_paragraph_properties(p, para_data)

    # Check for issues after replacements
    # Save to a temporary file and reload to avoid modifying the presentation during inventory
    # (extract_text_inventory accesses font.color which adds empty <a:solidFill/> elements)
    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
        tmp_path = Path(tmp.name)

    try:
        # Saving happens inside the try so a failed save still removes the
        # temporary file.
        prs.save(str(tmp_path))
        updated_inventory = extract_text_inventory(tmp_path)
        updated_overflow = detect_frame_overflow(updated_inventory)
    finally:
        tmp_path.unlink()  # Clean up temp file

    # Check if any text overflow got worse
    overflow_errors = []
    for slide_key, shape_overflows in updated_overflow.items():
        for shape_key, new_overflow in shape_overflows.items():
            # Get original overflow (0 if there was no overflow before)
            original = original_overflow.get(slide_key, {}).get(shape_key, 0.0)

            # Error if overflow increased
            if new_overflow > original + 0.01:  # Small tolerance for rounding
                increase = new_overflow - original
                overflow_errors.append(
                    f'{slide_key}/{shape_key}: overflow worsened by {increase:.2f}" '
                    f'(was {original:.2f}", now {new_overflow:.2f}")'
                )

    # Collect warnings from updated shapes
    warnings = []
    for slide_key, shapes_dict in updated_inventory.items():
        for shape_key, shape_data in shapes_dict.items():
            if shape_data.warnings:
                for warning in shape_data.warnings:
                    warnings.append(f"{slide_key}/{shape_key}: {warning}")

    # Fail if there are any issues
    if overflow_errors or warnings:
        print("\nERROR: Issues detected in replacement output:")
        if overflow_errors:
            print("\nText overflow worsened:")
            for error in overflow_errors:
                print(f" - {error}")
        if warnings:
            print("\nFormatting warnings:")
            for warning in warnings:
                print(f" - {warning}")
        print("\nPlease fix these issues before saving.")
        raise ValueError(
            f"Found {len(overflow_errors)} overflow error(s) and {len(warnings)} warning(s)"
        )

    # Save the presentation
    prs.save(output_file)

    # Report results
    print(f"Saved updated presentation to: {output_file}")
    print(f"Processed {len(prs.slides)} slides")
    print(f" - Shapes processed: {shapes_processed}")
    print(f" - Shapes cleared: {shapes_cleared}")
    print(f" - Shapes replaced: {shapes_replaced}")
354
355
def main():
    """Command-line entry point.

    Expects exactly three arguments: input .pptx, replacements JSON and
    output .pptx. Prints the module usage text and exits with status 1 on a
    wrong argument count, exits with status 1 when either input file is
    missing, and exits with status 1 (after printing the error and a
    traceback) when applying the replacements fails.
    """
    argv = sys.argv
    if len(argv) != 4:
        print(__doc__)
        sys.exit(1)

    input_pptx, replacements_json, output_pptx = (Path(arg) for arg in argv[1:])

    # Both inputs must exist before any work starts; checked in this order.
    required_inputs = (
        ("Input file", input_pptx),
        ("Replacements JSON file", replacements_json),
    )
    for description, candidate in required_inputs:
        if not candidate.exists():
            print(f"Error: {description} '{candidate}' not found")
            sys.exit(1)

    try:
        apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
    except Exception as e:
        # Top-level boundary: report, show the traceback, and fail the process.
        print(f"Error applying replacements: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
382
383
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()