skills/skills/pptx/scripts/replace.py at main

  1#!/usr/bin/env python3
  2"""Apply text replacements to PowerPoint presentation.
  3
  4Usage:
  5    python replace.py <input.pptx> <replacements.json> <output.pptx>
  6
  7The replacements JSON should have the structure output by inventory.py.
  8ALL text shapes identified by inventory.py will have their text cleared
  9unless "paragraphs" is specified in the replacements for that shape.
 10"""
 11
 12import json
 13import sys
 14from pathlib import Path
 15from typing import Any, Dict, List
 16
 17from inventory import InventoryData, extract_text_inventory
 18from pptx import Presentation
 19from pptx.dml.color import RGBColor
 20from pptx.enum.dml import MSO_THEME_COLOR
 21from pptx.enum.text import PP_ALIGN
 22from pptx.oxml.xmlchemy import OxmlElement
 23from pptx.util import Pt
 24
 25
 26def clear_paragraph_bullets(paragraph):
 27    """Clear bullet formatting from a paragraph."""
 28    pPr = paragraph._element.get_or_add_pPr()
 29
 30    # Remove existing bullet elements
 31    for child in list(pPr):
 32        if (
 33            child.tag.endswith("buChar")
 34            or child.tag.endswith("buNone")
 35            or child.tag.endswith("buAutoNum")
 36            or child.tag.endswith("buFont")
 37        ):
 38            pPr.remove(child)
 39
 40    return pPr
 41
 42
 43def apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
 44    """Apply formatting properties to a paragraph."""
 45    # Get the text but don't set it on paragraph directly yet
 46    text = para_data.get("text", "")
 47
 48    # Get or create paragraph properties
 49    pPr = clear_paragraph_bullets(paragraph)
 50
 51    # Handle bullet formatting
 52    if para_data.get("bullet", False):
 53        level = para_data.get("level", 0)
 54        paragraph.level = level
 55
 56        # Calculate font-proportional indentation
 57        font_size = para_data.get("font_size", 18.0)
 58        level_indent_emu = int((font_size * (1.6 + level * 1.6)) * 12700)
 59        hanging_indent_emu = int(-font_size * 0.8 * 12700)
 60
 61        # Set indentation
 62        pPr.attrib["marL"] = str(level_indent_emu)
 63        pPr.attrib["indent"] = str(hanging_indent_emu)
 64
 65        # Add bullet character
 66        buChar = OxmlElement("a:buChar")
 67        buChar.set("char", "•")
 68        pPr.append(buChar)
 69
 70        # Default to left alignment for bullets if not specified
 71        if "alignment" not in para_data:
 72            paragraph.alignment = PP_ALIGN.LEFT
 73    else:
 74        # Remove indentation for non-bullet text
 75        pPr.attrib["marL"] = "0"
 76        pPr.attrib["indent"] = "0"
 77
 78        # Add buNone element
 79        buNone = OxmlElement("a:buNone")
 80        pPr.insert(0, buNone)
 81
 82    # Apply alignment
 83    if "alignment" in para_data:
 84        alignment_map = {
 85            "LEFT": PP_ALIGN.LEFT,
 86            "CENTER": PP_ALIGN.CENTER,
 87            "RIGHT": PP_ALIGN.RIGHT,
 88            "JUSTIFY": PP_ALIGN.JUSTIFY,
 89        }
 90        if para_data["alignment"] in alignment_map:
 91            paragraph.alignment = alignment_map[para_data["alignment"]]
 92
 93    # Apply spacing
 94    if "space_before" in para_data:
 95        paragraph.space_before = Pt(para_data["space_before"])
 96    if "space_after" in para_data:
 97        paragraph.space_after = Pt(para_data["space_after"])
 98    if "line_spacing" in para_data:
 99        paragraph.line_spacing = Pt(para_data["line_spacing"])
100
101    # Apply run-level formatting
102    if not paragraph.runs:
103        run = paragraph.add_run()
104        run.text = text
105    else:
106        run = paragraph.runs[0]
107        run.text = text
108
109    # Apply font properties
110    apply_font_properties(run, para_data)
111
112
def apply_font_properties(run, para_data: Dict[str, Any]):
    """Apply font properties from ``para_data`` to a text run.

    Only keys present in ``para_data`` are applied: ``bold``, ``italic``,
    ``underline``, ``font_size`` (points), ``font_name``, and a color given
    either as ``color`` (6-digit hex string, optional leading ``#``) or as
    ``theme_color`` (an ``MSO_THEME_COLOR`` member name such as "ACCENT_1").
    ``color`` takes precedence; ``theme_color`` is only consulted when no
    ``color`` key is present.

    A malformed ``color`` or an unknown ``theme_color`` prints a warning and
    leaves the run's color untouched.

    Args:
        run: python-pptx run whose ``font`` is modified in place.
        para_data: Mapping with the formatting keys described above.
    """
    if "bold" in para_data:
        run.font.bold = para_data["bold"]
    if "italic" in para_data:
        run.font.italic = para_data["italic"]
    if "underline" in para_data:
        run.font.underline = para_data["underline"]
    if "font_size" in para_data:
        run.font.size = Pt(para_data["font_size"])
    if "font_name" in para_data:
        run.font.name = para_data["font_name"]

    if "color" in para_data:
        raw_color = para_data["color"]
        color_hex = raw_color.lstrip("#") if isinstance(raw_color, str) else ""
        hex_digits = "0123456789abcdefABCDEF"
        # Validate before touching run.font.color so a bad value neither
        # crashes the run nor leaves a half-initialised color behind.
        if len(color_hex) == 6 and all(ch in hex_digits for ch in color_hex):
            r = int(color_hex[0:2], 16)
            g = int(color_hex[2:4], 16)
            b = int(color_hex[4:6], 16)
            run.font.color.rgb = RGBColor(r, g, b)
        else:
            print(
                f"  WARNING: Invalid color value {raw_color!r} "
                "(expected 6 hex digits, e.g. 'FF0000')"
            )
    elif "theme_color" in para_data:
        # Resolve the theme color by member name (e.g. "DARK_1", "ACCENT_1").
        theme_name = para_data["theme_color"]
        try:
            run.font.color.theme_color = getattr(MSO_THEME_COLOR, theme_name)
        except AttributeError:
            print(f"  WARNING: Unknown theme color name '{theme_name}'")
141
142
def detect_frame_overflow(inventory: InventoryData) -> Dict[str, Dict[str, float]]:
    """Collect the shapes whose text runs past the bottom of their frame.

    A shape counts as overflowing when its ``frame_overflow_bottom``
    attribute is not ``None``.

    Returns:
        Mapping of slide key -> shape key -> ``frame_overflow_bottom`` value.
        Slides without any overflowing shape are omitted.
    """
    result: Dict[str, Dict[str, float]] = {}

    for slide_name, shapes in inventory.items():
        overflowing = {
            shape_name: info.frame_overflow_bottom
            for shape_name, info in shapes.items()
            if info.frame_overflow_bottom is not None
        }
        # Only record slides that actually contain an overflowing shape.
        if overflowing:
            result[slide_name] = overflowing

    return result
160
161
def _describe_unreplaced_shapes(slide_inventory, shapes_data) -> str:
    """Summarize inventory shapes on a slide that have no replacement entry."""
    labels = []
    for key, shape_data in slide_inventory.items():
        if key in shapes_data:
            continue
        # Use the start of the first paragraph as a preview, when there is one.
        paragraphs = shape_data.paragraphs
        if paragraphs and paragraphs[0].text:
            preview = paragraphs[0].text[:50]
            if len(paragraphs[0].text) > 50:
                preview += "..."
            labels.append(f"{key} ('{preview}')")
        else:
            labels.append(key)
    return ", ".join(sorted(labels)) if labels else "none"


def _validate_shape_entry(slide_key, shape_key, entry) -> List[str]:
    """Check that one shape's replacement entry has the expected structure."""
    where = f"Shape '{shape_key}' on '{slide_key}'"
    if not isinstance(entry, dict):
        return [f"{where} must be a JSON object, got {type(entry).__name__}"]
    if "paragraphs" not in entry:
        return []

    paragraphs = entry["paragraphs"]
    if not isinstance(paragraphs, list):
        return [f"{where}: 'paragraphs' must be a list, got {type(paragraphs).__name__}"]

    return [
        f"{where}: paragraph {index} must be a JSON object, got {type(para).__name__}"
        for index, para in enumerate(paragraphs)
        if not isinstance(para, dict)
    ]


def validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
    """Validate the replacement data against the inventory.

    Checks that every ``slide-*`` key and every shape key in ``replacements``
    exists in ``inventory``, and that each entry has the expected structure
    (slide -> object of shapes, shape -> object, ``paragraphs`` -> list of
    objects). Top-level keys that do not start with ``"slide-"`` are ignored.

    Args:
        inventory: Mapping of slide key -> shape key -> shape data exposing
            a ``paragraphs`` sequence whose items have a ``text`` attribute.
        replacements: Parsed replacement JSON.

    Returns:
        List of human-readable error messages; empty when everything is valid.
    """
    if not isinstance(replacements, dict):
        return [
            "Replacement JSON must be an object keyed by slide, "
            f"got {type(replacements).__name__}"
        ]

    errors: List[str] = []

    for slide_key, shapes_data in replacements.items():
        if not slide_key.startswith("slide-"):
            continue

        if slide_key not in inventory:
            errors.append(f"Slide '{slide_key}' not found in inventory")
            continue

        # Report a malformed slide entry instead of crashing on .items().
        if not isinstance(shapes_data, dict):
            errors.append(
                f"Slide '{slide_key}' must be a JSON object keyed by shape, "
                f"got {type(shapes_data).__name__}"
            )
            continue

        slide_inventory = inventory[slide_key]
        # The hint is identical for every missing shape on this slide, so
        # build it at most once, and only if it is needed.
        unreplaced_hint = None

        for shape_key, entry in shapes_data.items():
            if shape_key not in slide_inventory:
                if unreplaced_hint is None:
                    unreplaced_hint = _describe_unreplaced_shapes(
                        slide_inventory, shapes_data
                    )
                errors.append(
                    f"Shape '{shape_key}' not found on '{slide_key}'. "
                    f"Shapes without replacements: {unreplaced_hint}"
                )
                continue

            errors.extend(_validate_shape_entry(slide_key, shape_key, entry))

    return errors
202
203
def check_duplicate_keys(pairs):
    """Build a dict from key/value pairs, rejecting repeated keys.

    Intended as a ``json.load`` ``object_pairs_hook``: raises ``ValueError``
    as soon as a key appears a second time within the same JSON object.
    """
    seen = {}
    for name, payload in pairs:
        if name not in seen:
            seen[name] = payload
            continue
        raise ValueError(f"Duplicate key found in JSON: '{name}'")
    return seen
212
213
def apply_replacements(pptx_file: str, json_file: str, output_file: str):
    """Apply text replacements from a JSON file to a PowerPoint presentation.

    Every text shape found by ``extract_text_inventory`` has its text frame
    cleared; shapes with a ``"paragraphs"`` entry in the replacement JSON are
    then refilled from it. The result is re-inventoried from a temporary copy
    and only written to ``output_file`` when no shape's overflow got worse
    and the updated inventory carries no warnings.

    Args:
        pptx_file: Path of the presentation to read.
        json_file: Path of the replacement JSON (UTF-8).
        output_file: Path the updated presentation is saved to.

    Raises:
        ValueError: If the JSON contains duplicate keys, references unknown
            slides/shapes, or the result has worsened overflow or warnings.
    """

    prs = Presentation(pptx_file)

    # Inventory of all text shapes (ShapeData objects); prs is passed so the
    # shape references point into this same Presentation instance.
    inventory = extract_text_inventory(Path(pptx_file), prs)

    # Baseline overflow, measured before anything is modified.
    original_overflow = detect_frame_overflow(inventory)

    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding, which would corrupt non-ASCII replacement text.
    with open(json_file, "r", encoding="utf-8") as f:
        replacements = json.load(f, object_pairs_hook=check_duplicate_keys)

    errors = validate_replacements(inventory, replacements)
    if errors:
        print("ERROR: Invalid shapes in replacement JSON:")
        for error in errors:
            print(f"  - {error}")
        print("\nPlease check the inventory and update your replacement JSON.")
        print(
            "You can regenerate the inventory with: python inventory.py <input.pptx> <output.json>"
        )
        raise ValueError(f"Found {len(errors)} validation error(s)")

    # Statistics reported at the end.
    shapes_processed = 0
    shapes_cleared = 0
    shapes_replaced = 0

    for slide_key, shapes_dict in inventory.items():
        if not slide_key.startswith("slide-"):
            continue

        slide_index = int(slide_key.split("-")[1])

        if slide_index >= len(prs.slides):
            print(f"Warning: Slide {slide_index} not found")
            continue

        for shape_key, shape_data in shapes_dict.items():
            shapes_processed += 1

            shape = shape_data.shape
            if not shape:
                print(f"Warning: {shape_key} has no shape reference")
                continue

            # ShapeData already validates text_frame in __init__
            text_frame = shape.text_frame  # type: ignore

            # Every inventoried shape is cleared, with or without replacement.
            text_frame.clear()  # type: ignore
            shapes_cleared += 1

            replacement_shape_data = replacements.get(slide_key, {}).get(shape_key, {})
            if "paragraphs" not in replacement_shape_data:
                continue

            shapes_replaced += 1

            for i, para_data in enumerate(replacement_shape_data["paragraphs"]):
                # clear() leaves one empty paragraph behind; reuse it first.
                if i == 0:
                    p = text_frame.paragraphs[0]  # type: ignore
                else:
                    p = text_frame.add_paragraph()  # type: ignore

                apply_paragraph_properties(p, para_data)

    # Re-inventory from a saved copy instead of the live presentation:
    # extract_text_inventory accesses font.color, which adds empty
    # <a:solidFill/> elements to the document it inspects.
    import tempfile

    # Reserve a path and close the handle before saving to it; saving into a
    # file that is still held open is not portable.
    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
        tmp_path = Path(tmp.name)

    try:
        # The save is inside the try block so the temp file is removed even
        # when saving or re-inventorying fails.
        prs.save(str(tmp_path))
        updated_inventory = extract_text_inventory(tmp_path)
        updated_overflow = detect_frame_overflow(updated_inventory)
    finally:
        tmp_path.unlink()

    # Flag shapes whose overflow grew compared with the original deck.
    overflow_errors = []
    for slide_key, shape_overflows in updated_overflow.items():
        for shape_key, new_overflow in shape_overflows.items():
            # Shapes that did not overflow before count as 0.
            original = original_overflow.get(slide_key, {}).get(shape_key, 0.0)

            if new_overflow > original + 0.01:  # Small tolerance for rounding
                increase = new_overflow - original
                overflow_errors.append(
                    f'{slide_key}/{shape_key}: overflow worsened by {increase:.2f}" '
                    f'(was {original:.2f}", now {new_overflow:.2f}")'
                )

    # Collect warnings attached to shapes in the updated inventory.
    warnings = []
    for slide_key, shapes_dict in updated_inventory.items():
        for shape_key, shape_data in shapes_dict.items():
            if shape_data.warnings:
                for warning in shape_data.warnings:
                    warnings.append(f"{slide_key}/{shape_key}: {warning}")

    # Any issue aborts before the output file is written.
    if overflow_errors or warnings:
        print("\nERROR: Issues detected in replacement output:")
        if overflow_errors:
            print("\nText overflow worsened:")
            for error in overflow_errors:
                print(f"  - {error}")
        if warnings:
            print("\nFormatting warnings:")
            for warning in warnings:
                print(f"  - {warning}")
        print("\nPlease fix these issues before saving.")
        raise ValueError(
            f"Found {len(overflow_errors)} overflow error(s) and {len(warnings)} warning(s)"
        )

    prs.save(output_file)

    print(f"Saved updated presentation to: {output_file}")
    print(f"Processed {len(prs.slides)} slides")
    print(f"  - Shapes processed: {shapes_processed}")
    print(f"  - Shapes cleared: {shapes_cleared}")
    print(f"  - Shapes replaced: {shapes_replaced}")
354
355
def main():
    """Command-line entry point: validate arguments, then run the replacement.

    Expects exactly three arguments (input .pptx, replacements JSON, output
    .pptx). Prints the module usage text or an error message and exits with
    status 1 on bad arguments, missing input files, or any failure while
    applying the replacements.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print(__doc__)
        sys.exit(1)

    input_pptx, replacements_json, output_pptx = (Path(arg) for arg in cli_args)

    # Both inputs must exist before any work starts; the output is created.
    if not input_pptx.exists():
        print(f"Error: Input file '{input_pptx}' not found")
        sys.exit(1)
    if not replacements_json.exists():
        print(f"Error: Replacements JSON file '{replacements_json}' not found")
        sys.exit(1)

    try:
        apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
    except Exception as exc:
        # Top-level boundary: report the failure with a traceback and signal
        # it through the exit status.
        import traceback

        print(f"Error applying replacements: {exc}")
        traceback.print_exc()
        sys.exit(1)
382
383
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()