main
 1import os
 2import sys
 3
 4from pdf2image import convert_from_path
 5
 6
 7# Converts each page of a PDF to a PNG image.
 8
 9
def convert(pdf_path, output_dir, max_dim=1000):
    """Render every page of a PDF to a PNG file in ``output_dir``.

    Pages are rasterized at 200 DPI. Any page whose width or height exceeds
    ``max_dim`` pixels is scaled down, preserving aspect ratio, so that both
    dimensions fit within ``max_dim``. Files are named ``page_1.png``,
    ``page_2.png``, ... in page order, and progress is printed to stdout.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory to write the PNG files into. It is created
            (including missing parents) if it does not already exist.
        max_dim: Maximum width and height, in pixels, of each saved image.
    """
    # Create the target directory before rendering so a missing directory
    # does not surface only after the expensive rasterization step. An empty
    # string means "current directory" to os.path.join, so skip it here.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    images = convert_from_path(pdf_path, dpi=200)

    for page_number, image in enumerate(images, start=1):
        # Scale image if needed to keep width/height under `max_dim`
        width, height = image.size
        if width > max_dim or height > max_dim:
            scale_factor = min(max_dim / width, max_dim / height)
            # int() truncates, so a very elongated page could end up with a
            # 0-pixel side; clamp each side to at least one pixel.
            new_width = max(1, int(width * scale_factor))
            new_height = max(1, int(height * scale_factor))
            image = image.resize((new_width, new_height))

        image_path = os.path.join(output_dir, f"page_{page_number}.png")
        image.save(image_path)
        print(f"Saved page {page_number} as {image_path} (size: {image.size})")

    print(f"Converted {len(images)} pages to PNG images")
27
28
if __name__ == "__main__":
    # Exactly two positional arguments are required after the script name:
    # the input PDF and the directory that receives the PNG files.
    if len(sys.argv[1:]) != 2:
        print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
        sys.exit(1)
    pdf_path, output_directory = sys.argv[1:3]
    convert(pdf_path, output_directory)
35    convert(pdf_path, output_directory)