# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========

import os
import sys

from pdf2image import convert_from_path


def convert(pdf_path, output_dir, max_dim=1000, dpi=200):
    """Render every page of a PDF to a PNG file inside *output_dir*.

    Pages are rasterized with ``pdf2image.convert_from_path``. Any page whose
    width or height exceeds *max_dim* is scaled down, preserving its aspect
    ratio, so that its longest side fits within *max_dim* pixels. Pages are
    written as ``page_1.png``, ``page_2.png``, ... and a progress line is
    printed for each one, followed by a final summary line.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory that receives the PNG files. It is created if
            it does not exist; an empty string means the current directory.
        max_dim: Maximum allowed width/height of a saved image, in pixels.
        dpi: Rasterization resolution passed to ``convert_from_path``.

    Raises:
        ValueError: If *max_dim* is not a positive number.
    """
    if max_dim <= 0:
        raise ValueError(f"max_dim must be positive, got {max_dim!r}")

    # Create the destination up front so image.save() cannot fail merely
    # because the directory is missing. An empty path refers to the current
    # directory, which os.makedirs() would reject, so skip it in that case.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    images = convert_from_path(pdf_path, dpi=dpi)

    for page_number, image in enumerate(images, start=1):
        width, height = image.size
        if width > max_dim or height > max_dim:
            # Use the smaller ratio so both sides end up within max_dim.
            scale_factor = min(max_dim / width, max_dim / height)
            # int() truncates; clamp to 1 px so extremely elongated pages
            # never produce a zero-sized dimension, which resize() rejects.
            new_width = max(1, int(width * scale_factor))
            new_height = max(1, int(height * scale_factor))
            image = image.resize((new_width, new_height))

        image_path = os.path.join(output_dir, f"page_{page_number}.png")
        image.save(image_path)
        print(f"Saved page {page_number} as {image_path} (size: {image.size})")

    print(f"Converted {len(images)} pages to PNG images")


if __name__ == "__main__":
    # Expect exactly two positional arguments: the input PDF and output dir.
    if len(sys.argv) != 3:
        print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
        sys.exit(1)
    pdf_path = sys.argv[1]
    output_directory = sys.argv[2]
    convert(pdf_path, output_directory)