mirror of
https://github.com/eigent-ai/eigent.git
synced 2026-04-28 19:50:34 +00:00
Some checks are pending
CodeQL Advanced / Analyze (actions) (push) Waiting to run
CodeQL Advanced / Analyze (javascript-typescript) (push) Waiting to run
CodeQL Advanced / Analyze (python) (push) Waiting to run
Pre-commit / pre-commit (push) Waiting to run
Test / Run Python Tests (push) Waiting to run
Co-authored-by: Douglas <douglas.ym.lai@gmail.com>
47 lines
1.7 KiB
Python
47 lines
1.7 KiB
Python
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
|
|
import os
|
|
import sys
|
|
|
|
from pdf2image import convert_from_path
|
|
|
|
|
|
|
|
|
|
def _fit_within(width, height, max_dim):
    """Return ``(width, height)`` scaled down to fit a ``max_dim`` square.

    The aspect ratio is preserved. Sizes already within the limit are
    returned unchanged, and each scaled side is clamped to at least 1 pixel
    so that truncation never yields a zero-sized image.
    """
    if width <= max_dim and height <= max_dim:
        return width, height
    scale_factor = min(max_dim / width, max_dim / height)
    # int() truncates toward zero; a very elongated page could otherwise
    # collapse its short side to 0 pixels.
    return (
        max(1, int(width * scale_factor)),
        max(1, int(height * scale_factor)),
    )


def convert(pdf_path, output_dir, max_dim=1000):
    """Render each page of a PDF to a PNG file, downscaled to fit ``max_dim``.

    Pages are rasterized at 200 DPI. Any page whose width or height exceeds
    ``max_dim`` pixels is shrunk, preserving aspect ratio, so that neither
    side exceeds ``max_dim``; smaller pages are saved as rendered. Files are
    written as ``page_1.png``, ``page_2.png``, ... inside ``output_dir`` and
    a progress line is printed for every page, followed by a summary line.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory that receives the PNG files. It is created
            (including parents) if it does not exist yet.
        max_dim: Maximum width and height, in pixels, of a saved page.

    Raises:
        ValueError: If ``max_dim`` is not a positive number.
    """
    if max_dim <= 0:
        raise ValueError(f"max_dim must be positive, got {max_dim!r}")

    images = convert_from_path(pdf_path, dpi=200)

    # Create the target directory only after rendering succeeded, so a bad
    # PDF path does not leave an empty directory behind. An empty
    # output_dir means "current directory" and needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for page_number, image in enumerate(images, start=1):
        target_size = _fit_within(*image.size, max_dim)
        if target_size != image.size:
            image = image.resize(target_size)

        image_path = os.path.join(output_dir, f"page_{page_number}.png")
        image.save(image_path)
        print(f"Saved page {page_number} as {image_path} (size: {image.size})")

    print(f"Converted {len(images)} pages to PNG images")
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: expects exactly two positional arguments,
    # the input PDF and the directory that will receive the page images.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
        sys.exit(1)

    pdf_path, output_directory = cli_args
    convert(pdf_path, output_directory)
|