from pdf2image import convert_from_path
from PIL import Image
import os
# Function to crop an image to specific dimensions
def crop_image(input_path, output_path, left, top, right, bottom):
    """Crop the image at ``input_path`` and save the result to ``output_path``.

    Args:
        input_path: Path of the image file to read.
        output_path: Path the cropped image is written to.
        left: X coordinate of the crop box's left edge.
        top: Y coordinate of the crop box's upper edge.
        right: X coordinate of the crop box's right edge.
        bottom: Y coordinate of the crop box's lower edge.
    """
    # The context manager releases the source file handle even if cropping
    # or saving raises, so the caller can safely delete ``input_path``
    # right after this function returns.
    with Image.open(input_path) as image:
        image.crop((left, top, right, bottom)).save(output_path)
# Function to extract all pages from a PDF and crop them as images
def extract_and_crop_pdf(pdf_path, output_dir, width, crop_up, crop_down):
    """Render every page of a PDF and save a cropped PNG of each one.

    Each page is written to ``output_dir`` as ``page_<n>.png`` (numbered
    from 1). The kept region is a horizontally centered band ``width``
    pixels wide, with ``crop_up`` pixels removed from the top and
    ``crop_down`` pixels removed from the bottom.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory for the cropped images; created if missing.
        width: Width in pixels of the centered region to keep.
        crop_up: Number of pixels to trim from the top of each page.
        crop_down: Number of pixels to trim from the bottom of each page.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    pages = convert_from_path(pdf_path)
    for page_number, page in enumerate(pages, start=1):
        # Center the kept band horizontally on the page.
        left = (page.width - width) // 2
        right = left + width
        top = crop_up
        bottom = page.height - crop_down

        # The rendered page is already an in-memory PIL image, so crop it
        # directly instead of round-tripping through a temporary PNG file
        # (which would also be left behind if cropping failed).
        output_path = os.path.join(output_dir, f'page_{page_number}.png')
        page.crop((left, top, right, bottom)).save(output_path)
if __name__ == '__main__':
    # Example settings; adjust them for the PDF being processed.
    input_pdf = 'input.pdf'  # Input PDF file
    output_directory = 'output_images'  # Folder the cropped pages go to
    crop_up = 177  # Pixels to trim from the top of each page
    crop_down = 118  # Pixels to trim from the bottom of each page
    target_width = 2667  # Width in pixels of the centered region to keep
    extract_and_crop_pdf(input_pdf, output_directory, target_width, crop_up, crop_down)

# Goal
Imagine you want to extract pages from a PDF as images and then crop them to a specific size. The following script does exactly that.
pip install Pillow
pip install pdf2image
brew install poppler