from pdf2image import convert_from_path
from PIL import Image
import os
def crop_image(input_path: str, output_path: str,
               left: int, top: int, right: int, bottom: int) -> None:
    """Crop the image at *input_path* and save the result to *output_path*.

    Args:
        input_path: Path of the image file to read.
        output_path: Path the cropped image is written to.
        left: X coordinate of the left edge of the crop box.
        top: Y coordinate of the top edge of the crop box.
        right: X coordinate of the right edge of the crop box.
        bottom: Y coordinate of the bottom edge of the crop box.
    """
    # The context manager closes the source file once the crop has been saved.
    with Image.open(input_path) as image:
        cropped_image = image.crop((left, top, right, bottom))
        cropped_image.save(output_path)
def extract_and_crop_pdf(pdf_path: str, output_dir: str,
                         width: int, crop_up: int, crop_down: int) -> None:
    """Render every page of a PDF and save a cropped PNG of each page.

    Each page is written to ``<output_dir>/page_<n>.png``, numbered from 1.
    The crop keeps a horizontally centred band ``width`` pixels wide and
    removes ``crop_up`` pixels from the top and ``crop_down`` pixels from the
    bottom of the rendered page.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory for the cropped images; created if missing.
        width: Width in pixels of the region to keep.
        crop_up: Number of pixels to cut from the top of each page.
        crop_down: Number of pixels to cut from the bottom of each page.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    images = convert_from_path(pdf_path)
    for page_number, pdf_image in enumerate(images, start=1):
        output_path = os.path.join(output_dir, f'page_{page_number}.png')

        # Calculate the crop coordinates: centre the band horizontally and
        # trim the requested margins vertically.
        left = (pdf_image.width - width) // 2
        top = crop_up
        right = left + width
        bottom = pdf_image.height - crop_down

        # Crop the rendered page in memory. Writing it to a temporary PNG and
        # reopening it is unnecessary, and the temp file was left behind
        # whenever cropping or saving raised.
        pdf_image.crop((left, top, right, bottom)).save(output_path)
if __name__ == '__main__':
    # Example configuration: edit these values to match your document.
    input_pdf = 'input.pdf'  # Specify the input PDF file
    output_directory = 'output_images'  # Specify the output folder
    crop_up = 177  # Pixels to crop from the top of each page
    crop_down = 118  # Pixels to crop from the bottom of each page
    target_width = 2667  # Width in pixels of the region to keep

    extract_and_crop_pdf(input_pdf, output_directory, target_width, crop_up, crop_down)
# Goal
# Imagine you want to extract the pages of a PDF as images and then crop them to a specific size. This script does exactly that.
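# Install the dependencies first (pdf2image requires poppler; `brew` is the macOS Homebrew command):
#   pip install Pillow
#   pip install pdf2image
#   brew install poppler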