Word Extraction-1
Word Extraction-1
import numpy as np
from pdf2image import convert_from_path
from PIL import Image
import pytesseract
# Find contours
contours, _ = cv2.findContours(edges, cv2.RETR_TREE,
cv2.CHAIN_APPROX_SIMPLE)
rectangles = []
for j in range(4):
edge2_start = rect2_vertices[j]
edge2_end = rect2_vertices[(j + 1) % 4]
return False
# Usage
# Script entry point: runs shape detection on one hard-coded PDF.
# NOTE(review): detect_and_highlight_shapes is not defined in the visible
# part of this file; presumably it takes (pdf path, output folder) -- confirm
# against its definition.
pdf_file_path = r'C:\Users\pcdiv\OneDrive\Desktop\Condensate System P&ID.pdf'
# Folder where images will be saved
output_image_folder = r'C:\Users\pcdiv\OneDrive\Desktop'
detect_and_highlight_shapes(pdf_file_path, output_image_folder)