Spaces:

gahanmakwana
/

my-ocr-demo

Running

App Files Files Community

my-ocr-demo / app.py

gahanmakwana

Prepare project for Hugging Face Spaces with Gradio and bundled models

a914ac3 18 days ago

raw

history blame contribute delete

8.98 kB

	import gradio as gr
	from paddleocr import PaddleOCR, draw_ocr
	from PIL import Image
	import numpy as np
	import os

	# --- Configuration: bundled model, dictionary and font locations ---
	# Every folder and file name below has to match the contents of the
	# 'paddleocr_models' directory exactly.

	# Language code; it mainly determines which dictionary is looked up.
	SELECTED_LANGUAGE = 'en'

	# Root folder that contains all bundled models.
	MODEL_BASE_DIR = 'paddleocr_models'

	# Folder names of the individual models (taken from the PaddleOCR logs).
	DET_MODEL_FOLDER_NAME = 'en_PP-OCRv3_det_infer'           # text detection
	REC_MODEL_FOLDER_NAME = 'en_PP-OCRv4_rec_infer'           # text recognition
	CLS_MODEL_FOLDER_NAME = 'ch_ppocr_mobile_v2.0_cls_infer'  # angle classification (often shared)

	# Full model paths, each one located below MODEL_BASE_DIR.
	DET_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, DET_MODEL_FOLDER_NAME)
	REC_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, REC_MODEL_FOLDER_NAME)
	CLS_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, CLS_MODEL_FOLDER_NAME)

	# --- Character dictionary ---
	# The dictionary is not guaranteed to sit next to the recognition model
	# after copying the model from the cache, so it is expected to have been
	# copied by hand into 'paddleocr_models/en_PP-OCRv4_rec_infer/'.
	# Adjust the file name here if your .txt file is called differently.
	REC_CHAR_DICT_FILENAME = 'en_dict.txt'
	REC_CHAR_DICT_PATH_DEFAULT = os.path.join(REC_MODEL_DIR_DEFAULT, REC_CHAR_DICT_FILENAME)

	# --- Font used when drawing OCR results ---
	# 'latin.ttf' (e.g. a renamed DejaVuSans.ttf) must live in the project root.
	FONT_PATH = 'latin.ttf'
	if not os.path.exists(FONT_PATH):
	    # Only a warning: the app can still start without the font.
	    print(f"WARNING: Font file '{FONT_PATH}' not found. Text rendering on images might fail or look incorrect.")

	# --- Initialize PaddleOCR Engine ---
	# The engine is built once at import time. It stays None when start-up
	# fails, which ocr_process() reports to the user instead of crashing.
	ocr_engine = None
	try:
	    # Both model folders are mandatory; stop at the first one that is missing.
	    for _model_label, _model_dir in (
	        ("Detection", DET_MODEL_DIR_DEFAULT),
	        ("Recognition", REC_MODEL_DIR_DEFAULT),
	    ):
	        if not os.path.exists(_model_dir):
	            raise FileNotFoundError(
	                f"{_model_label} model directory not found: '{_model_dir}'. "
	                "Please ensure it exists and contains model files."
	            )

	    # The recognition model is given an explicit character dictionary below,
	    # so that file has to be present as well.
	    if not os.path.exists(REC_CHAR_DICT_PATH_DEFAULT):
	        raise FileNotFoundError(
	            f"Recognition character dictionary not found: '{REC_CHAR_DICT_PATH_DEFAULT}'. "
	            "Please ensure it's in the recognition model folder."
	        )

	    print(f"Initializing PaddleOCR with language: {SELECTED_LANGUAGE}")
	    print(f" Detection Model Dir: {DET_MODEL_DIR_DEFAULT}")
	    print(f" Recognition Model Dir: {REC_MODEL_DIR_DEFAULT}")
	    print(f" Recognition Char Dict Path: {REC_CHAR_DICT_PATH_DEFAULT}")

	    # The angle classifier is optional: it is enabled only when its folder exists.
	    use_cls = os.path.exists(CLS_MODEL_DIR_DEFAULT)
	    print(
	        f" Classification Model Dir: {CLS_MODEL_DIR_DEFAULT}"
	        if use_cls
	        else f" Classification Model: Not found at '{CLS_MODEL_DIR_DEFAULT}' or not used."
	    )

	    _engine_options = {
	        "use_angle_cls": use_cls,
	        # Still passed for internal logic; the explicit dict path is what matters.
	        "lang": SELECTED_LANGUAGE,
	        "det_model_dir": DET_MODEL_DIR_DEFAULT,
	        "rec_model_dir": REC_MODEL_DIR_DEFAULT,
	        "rec_char_dict_path": REC_CHAR_DICT_PATH_DEFAULT,
	        "cls_model_dir": CLS_MODEL_DIR_DEFAULT if use_cls else None,
	        # Verbose logging; switch to False for quieter production logs.
	        "show_log": True,
	        # CPU only; set to True when GPU hardware and paddlepaddle-gpu are available.
	        "use_gpu": False,
	    }
	    ocr_engine = PaddleOCR(**_engine_options)
	    print("PaddleOCR engine initialized successfully from local models.")

	except FileNotFoundError as fnf_error:
	    # A bundled file or folder is missing: point at the likely cause.
	    print(f"FATAL ERROR (FileNotFound): {fnf_error}")
	    print("Please check your 'paddleocr_models' directory and model/dict file paths in app.py.")
	    ocr_engine = None
	except Exception as e:
	    # Any other start-up failure: log it and leave the engine disabled.
	    print(f"FATAL ERROR: Could not initialize PaddleOCR engine: {e}")
	    ocr_engine = None

	def ocr_process(image_pil, language_key_display_name):
	    """Run OCR on an uploaded image using the pre-loaded PaddleOCR engine.

	    Args:
	        image_pil: The uploaded picture as a PIL image, or None when nothing
	            was uploaded.
	        language_key_display_name: Label chosen in the language dropdown.
	            It is not used: the engine is created once at start-up for
	            SELECTED_LANGUAGE.

	    Returns:
	        A ``(image, text)`` tuple for the two Gradio outputs:

	        * ``(None, message)`` when the engine is unavailable or no image
	          was provided;
	        * ``(image_pil, "No text detected.")`` when OCR finds nothing;
	        * ``(image_pil, text)`` with a "font missing" prefix when the font
	          file needed for drawing does not exist;
	        * ``(annotated_image, text)`` on success, one recognised line per
	          text line;
	        * ``(image_pil, error message)`` when any step raises.
	    """
	    if ocr_engine is None:
	        # Shown to the user in the Gradio interface.
	        return None, "PaddleOCR engine is not available. Please check the application logs for errors."
	    if image_pil is None:
	        return None, "No image provided. Please upload an image."

	    print(f"Processing with pre-loaded language: {SELECTED_LANGUAGE}")

	    try:
	        # Convert once and reuse the same 3-channel image for recognition
	        # and for drawing, so both steps see identical pixel data.
	        image_rgb = image_pil.convert('RGB')
	        img_np = np.array(image_rgb)

	        print("Performing OCR...")
	        # `ocr` runs detection, classification (if enabled) and recognition.
	        result = ocr_engine.ocr(img_np, cls=ocr_engine.use_angle_cls)
	        print("OCR processing complete.")

	        # `not result` covers None as well as an empty list; indexing an
	        # empty list would raise IndexError and be misreported as an error.
	        if not result or not result[0]:
	            print("No text detected.")
	            return image_pil, "No text detected."

	        # result[0] is the list of lines; each line is [box, (text, score)].
	        lines = result[0]
	        boxes = [box for box, _ in lines]
	        txts = [text for _, (text, _score) in lines]
	        scores = [score for _, (_text, score) in lines]
	        extracted_text = "\n".join(txts)

	        print("Drawing OCR results...")
	        if not os.path.exists(FONT_PATH):
	            print(f"Font file '{FONT_PATH}' still not found. Cannot draw results on image.")
	            # Fall back to the untouched upload plus the recognised text.
	            return image_pil, f"Font file missing. Extracted text (raw):\n{extracted_text}"

	        # Draw on the RGB copy rather than the raw upload.
	        # NOTE(review): draw_ocr presumably needs a 3-channel image (RGBA or
	        # greyscale input is expected to break it) - confirm against the
	        # PaddleOCR version in use.
	        im_show = draw_ocr(image_rgb, boxes, txts, scores, font_path=FONT_PATH)
	        im_show_pil = Image.fromarray(im_show)  # array from draw_ocr -> PIL image
	        print("OCR results drawn.")

	        return im_show_pil, extracted_text

	    except Exception as e:
	        # UI boundary: report the failure to the user instead of crashing.
	        print(f"Error during OCR processing: {e}")
	        return image_pil, f"An error occurred during OCR: {str(e)}"

	# --- Gradio Interface Definition ---
	# Static texts shown above and below the form.
	title = "PaddleOCR Web App (Bundled Models)"
	description = f"""
	Upload an image to perform OCR. This app uses PaddleOCR with pre-bundled models
	for the {SELECTED_LANGUAGE.upper()} language to avoid re-downloads on Hugging Face Spaces.
	Detection: `{DET_MODEL_FOLDER_NAME}`
	Recognition: `{REC_MODEL_FOLDER_NAME}` (using `{REC_CHAR_DICT_FILENAME}`)
	Make sure the model files are correctly placed in the `paddleocr_models` directory
	and the font file `{FONT_PATH}` is in the project root.
	"""
	article = "<p style='text-align: center'>Powered by PaddleOCR and Gradio. Deployed on Hugging Face Spaces.</p>"

	# Maps the dropdown label to its language code. The dropdown is purely
	# informational: the models are loaded once at start-up, so switching
	# language for real would require re-creating ocr_engine with other
	# model/dictionary paths. Further entries (for example
	# "Chinese (Not Loaded)": "ch") could be added here.
	supported_langs_display_for_dropdown = {
	    "English (Loaded)": "en",
	}

	# Pre-select the label whose code equals SELECTED_LANGUAGE.
	_default_language_label = [
	    label
	    for label, code in supported_langs_display_for_dropdown.items()
	    if code == SELECTED_LANGUAGE
	][0]

	# Input widgets, in the order ocr_process receives them.
	_image_input = gr.Image(type="pil", label="Upload Image")
	_language_input = gr.Dropdown(
	    choices=list(supported_langs_display_for_dropdown.keys()),
	    label="Language (Using Pre-loaded Model)",
	    value=_default_language_label,
	)

	# Output widgets, in the order ocr_process returns them.
	_image_output = gr.Image(type="pil", label="Processed Image with OCR")
	_text_output = gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True)

	# Example images can be added to the repository and passed via
	# `examples=[[<image path>, "English (Loaded)"]]`.
	iface = gr.Interface(
	    fn=ocr_process,
	    inputs=[_image_input, _language_input],
	    outputs=[_image_output, _text_output],
	    title=title,
	    description=description,
	    article=article,
	    allow_flagging='never',  # hides the "Flag" button
	)

	if __name__ == '__main__':
	    # Script entry point: warn about a broken engine, then start the UI.
	    if ocr_engine is None:
	        # The interface is still launched; ocr_process() then answers every
	        # request with an "engine is not available" message.
	        print("OCR Engine could not be initialized. The Gradio app will not function correctly.")
	    print("Launching Gradio interface...")
	    iface.launch()
	    # NOTE(review): launch() presumably blocks while the server is running
	    # in a plain script, so this line may only be reached once it returns
	    # - confirm against the Gradio version in use.
	    print("Gradio interface launched.")