Spaces:
Running
Running
import gradio as gr | |
from paddleocr import PaddleOCR, draw_ocr | |
from PIL import Image | |
import numpy as np | |
import os | |
# --- Configuration: Model and Font Paths --- | |
# IMPORTANT: Ensure these paths and folder names match exactly what you have | |
# in your 'paddleocr_models' directory. | |
# Define the language | |
SELECTED_LANGUAGE = 'en' # This informs which dictionary to look for primarily | |
# Base directory for your bundled models | |
MODEL_BASE_DIR = 'paddleocr_models' | |
# --- Model paths based on your logs --- | |
# Detection model: en_PP-OCRv3_det_infer | |
DET_MODEL_FOLDER_NAME = 'en_PP-OCRv3_det_infer' | |
DET_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, DET_MODEL_FOLDER_NAME) | |
# Recognition model: en_PP-OCRv4_rec_infer | |
REC_MODEL_FOLDER_NAME = 'en_PP-OCRv4_rec_infer' | |
REC_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, REC_MODEL_FOLDER_NAME) | |
# Classification model: ch_ppocr_mobile_v2.0_cls_infer (often shared) | |
CLS_MODEL_FOLDER_NAME = 'ch_ppocr_mobile_v2.0_cls_infer' | |
CLS_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, CLS_MODEL_FOLDER_NAME) | |
# --- Character Dictionary Path --- | |
# Since en_dict.txt might not be directly in the rec_model_dir after copying from cache, | |
# we assume you've copied the default en_dict.txt into your REC_MODEL_FOLDER_NAME. | |
# If you copied it from the PaddleOCR package utils, this path should be correct. | |
# Ensure 'en_dict.txt' is inside 'paddleocr_models/en_PP-OCRv4_rec_infer/' | |
REC_CHAR_DICT_FILENAME = 'en_dict.txt' # Or whatever your .txt file is named | |
REC_CHAR_DICT_PATH_DEFAULT = os.path.join(REC_MODEL_DIR_DEFAULT, REC_CHAR_DICT_FILENAME) | |
# --- Font for drawing OCR results --- | |
FONT_PATH = 'latin.ttf' # Ensure 'latin.ttf' (e.g., DejaVuSans.ttf renamed) is in your project root. | |
if not os.path.exists(FONT_PATH): | |
print(f"WARNING: Font file '{FONT_PATH}' not found. Text rendering on images might fail or look incorrect.") | |
# --- Initialize PaddleOCR Engine --- | |
ocr_engine = None | |
try: | |
# Check if essential model directories exist | |
if not os.path.exists(DET_MODEL_DIR_DEFAULT): | |
raise FileNotFoundError(f"Detection model directory not found: '{DET_MODEL_DIR_DEFAULT}'. Please ensure it exists and contains model files.") | |
if not os.path.exists(REC_MODEL_DIR_DEFAULT): | |
raise FileNotFoundError(f"Recognition model directory not found: '{REC_MODEL_DIR_DEFAULT}'. Please ensure it exists and contains model files.") | |
# Check if the character dictionary file exists | |
if not os.path.exists(REC_CHAR_DICT_PATH_DEFAULT): | |
raise FileNotFoundError(f"Recognition character dictionary not found: '{REC_CHAR_DICT_PATH_DEFAULT}'. Please ensure it's in the recognition model folder.") | |
print(f"Initializing PaddleOCR with language: {SELECTED_LANGUAGE}") | |
print(f" Detection Model Dir: {DET_MODEL_DIR_DEFAULT}") | |
print(f" Recognition Model Dir: {REC_MODEL_DIR_DEFAULT}") | |
print(f" Recognition Char Dict Path: {REC_CHAR_DICT_PATH_DEFAULT}") | |
use_cls = os.path.exists(CLS_MODEL_DIR_DEFAULT) | |
if use_cls: | |
print(f" Classification Model Dir: {CLS_MODEL_DIR_DEFAULT}") | |
else: | |
print(f" Classification Model: Not found at '{CLS_MODEL_DIR_DEFAULT}' or not used.") | |
ocr_engine = PaddleOCR( | |
use_angle_cls=use_cls, | |
lang=SELECTED_LANGUAGE, # Still useful for some internal logic, but dict path is key | |
det_model_dir=DET_MODEL_DIR_DEFAULT, | |
rec_model_dir=REC_MODEL_DIR_DEFAULT, | |
rec_char_dict_path=REC_CHAR_DICT_PATH_DEFAULT, # Explicitly providing the dictionary path | |
cls_model_dir=CLS_MODEL_DIR_DEFAULT if use_cls else None, | |
show_log=True, # Set to False for less verbose logs in production if desired | |
use_gpu=False # Set to True if you have GPU hardware on Spaces and paddlepaddle-gpu | |
) | |
print("PaddleOCR engine initialized successfully from local models.") | |
except FileNotFoundError as fnf_error: | |
print(f"FATAL ERROR (FileNotFound): {fnf_error}") | |
print("Please check your 'paddleocr_models' directory and model/dict file paths in app.py.") | |
ocr_engine = None | |
except Exception as e: | |
print(f"FATAL ERROR: Could not initialize PaddleOCR engine: {e}") | |
ocr_engine = None # Ensure it's None if initialization fails | |
def ocr_process(image_pil, language_key_display_name): | |
""" | |
Processes the uploaded image with PaddleOCR using the pre-loaded models. | |
""" | |
if ocr_engine is None: | |
# This message will be displayed to the user in the Gradio interface | |
return None, "PaddleOCR engine is not available. Please check the application logs for errors." | |
if image_pil is None: | |
return None, "No image provided. Please upload an image." | |
print(f"Processing with pre-loaded language: {SELECTED_LANGUAGE}") | |
try: | |
img_np = np.array(image_pil.convert('RGB')) # Ensure image is RGB | |
print("Performing OCR...") | |
# The `ocr` method automatically uses the det, cls (if enabled), and rec models. | |
result = ocr_engine.ocr(img_np, cls=ocr_engine.use_angle_cls) | |
print("OCR processing complete.") | |
# PaddleOCR v2.6+ returns results in a different structure: result = [[box, (text, score)], ...] | |
# Check if result is not None and the first element (lines) is not empty | |
if result is None or not result[0]: | |
print("No text detected.") | |
return image_pil, "No text detected." | |
# Correctly extract boxes, texts, and scores from the result structure | |
# result[0] contains the list of lines, where each line is [box, (text, score)] | |
lines = result[0] | |
boxes = [line[0] for line in lines] | |
txts = [line[1][0] for line in lines] | |
scores = [line[1][1] for line in lines] | |
print("Drawing OCR results...") | |
if not os.path.exists(FONT_PATH): | |
print(f"Font file '{FONT_PATH}' still not found. Cannot draw results on image.") | |
# Return original image and extracted text without drawn boxes | |
extracted_text_raw = "\n".join(txts) | |
return image_pil, f"Font file missing. Extracted text (raw):\n{extracted_text_raw}" | |
# draw_ocr expects the image in a format it can handle (PIL Image is fine) | |
im_show = draw_ocr(image_pil, boxes, txts, scores, font_path=FONT_PATH) | |
im_show_pil = Image.fromarray(im_show) # Convert numpy array from draw_ocr back to PIL Image | |
print("OCR results drawn.") | |
extracted_text = "\n".join(txts) | |
return im_show_pil, extracted_text | |
except Exception as e: | |
print(f"Error during OCR processing: {e}") | |
# Return original image and error message | |
return image_pil, f"An error occurred during OCR: {str(e)}" | |
# --- Gradio Interface Definition --- | |
title = "PaddleOCR Web App (Bundled Models)" | |
description = f""" | |
Upload an image to perform OCR. This app uses PaddleOCR with pre-bundled models | |
for the **{SELECTED_LANGUAGE.upper()}** language to avoid re-downloads on Hugging Face Spaces. | |
Detection: `{DET_MODEL_FOLDER_NAME}` | |
Recognition: `{REC_MODEL_FOLDER_NAME}` (using `{REC_CHAR_DICT_FILENAME}`) | |
Make sure the model files are correctly placed in the `paddleocr_models` directory | |
and the font file `{FONT_PATH}` is in the project root. | |
""" | |
article = "<p style='text-align: center'>Powered by PaddleOCR and Gradio. Deployed on Hugging Face Spaces.</p>" | |
# For this setup, the language dropdown is mainly informational as models are pre-loaded. | |
# To truly switch languages, ocr_engine would need re-initialization with different model/dict paths. | |
supported_langs_display_for_dropdown = { | |
"English (Loaded)": "en", | |
# "Chinese (Not Loaded)": "ch", # Example if you were to add more | |
} | |
iface = gr.Interface( | |
fn=ocr_process, | |
inputs=[ | |
gr.Image(type="pil", label="Upload Image"), | |
gr.Dropdown( | |
choices=list(supported_langs_display_for_dropdown.keys()), | |
label="Language (Using Pre-loaded Model)", | |
# Default to the key corresponding to SELECTED_LANGUAGE | |
value=[k for k, v in supported_langs_display_for_dropdown.items() if v == SELECTED_LANGUAGE][0] | |
) | |
], | |
outputs=[ | |
gr.Image(type="pil", label="Processed Image with OCR"), | |
gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True) | |
], | |
title=title, | |
description=description, | |
article=article, | |
allow_flagging='never', # Disables the "Flag" button | |
# You can add example images to your repository and list them here | |
# examples=[ | |
# ["path_to_your_example_image_in_repo.png", "English (Loaded)"] | |
# ] | |
) | |
if __name__ == '__main__': | |
if ocr_engine is None: | |
print("OCR Engine could not be initialized. The Gradio app will not function correctly.") | |
# In a real scenario, you might want to display an error in the Gradio UI itself | |
# by modifying the interface or raising an error that Gradio can catch. | |
print("Launching Gradio interface...") | |
iface.launch() | |
print("Gradio interface launched.") | |