# Source: Hugging Face Spaces file view (Space status: Running).
# File size: 8,983 bytes.
# Revision hashes shown alongside the file: a914ac3, 7550ca1, 986b927, 4bc7466.
import gradio as gr
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
import numpy as np
import os
# --- Configuration: Model and Font Paths ---
# NOTE: every folder and file name below has to match the contents of the
# bundled 'paddleocr_models' directory exactly.

# Language code; it determines which dictionary is looked for primarily.
SELECTED_LANGUAGE = 'en'

# Root folder that holds the bundled models.
MODEL_BASE_DIR = 'paddleocr_models'

# --- Model folder names ---
DET_MODEL_FOLDER_NAME = 'en_PP-OCRv3_det_infer'            # text detection
REC_MODEL_FOLDER_NAME = 'en_PP-OCRv4_rec_infer'            # text recognition
CLS_MODEL_FOLDER_NAME = 'ch_ppocr_mobile_v2.0_cls_infer'   # angle classification (often shared)

# Each model directory is simply its folder name placed under MODEL_BASE_DIR.
DET_MODEL_DIR_DEFAULT, REC_MODEL_DIR_DEFAULT, CLS_MODEL_DIR_DEFAULT = (
    os.path.join(MODEL_BASE_DIR, folder_name)
    for folder_name in (
        DET_MODEL_FOLDER_NAME,
        REC_MODEL_FOLDER_NAME,
        CLS_MODEL_FOLDER_NAME,
    )
)

# --- Character Dictionary Path ---
# The dictionary may not end up in the recognition folder when models are
# copied out of the cache, so it is expected to have been copied there by hand:
# 'en_dict.txt' must sit inside 'paddleocr_models/en_PP-OCRv4_rec_infer/'.
REC_CHAR_DICT_FILENAME = 'en_dict.txt'  # adjust if your .txt file has another name
REC_CHAR_DICT_PATH_DEFAULT = os.path.join(REC_MODEL_DIR_DEFAULT, REC_CHAR_DICT_FILENAME)

# --- Font for drawing OCR results ---
# 'latin.ttf' (e.g. DejaVuSans.ttf renamed) is expected in the project root.
FONT_PATH = 'latin.ttf'
# Warn at start-up, without aborting, when the font used for drawing is absent.
if not os.path.exists(FONT_PATH):
    print(f"WARNING: Font file '{FONT_PATH}' not found. Text rendering on images might fail or look incorrect.")


def _require_path(path, message):
    """Raise FileNotFoundError carrying *message* unless *path* exists."""
    if not os.path.exists(path):
        raise FileNotFoundError(message)


# --- Initialize PaddleOCR Engine ---
# `ocr_engine` stays None whenever start-up fails, so the rest of the module
# can detect the failure instead of crashing at import time.
ocr_engine = None
try:
    # The detection and recognition model folders are mandatory.
    _require_path(
        DET_MODEL_DIR_DEFAULT,
        f"Detection model directory not found: '{DET_MODEL_DIR_DEFAULT}'. Please ensure it exists and contains model files.",
    )
    _require_path(
        REC_MODEL_DIR_DEFAULT,
        f"Recognition model directory not found: '{REC_MODEL_DIR_DEFAULT}'. Please ensure it exists and contains model files.",
    )
    # So is the character dictionary used by the recognition model.
    _require_path(
        REC_CHAR_DICT_PATH_DEFAULT,
        f"Recognition character dictionary not found: '{REC_CHAR_DICT_PATH_DEFAULT}'. Please ensure it's in the recognition model folder.",
    )

    print(f"Initializing PaddleOCR with language: {SELECTED_LANGUAGE}")
    print(f" Detection Model Dir: {DET_MODEL_DIR_DEFAULT}")
    print(f" Recognition Model Dir: {REC_MODEL_DIR_DEFAULT}")
    print(f" Recognition Char Dict Path: {REC_CHAR_DICT_PATH_DEFAULT}")

    # The angle classifier is optional: it is enabled only if its folder exists.
    use_cls = os.path.exists(CLS_MODEL_DIR_DEFAULT)
    print(
        f" Classification Model Dir: {CLS_MODEL_DIR_DEFAULT}"
        if use_cls
        else f" Classification Model: Not found at '{CLS_MODEL_DIR_DEFAULT}' or not used."
    )

    ocr_engine = PaddleOCR(
        lang=SELECTED_LANGUAGE,  # still useful for some internal logic; the dict path is what matters
        det_model_dir=DET_MODEL_DIR_DEFAULT,
        rec_model_dir=REC_MODEL_DIR_DEFAULT,
        rec_char_dict_path=REC_CHAR_DICT_PATH_DEFAULT,  # dictionary path given explicitly
        use_angle_cls=use_cls,
        cls_model_dir=CLS_MODEL_DIR_DEFAULT if use_cls else None,
        show_log=True,  # set to False for quieter production logs
        use_gpu=False,  # set to True only with GPU hardware and paddlepaddle-gpu
    )
    print("PaddleOCR engine initialized successfully from local models.")
except FileNotFoundError as missing_file_error:
    # A bundled model folder or the dictionary is missing.
    print(f"FATAL ERROR (FileNotFound): {missing_file_error}")
    print("Please check your 'paddleocr_models' directory and model/dict file paths in app.py.")
    ocr_engine = None
except Exception as init_error:
    # Any other start-up failure: report it and leave the engine unset.
    print(f"FATAL ERROR: Could not initialize PaddleOCR engine: {init_error}")
    ocr_engine = None
def ocr_process(image_pil, language_key_display_name):
    """Run OCR on an uploaded image with the pre-loaded PaddleOCR engine.

    Args:
        image_pil: PIL image from the Gradio image input, or ``None`` when
            nothing was uploaded.
        language_key_display_name: Value of the language dropdown. It is not
            used here: the engine and its language are fixed at start-up.

    Returns:
        A ``(image, text)`` tuple for the two Gradio outputs:

        * ``(None, message)`` when the engine is unavailable or no image was
          provided;
        * ``(image_pil, message)`` when no text is detected, the font file is
          missing (the message then carries the raw extracted text), or an
          error occurs during processing;
        * ``(annotated image, extracted text)`` on success, with one
          recognised line of text per output line.
    """
    if ocr_engine is None:
        # This message is displayed to the user in the Gradio interface.
        return None, "PaddleOCR engine is not available. Please check the application logs for errors."
    if image_pil is None:
        return None, "No image provided. Please upload an image."

    print(f"Processing with pre-loaded language: {SELECTED_LANGUAGE}")
    try:
        # Convert to RGB once and use that same image for recognition and for
        # drawing, so both steps see the same channel layout.
        image_rgb = image_pil.convert('RGB')
        img_np = np.array(image_rgb)

        print("Performing OCR...")
        # The `ocr` method uses the det, cls (if enabled), and rec models.
        result = ocr_engine.ocr(img_np, cls=ocr_engine.use_angle_cls)
        print("OCR processing complete.")

        # result[0] holds the lines found in the image. `not result` covers
        # both None and an empty list; indexing an empty list would raise
        # IndexError and be reported as a processing error instead.
        if not result or not result[0]:
            print("No text detected.")
            return image_pil, "No text detected."

        # Each line has the form [box, (text, score)].
        lines = result[0]
        boxes = [line[0] for line in lines]
        txts = [line[1][0] for line in lines]
        scores = [line[1][1] for line in lines]
        extracted_text = "\n".join(txts)

        print("Drawing OCR results...")
        if not os.path.exists(FONT_PATH):
            print(f"Font file '{FONT_PATH}' still not found. Cannot draw results on image.")
            # Without the font, return the untouched upload plus the raw text.
            return image_pil, f"Font file missing. Extracted text (raw):\n{extracted_text}"

        im_show = draw_ocr(image_rgb, boxes, txts, scores, font_path=FONT_PATH)
        # draw_ocr yields a numpy array; convert it back to a PIL image.
        im_show_pil = Image.fromarray(im_show)
        print("OCR results drawn.")
        return im_show_pil, extracted_text
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the
        # request, and hand back the original image.
        print(f"Error during OCR processing: {e}")
        return image_pil, f"An error occurred during OCR: {e}"
# --- Gradio Interface Definition ---
title = "PaddleOCR Web App (Bundled Models)"
description = f"""
Upload an image to perform OCR. This app uses PaddleOCR with pre-bundled models
for the **{SELECTED_LANGUAGE.upper()}** language to avoid re-downloads on Hugging Face Spaces.
Detection: `{DET_MODEL_FOLDER_NAME}`
Recognition: `{REC_MODEL_FOLDER_NAME}` (using `{REC_CHAR_DICT_FILENAME}`)
Make sure the model files are correctly placed in the `paddleocr_models` directory
and the font file `{FONT_PATH}` is in the project root.
"""
article = "<p style='text-align: center'>Powered by PaddleOCR and Gradio. Deployed on Hugging Face Spaces.</p>"

# The language dropdown is mainly informational because the models are
# pre-loaded. Truly switching languages would require re-initializing
# ocr_engine with different model/dict paths.
supported_langs_display_for_dropdown = {
    "English (Loaded)": "en",
    # "Chinese (Not Loaded)": "ch",  # Example if you were to add more
}

# Dropdown label whose language code equals SELECTED_LANGUAGE. If the mapping
# has no such entry this is None rather than an IndexError at import time,
# and the dropdown's initial selection is left to Gradio.
default_language_label = next(
    (
        label
        for label, code in supported_langs_display_for_dropdown.items()
        if code == SELECTED_LANGUAGE
    ),
    None,
)

iface = gr.Interface(
    fn=ocr_process,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Dropdown(
            choices=list(supported_langs_display_for_dropdown.keys()),
            label="Language (Using Pre-loaded Model)",
            value=default_language_label,
        ),
    ],
    outputs=[
        gr.Image(type="pil", label="Processed Image with OCR"),
        gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True),
    ],
    title=title,
    description=description,
    article=article,
    allow_flagging='never',  # Disables the "Flag" button
    # You can add example images to your repository and list them here
    # examples=[
    #     ["path_to_your_example_image_in_repo.png", "English (Loaded)"]
    # ]
)
def _launch_app():
    """Start the Gradio interface, warning first if the OCR engine is missing."""
    engine_ready = ocr_engine is not None
    if not engine_ready:
        print("OCR Engine could not be initialized. The Gradio app will not function correctly.")
        # A real deployment might surface this in the Gradio UI itself, either
        # by changing the interface or by raising an error Gradio can catch.
    print("Launching Gradio interface...")
    iface.launch()
    print("Gradio interface launched.")


if __name__ == '__main__':
    _launch_app()
|