"""Gradio web app that runs PaddleOCR on an uploaded image using bundled models.

The OCR engine is created once at import time from model folders shipped in
``paddleocr_models``. If initialisation fails, ``ocr_engine`` stays ``None``
and the UI reports the problem instead of crashing.
"""

import os

import gradio as gr
import numpy as np
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# --- Configuration: Model and Font Paths ---
# IMPORTANT: Ensure these paths and folder names match exactly what you have
# in your 'paddleocr_models' directory.

# Language passed to PaddleOCR; it also selects the default dropdown entry.
SELECTED_LANGUAGE = 'en'

# Base directory for the bundled models.
MODEL_BASE_DIR = 'paddleocr_models'

# Detection model: en_PP-OCRv3_det_infer
DET_MODEL_FOLDER_NAME = 'en_PP-OCRv3_det_infer'
DET_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, DET_MODEL_FOLDER_NAME)

# Recognition model: en_PP-OCRv4_rec_infer
REC_MODEL_FOLDER_NAME = 'en_PP-OCRv4_rec_infer'
REC_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, REC_MODEL_FOLDER_NAME)

# Classification model: ch_ppocr_mobile_v2.0_cls_infer (often shared across languages)
CLS_MODEL_FOLDER_NAME = 'ch_ppocr_mobile_v2.0_cls_infer'
CLS_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, CLS_MODEL_FOLDER_NAME)

# --- Character Dictionary Path ---
# The dictionary is expected inside the recognition model folder, i.e.
# 'paddleocr_models/en_PP-OCRv4_rec_infer/en_dict.txt'. Copy it there if the
# model folder did not ship with one.
REC_CHAR_DICT_FILENAME = 'en_dict.txt'  # Or whatever your .txt file is named
REC_CHAR_DICT_PATH_DEFAULT = os.path.join(REC_MODEL_DIR_DEFAULT, REC_CHAR_DICT_FILENAME)

# --- Font for drawing OCR results ---
# Ensure 'latin.ttf' (e.g., DejaVuSans.ttf renamed) is in your project root.
FONT_PATH = 'latin.ttf'
if not os.path.exists(FONT_PATH):
    print(f"WARNING: Font file '{FONT_PATH}' not found. Text rendering on images might fail or look incorrect.")

# --- Initialize PaddleOCR Engine ---
ocr_engine = None
try:
    # Fail early with a precise message when a required artefact is missing.
    if not os.path.exists(DET_MODEL_DIR_DEFAULT):
        raise FileNotFoundError(
            f"Detection model directory not found: '{DET_MODEL_DIR_DEFAULT}'. "
            "Please ensure it exists and contains model files."
        )
    if not os.path.exists(REC_MODEL_DIR_DEFAULT):
        raise FileNotFoundError(
            f"Recognition model directory not found: '{REC_MODEL_DIR_DEFAULT}'. "
            "Please ensure it exists and contains model files."
        )
    if not os.path.exists(REC_CHAR_DICT_PATH_DEFAULT):
        raise FileNotFoundError(
            f"Recognition character dictionary not found: '{REC_CHAR_DICT_PATH_DEFAULT}'. "
            "Please ensure it's in the recognition model folder."
        )

    print(f"Initializing PaddleOCR with language: {SELECTED_LANGUAGE}")
    print(f" Detection Model Dir: {DET_MODEL_DIR_DEFAULT}")
    print(f" Recognition Model Dir: {REC_MODEL_DIR_DEFAULT}")
    print(f" Recognition Char Dict Path: {REC_CHAR_DICT_PATH_DEFAULT}")

    # The angle classifier is optional: enable it only when its folder is bundled.
    use_cls = os.path.exists(CLS_MODEL_DIR_DEFAULT)
    if use_cls:
        print(f" Classification Model Dir: {CLS_MODEL_DIR_DEFAULT}")
    else:
        print(f" Classification Model: Not found at '{CLS_MODEL_DIR_DEFAULT}' or not used.")

    ocr_engine = PaddleOCR(
        use_angle_cls=use_cls,
        lang=SELECTED_LANGUAGE,  # Still useful for some internal logic, but dict path is key
        det_model_dir=DET_MODEL_DIR_DEFAULT,
        rec_model_dir=REC_MODEL_DIR_DEFAULT,
        rec_char_dict_path=REC_CHAR_DICT_PATH_DEFAULT,  # Explicitly providing the dictionary path
        cls_model_dir=CLS_MODEL_DIR_DEFAULT if use_cls else None,
        show_log=True,  # Set to False for less verbose logs in production if desired
        use_gpu=False,  # Set to True if you have GPU hardware on Spaces and paddlepaddle-gpu
    )
    print("PaddleOCR engine initialized successfully from local models.")
except FileNotFoundError as fnf_error:
    print(f"FATAL ERROR (FileNotFound): {fnf_error}")
    print("Please check your 'paddleocr_models' directory and model/dict file paths in app.py.")
    ocr_engine = None
except Exception as e:
    # Top-level boundary: keep the app importable so the UI can report the failure.
    print(f"FATAL ERROR: Could not initialize PaddleOCR engine: {e}")
    ocr_engine = None  # Ensure it's None if initialization fails


def ocr_process(image_pil, language_key_display_name):
    """Run OCR on an uploaded image with the pre-loaded PaddleOCR engine.

    Args:
        image_pil: The uploaded image as a PIL image, or ``None`` when the
            user submitted nothing.
        language_key_display_name: Value of the language dropdown. It is not
            used: the engine is bound to ``SELECTED_LANGUAGE`` at start-up.

    Returns:
        A ``(image, text)`` tuple for the two Gradio outputs. ``image`` is the
        annotated image on success, the original image when nothing was
        detected, the font is missing, or an error occurred, and ``None`` when
        the engine or the input is unavailable. ``text`` is the recognised
        text joined by newlines, or a status/error message.
    """
    if ocr_engine is None:
        # This message will be displayed to the user in the Gradio interface.
        return None, "PaddleOCR engine is not available. Please check the application logs for errors."
    if image_pil is None:
        return None, "No image provided. Please upload an image."

    print(f"Processing with pre-loaded language: {SELECTED_LANGUAGE}")
    try:
        # Normalise to 3-channel RGB once; the same copy is used for OCR and
        # for drawing so RGBA / grayscale / palette uploads do not break the
        # drawing step with a channel mismatch.
        image_rgb = image_pil.convert('RGB')
        img_np = np.array(image_rgb)

        print("Performing OCR...")
        # The `ocr` method runs the det, cls (if enabled), and rec models.
        result = ocr_engine.ocr(img_np, cls=ocr_engine.use_angle_cls)
        print("OCR processing complete.")

        # `result` is indexed per input image; result[0] holds the detected
        # lines, each shaped [box, (text, score)]. Treat None, an empty list,
        # or an empty/None first entry as "nothing found".
        if not result or not result[0]:
            print("No text detected.")
            return image_pil, "No text detected."

        lines = result[0]
        boxes = [line[0] for line in lines]
        txts = [line[1][0] for line in lines]
        scores = [line[1][1] for line in lines]
        extracted_text = "\n".join(txts)

        print("Drawing OCR results...")
        if not os.path.exists(FONT_PATH):
            print(f"Font file '{FONT_PATH}' still not found. Cannot draw results on image.")
            # Return original image and extracted text without drawn boxes.
            return image_pil, f"Font file missing. Extracted text (raw):\n{extracted_text}"

        im_show = draw_ocr(image_rgb, boxes, txts, scores, font_path=FONT_PATH)
        # draw_ocr returns a numpy array; convert it back for the PIL output.
        im_show_pil = Image.fromarray(im_show)
        print("OCR results drawn.")
        return im_show_pil, extracted_text
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        print(f"Error during OCR processing: {e}")
        return image_pil, f"An error occurred during OCR: {str(e)}"


# --- Gradio Interface Definition ---
title = "PaddleOCR Web App (Bundled Models)"
description = f"""
Upload an image to perform OCR. This app uses PaddleOCR with pre-bundled models for the **{SELECTED_LANGUAGE.upper()}** language
to avoid re-downloads on Hugging Face Spaces.

Detection: `{DET_MODEL_FOLDER_NAME}`

Recognition: `{REC_MODEL_FOLDER_NAME}` (using `{REC_CHAR_DICT_FILENAME}`)

Make sure the model files are correctly placed in the `paddleocr_models` directory
and the font file `{FONT_PATH}` is in the project root.
"""
article = "Powered by PaddleOCR and Gradio. Deployed on Hugging Face Spaces."

# For this setup, the language dropdown is mainly informational as models are pre-loaded.
# To truly switch languages, ocr_engine would need re-initialization with different model/dict paths.
supported_langs_display_for_dropdown = {
    "English (Loaded)": "en",
    # "Chinese (Not Loaded)": "ch",  # Example if you were to add more
}

# Display name whose code matches SELECTED_LANGUAGE; None (no preselection)
# instead of an IndexError at import time if the mapping has no such entry.
_default_language_label = next(
    (
        label
        for label, code in supported_langs_display_for_dropdown.items()
        if code == SELECTED_LANGUAGE
    ),
    None,
)

iface = gr.Interface(
    fn=ocr_process,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Dropdown(
            choices=list(supported_langs_display_for_dropdown),
            label="Language (Using Pre-loaded Model)",
            value=_default_language_label,
        ),
    ],
    outputs=[
        gr.Image(type="pil", label="Processed Image with OCR"),
        gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True),
    ],
    title=title,
    description=description,
    article=article,
    allow_flagging='never',  # Disables the "Flag" button
    # You can add example images to your repository and list them here
    # examples=[
    #     ["path_to_your_example_image_in_repo.png", "English (Loaded)"]
    # ]
)

if __name__ == '__main__':
    if ocr_engine is None:
        print("OCR Engine could not be initialized. The Gradio app will not function correctly.")
        # In a real scenario, you might want to display an error in the Gradio UI itself
        # by modifying the interface or raising an error that Gradio can catch.
    print("Launching Gradio interface...")
    iface.launch()
    print("Gradio interface launched.")