# NOTE: the top of this file originally held web file-viewer residue that is
# not Python: a "File size: 8,983 Bytes" banner, the revision hashes a914ac3,
# 7550ca1, 986b927 and 4bc7466, and a 1-192 line-number gutter.
import gradio as gr
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
import numpy as np
import os

# --- Configuration: bundled model locations and drawing font ---
# NOTE: every folder and file name below has to match the contents of the
# 'paddleocr_models' directory exactly.

# Language code handed to PaddleOCR; it mainly decides which dictionary is
# looked up by default.
SELECTED_LANGUAGE = 'en'

# Root folder that holds all bundled model directories.
MODEL_BASE_DIR = 'paddleocr_models'

# Folder names of the individual models (names taken from the PaddleOCR logs).
DET_MODEL_FOLDER_NAME = 'en_PP-OCRv3_det_infer'  # text detection
REC_MODEL_FOLDER_NAME = 'en_PP-OCRv4_rec_infer'  # text recognition
CLS_MODEL_FOLDER_NAME = 'ch_ppocr_mobile_v2.0_cls_infer'  # angle classification (often shared)

# Name of the recognition character dictionary (use your own .txt name here
# if it differs).
REC_CHAR_DICT_FILENAME = 'en_dict.txt'

# Resolved locations, all relative to MODEL_BASE_DIR.
DET_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, DET_MODEL_FOLDER_NAME)
REC_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, REC_MODEL_FOLDER_NAME)
CLS_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, CLS_MODEL_FOLDER_NAME)

# The dictionary is expected INSIDE the recognition model folder, i.e.
# 'paddleocr_models/en_PP-OCRv4_rec_infer/en_dict.txt'. It may not be there
# after copying the model from the cache, so copy the default en_dict.txt
# (e.g. from the PaddleOCR package utils) into that folder yourself.
REC_CHAR_DICT_PATH_DEFAULT = os.path.join(REC_MODEL_DIR_DEFAULT, REC_CHAR_DICT_FILENAME)

# Font used when drawing OCR results; 'latin.ttf' (e.g. a renamed
# DejaVuSans.ttf) must sit in the project root. A missing font only triggers
# a warning here so the app can still start.
FONT_PATH = 'latin.ttf'
if not os.path.exists(FONT_PATH):
    print(f"WARNING: Font file '{FONT_PATH}' not found. Text rendering on images might fail or look incorrect.")

# --- Build the shared PaddleOCR engine once, at import time ---
# `ocr_engine` stays None whenever start-up fails, so later code can test for
# that instead of crashing.
ocr_engine = None
try:
    # Stop early if a mandatory model folder or the character dictionary is
    # absent. Checked in this order: detection, recognition, dictionary.
    for _required_path, _problem in (
        (
            DET_MODEL_DIR_DEFAULT,
            f"Detection model directory not found: '{DET_MODEL_DIR_DEFAULT}'. Please ensure it exists and contains model files.",
        ),
        (
            REC_MODEL_DIR_DEFAULT,
            f"Recognition model directory not found: '{REC_MODEL_DIR_DEFAULT}'. Please ensure it exists and contains model files.",
        ),
        (
            REC_CHAR_DICT_PATH_DEFAULT,
            f"Recognition character dictionary not found: '{REC_CHAR_DICT_PATH_DEFAULT}'. Please ensure it's in the recognition model folder.",
        ),
    ):
        if not os.path.exists(_required_path):
            raise FileNotFoundError(_problem)

    print(f"Initializing PaddleOCR with language: {SELECTED_LANGUAGE}")
    print(f"  Detection Model Dir: {DET_MODEL_DIR_DEFAULT}")
    print(f"  Recognition Model Dir: {REC_MODEL_DIR_DEFAULT}")
    print(f"  Recognition Char Dict Path: {REC_CHAR_DICT_PATH_DEFAULT}")

    # The angle classifier is optional: it is enabled only when its folder exists.
    use_cls = os.path.exists(CLS_MODEL_DIR_DEFAULT)
    print(
        f"  Classification Model Dir: {CLS_MODEL_DIR_DEFAULT}"
        if use_cls
        else f"  Classification Model: Not found at '{CLS_MODEL_DIR_DEFAULT}' or not used."
    )

    ocr_engine = PaddleOCR(
        lang=SELECTED_LANGUAGE,  # still passed along, but the explicit dictionary path is what matters
        det_model_dir=DET_MODEL_DIR_DEFAULT,
        rec_model_dir=REC_MODEL_DIR_DEFAULT,
        rec_char_dict_path=REC_CHAR_DICT_PATH_DEFAULT,  # dictionary given explicitly
        use_angle_cls=use_cls,
        cls_model_dir=CLS_MODEL_DIR_DEFAULT if use_cls else None,
        show_log=True,  # switch to False for quieter production logs
        use_gpu=False,  # switch to True only with GPU hardware and paddlepaddle-gpu
    )
    print("PaddleOCR engine initialized successfully from local models.")

except FileNotFoundError as fnf_error:
    # A missing model/dictionary path: point the operator at the bundle layout.
    print(f"FATAL ERROR (FileNotFound): {fnf_error}")
    print("Please check your 'paddleocr_models' directory and model/dict file paths in app.py.")
    ocr_engine = None
except Exception as e:
    # Any other start-up failure: log it and leave the engine unset.
    print(f"FATAL ERROR: Could not initialize PaddleOCR engine: {e}")
    ocr_engine = None

def ocr_process(image_pil, language_key_display_name):
    """Run OCR on an uploaded image with the pre-loaded PaddleOCR engine.

    Args:
        image_pil: PIL image from the Gradio image input, or None when
            nothing was uploaded.
        language_key_display_name: Label chosen in the language dropdown.
            It is not used here; the engine is created once at import time
            for SELECTED_LANGUAGE.

    Returns:
        A ``(image, text)`` tuple matching the two Gradio outputs:

        * ``(None, message)`` when the engine is unavailable or no image
          was provided;
        * ``(image_pil, "No text detected.")`` when OCR finds nothing;
        * ``(image_pil, message + raw text)`` when the drawing font is missing;
        * ``(annotated image, extracted text)`` on success;
        * ``(image_pil, error message)`` when OCR or drawing raises.
    """
    if ocr_engine is None:
        # Shown to the user in the Gradio interface.
        return None, "PaddleOCR engine is not available. Please check the application logs for errors."
    if image_pil is None:
        return None, "No image provided. Please upload an image."

    print(f"Processing with pre-loaded language: {SELECTED_LANGUAGE}")

    try:
        # One RGB copy feeds both recognition and drawing, so a non-RGB
        # input (e.g. RGBA or grayscale) is handled the same way in both steps.
        image_rgb = image_pil.convert('RGB')
        img_np = np.array(image_rgb)

        print("Performing OCR...")
        # `ocr` runs detection, optional angle classification and recognition.
        result = ocr_engine.ocr(img_np, cls=ocr_engine.use_angle_cls)
        print("OCR processing complete.")

        # The code below reads result[0] as the list of lines for the single
        # input image. Guard against None, an empty list and an empty/None
        # first entry so "nothing found" is never reported as an IndexError.
        if not result or not result[0]:
            print("No text detected.")
            return image_pil, "No text detected."

        # Each line is [box, (text, score)].
        lines = result[0]
        boxes = [line[0] for line in lines]
        txts = [line[1][0] for line in lines]
        scores = [line[1][1] for line in lines]
        extracted_text = "\n".join(txts)

        print("Drawing OCR results...")
        if not os.path.exists(FONT_PATH):
            print(f"Font file '{FONT_PATH}' still not found. Cannot draw results on image.")
            # Without the font, hand back the untouched image plus the raw text.
            return image_pil, f"Font file missing. Extracted text (raw):\n{extracted_text}"

        im_show = draw_ocr(image_rgb, boxes, txts, scores, font_path=FONT_PATH)
        # The drawn result is converted back to a PIL image for the Gradio output.
        im_show_pil = Image.fromarray(im_show)
        print("OCR results drawn.")

        return im_show_pil, extracted_text

    except Exception as e:
        # Top-level UI boundary: report the failure to the user instead of
        # letting the request crash, and keep the original image visible.
        print(f"Error during OCR processing: {e}")
        return image_pil, f"An error occurred during OCR: {str(e)}"

# --- Gradio UI: page texts, input/output widgets and the Interface object ---
title = "PaddleOCR Web App (Bundled Models)"
article = "<p style='text-align: center'>Powered by PaddleOCR and Gradio. Deployed on Hugging Face Spaces.</p>"
description = f"""
Upload an image to perform OCR. This app uses PaddleOCR with pre-bundled models
for the **{SELECTED_LANGUAGE.upper()}** language to avoid re-downloads on Hugging Face Spaces.
Detection: `{DET_MODEL_FOLDER_NAME}`
Recognition: `{REC_MODEL_FOLDER_NAME}` (using `{REC_CHAR_DICT_FILENAME}`)
Make sure the model files are correctly placed in the `paddleocr_models` directory
and the font file `{FONT_PATH}` is in the project root.
"""

# Dropdown label -> language code. The dropdown is informational only because
# the models are loaded once at start-up; really switching language would mean
# re-creating ocr_engine with other model/dictionary paths. Further entries
# (for example a Chinese one mapped to "ch") could be added here.
supported_langs_display_for_dropdown = {
    "English (Loaded)": "en",
}

# Pre-select the label whose code equals SELECTED_LANGUAGE.
_default_language_label = [
    label
    for label, code in supported_langs_display_for_dropdown.items()
    if code == SELECTED_LANGUAGE
][0]

# Widgets are created in the same order they appear in the interface:
# both inputs first, then both outputs.
_image_input = gr.Image(type="pil", label="Upload Image")
_language_input = gr.Dropdown(
    choices=list(supported_langs_display_for_dropdown),
    label="Language (Using Pre-loaded Model)",
    value=_default_language_label,
)
_annotated_output = gr.Image(type="pil", label="Processed Image with OCR")
_text_output = gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True)

# Example images can be added to the repository and passed via `examples=`,
# as [image path, dropdown label] pairs.
iface = gr.Interface(
    fn=ocr_process,
    inputs=[_image_input, _language_input],
    outputs=[_annotated_output, _text_output],
    title=title,
    description=description,
    article=article,
    allow_flagging='never',  # hides the "Flag" button
)

if __name__ == '__main__':
    # The UI is started even without a working engine; in that case
    # ocr_process answers every request with an "engine is not available"
    # message. Surfacing the failure in the UI itself would need a change to
    # the interface or an error that Gradio can catch.
    engine_unavailable = ocr_engine is None
    if engine_unavailable:
        print("OCR Engine could not be initialized. The Gradio app will not function correctly.")
    print("Launching Gradio interface...")
    iface.launch()
    # NOTE(review): launch() presumably blocks when run as a script, so this
    # line may only appear once the server stops — confirm.
    print("Gradio interface launched.")