Spaces:
Running
Running
Commit
·
a914ac3
1
Parent(s):
7550ca1
Prepare project for Hugging Face Spaces with Gradio and bundled models
Browse files- .dockerignore +0 -9
- .gitignore +32 -4
- Dockerfile +0 -28
- app.py +187 -68
- download_ocr_models.py +129 -0
- latin.ttf +3 -0
- paddleocr_models/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams +3 -0
- paddleocr_models/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams.info +3 -0
- paddleocr_models/ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel +3 -0
- paddleocr_models/en_PP-OCRv3_det_infer/inference.pdiparams +3 -0
- paddleocr_models/en_PP-OCRv3_det_infer/inference.pdiparams.info +3 -0
- paddleocr_models/en_PP-OCRv3_det_infer/inference.pdmodel +3 -0
- paddleocr_models/en_PP-OCRv4_rec_infer/en_dict.txt +95 -0
- paddleocr_models/en_PP-OCRv4_rec_infer/inference.pdiparams +3 -0
- paddleocr_models/en_PP-OCRv4_rec_infer/inference.pdiparams.info +3 -0
- paddleocr_models/en_PP-OCRv4_rec_infer/inference.pdmodel +3 -0
- render.yaml +0 -10
- requirements.txt +14 -4
- run_ocr.py +0 -8
- static/style.css +0 -179
- templates/index.html +0 -43
.dockerignore
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
__pycache__/
|
2 |
-
*.pyc
|
3 |
-
*.pyo
|
4 |
-
*.pyd
|
5 |
-
.Python
|
6 |
-
env/
|
7 |
-
venv/
|
8 |
-
.git/
|
9 |
-
*.egg-info/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
CHANGED
@@ -1,8 +1,36 @@
|
|
|
|
1 |
__pycache__/
|
2 |
-
*.
|
3 |
-
|
4 |
-
*.
|
5 |
-
uploads/
|
6 |
.env
|
|
|
|
|
7 |
venv/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
.DS_Store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python
|
2 |
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
*.egg-info/
|
|
|
6 |
.env
|
7 |
+
.venv
|
8 |
+
env/
|
9 |
venv/
|
10 |
+
ENV/
|
11 |
+
env.bak/
|
12 |
+
venv.bak/
|
13 |
+
|
14 |
+
# PaddleOCR / PaddlePaddle cache (if any created locally, not strictly needed for repo)
|
15 |
+
.paddlehub/
|
16 |
+
.paddleocr/
|
17 |
+
|
18 |
+
# IDE / OS files
|
19 |
+
.vscode/
|
20 |
+
.idea/
|
21 |
+
*.swp
|
22 |
+
*.swo
|
23 |
+
*~
|
24 |
.DS_Store
|
25 |
+
Thumbs.db
|
26 |
+
|
27 |
+
# Local model downloads if you accidentally commit them outside the designated 'paddleocr_models' dir
|
28 |
+
# (The 'paddleocr_models' directory itself SHOULD be committed)
|
29 |
+
|
30 |
+
# If you use Git LFS for models (optional, if models are very large)
|
31 |
+
# .gitattributes # This file itself should be committed if LFS is used
|
32 |
+
# *.pdparams # Example if you track these with LFS
|
33 |
+
|
34 |
+
# Output files
|
35 |
+
result.jpg
|
36 |
+
result.png
|
Dockerfile
DELETED
@@ -1,28 +0,0 @@
|
|
1 |
-
# Use official Python image
|
2 |
-
FROM python:3.9-slim
|
3 |
-
|
4 |
-
# Set working directory
|
5 |
-
WORKDIR /app
|
6 |
-
|
7 |
-
# Install system dependencies
|
8 |
-
RUN apt-get update && apt-get install -y \
|
9 |
-
libgl1-mesa-glx \
|
10 |
-
libgomp1 \
|
11 |
-
libglib2.0-0 \
|
12 |
-
&& rm -rf /var/lib/apt/lists/*
|
13 |
-
|
14 |
-
# Install Python dependencies
|
15 |
-
COPY requirements.txt .
|
16 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
17 |
-
|
18 |
-
# Copy app files
|
19 |
-
COPY . .
|
20 |
-
|
21 |
-
# Set environment variable for port
|
22 |
-
ENV PORT=8080
|
23 |
-
|
24 |
-
# Expose the port
|
25 |
-
EXPOSE 8080
|
26 |
-
|
27 |
-
# Start the app
|
28 |
-
CMD ["gunicorn", "--bind", "0.0.0.0:8080", "app:app"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,72 +1,191 @@
|
|
1 |
-
|
2 |
-
import
|
3 |
-
from werkzeug.utils import secure_filename
|
4 |
-
from paddleocr import PaddleOCR
|
5 |
from PIL import Image
|
6 |
-
import
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
#
|
14 |
-
|
15 |
-
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
#
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
#
|
56 |
-
result
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
if __name__ == '__main__':
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from paddleocr import PaddleOCR, draw_ocr
|
|
|
|
|
3 |
from PIL import Image
|
4 |
+
import numpy as np
|
5 |
+
import os
|
6 |
+
|
7 |
+
# --- Configuration: Model and Font Paths ---
# Every path below is relative and must match the folder/file names that are
# actually present in the bundled 'paddleocr_models' directory.

# Language code passed to PaddleOCR and shown in the UI description.
SELECTED_LANGUAGE = 'en'

# Directory that contains one sub-folder per bundled model.
MODEL_BASE_DIR = 'paddleocr_models'

# Sub-folder names of the detection, recognition and angle-classification
# models (the classification model is the shared 'ch_ppocr_mobile' one).
DET_MODEL_FOLDER_NAME = 'en_PP-OCRv3_det_infer'
REC_MODEL_FOLDER_NAME = 'en_PP-OCRv4_rec_infer'
CLS_MODEL_FOLDER_NAME = 'ch_ppocr_mobile_v2.0_cls_infer'

# Model directories derived from the base directory and the folder names.
DET_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, DET_MODEL_FOLDER_NAME)
REC_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, REC_MODEL_FOLDER_NAME)
CLS_MODEL_DIR_DEFAULT = os.path.join(MODEL_BASE_DIR, CLS_MODEL_FOLDER_NAME)

# --- Character Dictionary Path ---
# The dictionary is expected inside the recognition model folder, i.e.
# 'paddleocr_models/en_PP-OCRv4_rec_infer/en_dict.txt'. Change the file name
# here if your .txt dictionary is named differently.
REC_CHAR_DICT_FILENAME = 'en_dict.txt'
REC_CHAR_DICT_PATH_DEFAULT = os.path.join(REC_MODEL_DIR_DEFAULT, REC_CHAR_DICT_FILENAME)

# --- Font for drawing OCR results ---
# 'latin.ttf' (e.g. a renamed DejaVuSans.ttf) must sit in the project root.
FONT_PATH = 'latin.ttf'
font_is_present = os.path.exists(FONT_PATH)
if not font_is_present:
    # Only a warning: OCR itself still works without the font.
    print(f"WARNING: Font file '{FONT_PATH}' not found. Text rendering on images might fail or look incorrect.")
|
42 |
+
|
43 |
+
# --- Initialize PaddleOCR Engine ---
# The engine is created once, at import time. It is left as None on any
# failure so that request handling can report the problem instead of crashing.
ocr_engine = None
try:
    # Required artefacts, checked in this order; the first missing one aborts
    # initialisation with its own message.
    required_artefacts = (
        (
            DET_MODEL_DIR_DEFAULT,
            f"Detection model directory not found: '{DET_MODEL_DIR_DEFAULT}'. Please ensure it exists and contains model files.",
        ),
        (
            REC_MODEL_DIR_DEFAULT,
            f"Recognition model directory not found: '{REC_MODEL_DIR_DEFAULT}'. Please ensure it exists and contains model files.",
        ),
        (
            REC_CHAR_DICT_PATH_DEFAULT,
            f"Recognition character dictionary not found: '{REC_CHAR_DICT_PATH_DEFAULT}'. Please ensure it's in the recognition model folder.",
        ),
    )
    for artefact_path, missing_message in required_artefacts:
        if not os.path.exists(artefact_path):
            raise FileNotFoundError(missing_message)

    print(f"Initializing PaddleOCR with language: {SELECTED_LANGUAGE}")
    print(f" Detection Model Dir: {DET_MODEL_DIR_DEFAULT}")
    print(f" Recognition Model Dir: {REC_MODEL_DIR_DEFAULT}")
    print(f" Recognition Char Dict Path: {REC_CHAR_DICT_PATH_DEFAULT}")

    # The angle classifier is optional: it is enabled only when its folder
    # was bundled alongside the other models.
    use_cls = os.path.exists(CLS_MODEL_DIR_DEFAULT)
    if not use_cls:
        print(f" Classification Model: Not found at '{CLS_MODEL_DIR_DEFAULT}' or not used.")
    else:
        print(f" Classification Model Dir: {CLS_MODEL_DIR_DEFAULT}")

    # NOTE(review): `show_log` and `use_gpu` look like PaddleOCR 2.x keyword
    # arguments - confirm against the version pinned in requirements.txt.
    engine_options = {
        "use_angle_cls": use_cls,
        "lang": SELECTED_LANGUAGE,  # the explicit dictionary path below is what matters most
        "det_model_dir": DET_MODEL_DIR_DEFAULT,
        "rec_model_dir": REC_MODEL_DIR_DEFAULT,
        "rec_char_dict_path": REC_CHAR_DICT_PATH_DEFAULT,
        "cls_model_dir": CLS_MODEL_DIR_DEFAULT if use_cls else None,
        "show_log": True,  # switch to False for quieter production logs
        "use_gpu": False,  # switch to True only with GPU hardware and paddlepaddle-gpu
    }
    ocr_engine = PaddleOCR(**engine_options)
    print("PaddleOCR engine initialized successfully from local models.")

except FileNotFoundError as fnf_error:
    # A bundled model folder or the dictionary file is missing.
    print(f"FATAL ERROR (FileNotFound): {fnf_error}")
    print("Please check your 'paddleocr_models' directory and model/dict file paths in app.py.")
    ocr_engine = None
except Exception as e:
    # Anything else raised while building the engine; keep the app importable.
    print(f"FATAL ERROR: Could not initialize PaddleOCR engine: {e}")
    ocr_engine = None
|
86 |
+
|
87 |
+
def ocr_process(image_pil, language_key_display_name):
    """Run the pre-loaded PaddleOCR engine on an uploaded image.

    Args:
        image_pil: PIL image received from the Gradio image input, or None
            when nothing was uploaded.
        language_key_display_name: Value of the language dropdown. It is
            informational only; the engine loaded at start-up is always used.

    Returns:
        A ``(image, text)`` tuple for the two Gradio outputs:
        * ``(None, message)`` when the engine is unavailable or no image
          was supplied;
        * ``(original image, message)`` when no text is found, the font
          file is missing, or an error occurs;
        * ``(annotated image, extracted text)`` on success, with one
          recognised line per text line.
    """
    if ocr_engine is None:
        # This message is displayed to the user in the Gradio interface.
        return None, "PaddleOCR engine is not available. Please check the application logs for errors."
    if image_pil is None:
        return None, "No image provided. Please upload an image."

    print(f"Processing with pre-loaded language: {SELECTED_LANGUAGE}")

    try:
        # Convert to 3-channel RGB once and use that same image for both
        # recognition and drawing, so the two steps never see different data
        # (e.g. an RGBA or palette upload).
        rgb_image = image_pil.convert('RGB')
        img_np = np.array(rgb_image)

        print("Performing OCR...")
        # `ocr` runs detection, optional angle classification and recognition.
        result = ocr_engine.ocr(img_np, cls=ocr_engine.use_angle_cls)
        print("OCR processing complete.")

        # `not result` covers both None and an empty list; indexing an empty
        # list here would otherwise surface as a confusing IndexError message.
        if not result or not result[0]:
            print("No text detected.")
            return image_pil, "No text detected."

        # result[0] holds the lines of the single input image, each shaped
        # as [box, (text, score)].
        lines = result[0]
        boxes = [line[0] for line in lines]
        txts = [line[1][0] for line in lines]
        scores = [line[1][1] for line in lines]
        extracted_text = "\n".join(txts)

        print("Drawing OCR results...")
        if not os.path.exists(FONT_PATH):
            print(f"Font file '{FONT_PATH}' still not found. Cannot draw results on image.")
            # Without the font, return the untouched upload plus the raw text.
            return image_pil, f"Font file missing. Extracted text (raw):\n{extracted_text}"

        im_show = draw_ocr(rgb_image, boxes, txts, scores, font_path=FONT_PATH)
        # draw_ocr's result is wrapped back into a PIL image for Gradio.
        im_show_pil = Image.fromarray(im_show)
        print("OCR results drawn.")

        return im_show_pil, extracted_text

    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        print(f"Error during OCR processing: {e}")
        return image_pil, f"An error occurred during OCR: {str(e)}"
|
139 |
+
|
140 |
+
# --- Gradio Interface Definition ---
title = "PaddleOCR Web App (Bundled Models)"
description = f"""
Upload an image to perform OCR. This app uses PaddleOCR with pre-bundled models
for the **{SELECTED_LANGUAGE.upper()}** language to avoid re-downloads on Hugging Face Spaces.
Detection: `{DET_MODEL_FOLDER_NAME}`
Recognition: `{REC_MODEL_FOLDER_NAME}` (using `{REC_CHAR_DICT_FILENAME}`)
Make sure the model files are correctly placed in the `paddleocr_models` directory
and the font file `{FONT_PATH}` is in the project root.
"""
article = "<p style='text-align: center'>Powered by PaddleOCR and Gradio. Deployed on Hugging Face Spaces.</p>"

# Dropdown label -> language code. The dropdown is informational only: the
# models are loaded once at start-up, so really switching language would
# require re-creating ocr_engine with other model/dictionary paths.
supported_langs_display_for_dropdown = {
    "English (Loaded)": "en",
    # "Chinese (Not Loaded)": "ch",  # Example if you were to add more
}

# Pre-select the label whose language code equals SELECTED_LANGUAGE.
_labels_for_selected_language = [
    label
    for label, code in supported_langs_display_for_dropdown.items()
    if code == SELECTED_LANGUAGE
]

_image_input = gr.Image(type="pil", label="Upload Image")
_language_input = gr.Dropdown(
    choices=list(supported_langs_display_for_dropdown.keys()),
    label="Language (Using Pre-loaded Model)",
    value=_labels_for_selected_language[0],
)
_image_output = gr.Image(type="pil", label="Processed Image with OCR")
_text_output = gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True)

iface = gr.Interface(
    fn=ocr_process,
    inputs=[_image_input, _language_input],
    outputs=[_image_output, _text_output],
    title=title,
    description=description,
    article=article,
    allow_flagging='never',  # hides the "Flag" button
    # Example images committed to the repository can be listed here:
    # examples=[
    #     ["path_to_your_example_image_in_repo.png", "English (Loaded)"]
    # ]
)
|
183 |
|
184 |
if __name__ == '__main__':
    # The UI is started even without an engine; ocr_process then returns an
    # explanatory message for every request.
    engine_ready = ocr_engine is not None
    if not engine_ready:
        print("OCR Engine could not be initialized. The Gradio app will not function correctly.")

    print("Launching Gradio interface...")
    iface.launch()
    # NOTE(review): launch() may block until the server stops when run as a
    # script, in which case this line is printed at shutdown - confirm.
    print("Gradio interface launched.")
|
download_ocr_models.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# download_ocr_models.py (Corrected v3)
|
2 |
+
from paddleocr import PaddleOCR
|
3 |
+
import os
|
4 |
+
import shutil # For copying files/folders later if you want to automate it
|
5 |
+
|
6 |
+
# --- CONFIGURATION ---
# Language whose models should be downloaded into the PaddleOCR cache.
LANGUAGE_TO_DOWNLOAD = 'en'  # <<< ***** CHANGE THIS TO YOUR TARGET LANGUAGE *****
# --- END CONFIGURATION ---

print(f"Attempting to download/locate models for language: '{LANGUAGE_TO_DOWNLOAD}'...")

try:
    # Creating the engine is what triggers the download of any model that is
    # not yet present in the local cache.
    ocr_temp_engine = PaddleOCR(use_angle_cls=True, lang=LANGUAGE_TO_DOWNLOAD, show_log=True)
    print(f"\nModels for '{LANGUAGE_TO_DOWNLOAD}' should now be in the PaddleOCR cache.")

    # --- Read the resolved model paths back from the engine ---
    # `args` is treated as a plain attribute namespace; every attribute is
    # read defensively and falls back to None when it is absent.
    args = ocr_temp_engine.args
    cls_enabled = getattr(args, 'use_angle_cls', False)

    det_model_dir_cache = getattr(args, 'det_model_dir', None)
    rec_model_dir_cache = getattr(args, 'rec_model_dir', None)
    cls_model_dir_cache = getattr(args, 'cls_model_dir', None) if cls_enabled else None
    rec_char_dict_path_from_args = getattr(args, 'rec_char_dict_path', None)

    print("\n--- CACHE PATHS FOR THE DOWNLOADED MODELS (from PaddleOCR config) ---")
    if not det_model_dir_cache:
        print(f"Detection ({LANGUAGE_TO_DOWNLOAD}) model cache path: Not found in args (Attribute 'det_model_dir' missing).")
    else:
        print(f"Detection ({LANGUAGE_TO_DOWNLOAD}) model cache path: {det_model_dir_cache}")

    if not rec_model_dir_cache:
        print(f"Recognition ({LANGUAGE_TO_DOWNLOAD}) model cache path: Not found in args (Attribute 'rec_model_dir' missing).")
    else:
        print(f"Recognition ({LANGUAGE_TO_DOWNLOAD}) model cache path: {rec_model_dir_cache}")

    if cls_model_dir_cache:
        print(f"Classification model cache path: {cls_model_dir_cache}")
    elif cls_enabled:
        print("Classification model enabled but path not found in args (Attribute 'cls_model_dir' missing or invalid).")
    else:
        print("Classification model not used or path not found in args.")

    # --- Instructions for copying ---
    print("\n--- ACTION REQUIRED ---")
    print("1. Create a folder named 'paddleocr_models' in your project's root directory (if it doesn't exist).")

    # The target folder is resolved against the current working directory.
    project_root = os.getcwd()
    project_model_dir_target = os.path.join(project_root, 'paddleocr_models')
    if os.path.exists(project_model_dir_target):
        print(f" Your project's 'paddleocr_models' folder is at: {project_model_dir_target}")
    else:
        try:
            os.makedirs(project_model_dir_target)
            print(f" Created directory: {project_model_dir_target}")
        except OSError as e:
            print(f" ERROR creating directory {project_model_dir_target}: {e}")
            print(" Please create it manually.")

    print(f"\n2. Manually copy the following folders from the cache paths printed above (or from PaddleOCR's initial debug log) into '{project_model_dir_target}':")

    # Detection model
    det_available = bool(det_model_dir_cache) and os.path.exists(det_model_dir_cache)
    if det_available:
        det_target_name = os.path.basename(os.path.normpath(det_model_dir_cache))
        det_target_path = os.path.join(project_model_dir_target, det_target_name)
        print(f" - Detection Model Folder to Copy: '{det_target_name}'")
        print(f" (Full path of source: {det_model_dir_cache})")
        print(f" (Target location: {det_target_path})")
    else:
        print(f" - Detection model directory NOT FOUND or path is invalid based on script access: {det_model_dir_cache}")
        print(" IMPORTANT: Please check the initial PaddleOCR debug logs (the long block of text when PaddleOCR starts).")
        print(" Look for the line starting with 'det_model_dir=' and use THAT PATH to find the folder to copy manually.")

    # Recognition model (plus its character dictionary)
    rec_available = bool(rec_model_dir_cache) and os.path.exists(rec_model_dir_cache)
    if rec_available:
        rec_target_name = os.path.basename(os.path.normpath(rec_model_dir_cache))
        rec_target_path = os.path.join(project_model_dir_target, rec_target_name)
        print(f" - Recognition Model Folder to Copy: '{rec_target_name}'")
        print(f" (Full path of source: {rec_model_dir_cache})")
        print(f" (Target location: {rec_target_path})")

        dict_known = bool(rec_char_dict_path_from_args) and os.path.exists(rec_char_dict_path_from_args)
        if dict_known:
            dict_name = os.path.basename(rec_char_dict_path_from_args)
            print(f" (Dictionary file used by PaddleOCR: '{dict_name}' found at {rec_char_dict_path_from_args})")
            print(f" (Ensure a similar .txt dictionary file, like '{dict_name}', is inside the '{rec_target_name}' folder you copy)")
        else:
            # Fall back to any .txt file that sits next to the model files.
            found_dicts = [entry for entry in os.listdir(rec_model_dir_cache) if entry.endswith('.txt')]
            if not found_dicts:
                print(f" WARNING: Dictionary file (e.g., '{LANGUAGE_TO_DOWNLOAD}_dict.txt') NOT FOUND in {rec_model_dir_cache}")
            else:
                print(f" (Ensure dictionary file like '{found_dicts[0]}' is inside the '{rec_target_name}' folder you copy)")
    else:
        print(f" - Recognition model directory NOT FOUND or path is invalid based on script access: {rec_model_dir_cache}")
        print(" IMPORTANT: Please check the initial PaddleOCR debug logs.")
        print(" Look for the line starting with 'rec_model_dir=' and use THAT PATH to find the folder to copy manually.")

    # Classification model (optional)
    cls_available = bool(cls_model_dir_cache) and os.path.exists(cls_model_dir_cache)
    if cls_available:
        cls_target_name = os.path.basename(os.path.normpath(cls_model_dir_cache))
        cls_target_path = os.path.join(project_model_dir_target, cls_target_name)
        print(f" - Classification Model Folder to Copy (Optional): '{cls_target_name}'")
        print(f" (Full path of source: {cls_model_dir_cache})")
        print(f" (Target location: {cls_target_path})")
    elif cls_enabled:
        print(f" - Classification model directory NOT FOUND or path is invalid based on script access: {cls_model_dir_cache}")
        print(" IMPORTANT: Please check the initial PaddleOCR debug logs.")
        print(" Look for the line starting with 'cls_model_dir=' and use THAT PATH to find the folder to copy manually if needed.")

    print("\n3. After copying, your 'paddleocr_models' directory in your project should contain these model subfolders.")
    print("4. Verify paths in your main `app.py` match these folder names.")
    print(" For example, if your log showed 'en_PP-OCRv3_det_infer' for detection, app.py should use that name.")

except AttributeError as ae:
    # Raised when the engine object does not expose the expected attributes
    # (for example a missing `args`).
    print(f"An AttributeError occurred during script execution (not PaddleOCR init): {ae}")
    print("This might indicate an unexpected structure in the PaddleOCR object or its arguments when accessed by the script.")
    print("Please carefully review the FULL initial debug output from PaddleOCR when it initializes.")
    print("The lines starting with 'det_model_dir=', 'rec_model_dir=', 'cls_model_dir=' are key.")
    print("You can use those paths directly to find and copy the model folders manually.")
except Exception as e:
    # Top-level boundary of a helper script: report and exit normally.
    print(f"An unexpected error occurred: {e}")
    print("Please ensure PaddleOCR and PaddlePaddle are installed correctly.")
|
129 |
+
|
latin.ttf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7da195a74c55bef988d0d48f9508bd5d849425c1770dba5d7bfc6ce9ed848954
|
3 |
+
size 757076
|
paddleocr_models/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1efda1b80e174b4fcb168a035ac96c1af4938892bd86a55f300a6027105d08c
|
3 |
+
size 539978
|
paddleocr_models/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams.info
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee0c2eaa4c09814802bf520c01dfdbc1345dc2879dc9e67424d32c7b0ee88e59
|
3 |
+
size 18545
|
paddleocr_models/ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c4337ec61722a20b1dca2e5bfaffc313c0592bc89ad6e0d45168224186f6683
|
3 |
+
size 1624487
|
paddleocr_models/en_PP-OCRv3_det_infer/inference.pdiparams
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83676ec730627ab4502f401410a4b6a3ce1c0bb98fa249b71db055b6bddae051
|
3 |
+
size 2377917
|
paddleocr_models/en_PP-OCRv3_det_infer/inference.pdiparams.info
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fe414d9eadf914bf44e3f9ba212988a6f26f364e4f87c6d0af57438ffffc0c4
|
3 |
+
size 26392
|
paddleocr_models/en_PP-OCRv3_det_infer/inference.pdmodel
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4bfb1b05d9d1d5a760801eaf6d20180ef7e47bcc675fb17d1f3a89da5fef427
|
3 |
+
size 1590133
|
paddleocr_models/en_PP-OCRv4_rec_infer/en_dict.txt
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
0
|
2 |
+
1
|
3 |
+
2
|
4 |
+
3
|
5 |
+
4
|
6 |
+
5
|
7 |
+
6
|
8 |
+
7
|
9 |
+
8
|
10 |
+
9
|
11 |
+
:
|
12 |
+
;
|
13 |
+
<
|
14 |
+
=
|
15 |
+
>
|
16 |
+
?
|
17 |
+
@
|
18 |
+
A
|
19 |
+
B
|
20 |
+
C
|
21 |
+
D
|
22 |
+
E
|
23 |
+
F
|
24 |
+
G
|
25 |
+
H
|
26 |
+
I
|
27 |
+
J
|
28 |
+
K
|
29 |
+
L
|
30 |
+
M
|
31 |
+
N
|
32 |
+
O
|
33 |
+
P
|
34 |
+
Q
|
35 |
+
R
|
36 |
+
S
|
37 |
+
T
|
38 |
+
U
|
39 |
+
V
|
40 |
+
W
|
41 |
+
X
|
42 |
+
Y
|
43 |
+
Z
|
44 |
+
[
|
45 |
+
\
|
46 |
+
]
|
47 |
+
^
|
48 |
+
_
|
49 |
+
`
|
50 |
+
a
|
51 |
+
b
|
52 |
+
c
|
53 |
+
d
|
54 |
+
e
|
55 |
+
f
|
56 |
+
g
|
57 |
+
h
|
58 |
+
i
|
59 |
+
j
|
60 |
+
k
|
61 |
+
l
|
62 |
+
m
|
63 |
+
n
|
64 |
+
o
|
65 |
+
p
|
66 |
+
q
|
67 |
+
r
|
68 |
+
s
|
69 |
+
t
|
70 |
+
u
|
71 |
+
v
|
72 |
+
w
|
73 |
+
x
|
74 |
+
y
|
75 |
+
z
|
76 |
+
{
|
77 |
+
|
|
78 |
+
}
|
79 |
+
~
|
80 |
+
!
|
81 |
+
"
|
82 |
+
#
|
83 |
+
$
|
84 |
+
%
|
85 |
+
&
|
86 |
+
'
|
87 |
+
(
|
88 |
+
)
|
89 |
+
*
|
90 |
+
+
|
91 |
+
,
|
92 |
+
-
|
93 |
+
.
|
94 |
+
/
|
95 |
+
|
paddleocr_models/en_PP-OCRv4_rec_infer/inference.pdiparams
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75f64a1ffb70c56b7a25655963ca16f5bf3286202e3f52ac972bee05cdee2f56
|
3 |
+
size 7607269
|
paddleocr_models/en_PP-OCRv4_rec_infer/inference.pdiparams.info
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:710d9f3d7c503067ae708f4bc3adc6973c0d6391adb7a1470c36eb70da6b5b83
|
3 |
+
size 102540
|
paddleocr_models/en_PP-OCRv4_rec_infer/inference.pdmodel
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85b952f05f709af259cfe4254012aa7208bef0998f71f57a15495446f25ccd43
|
3 |
+
size 2517366
|
render.yaml
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
services:
|
2 |
-
- type: web
|
3 |
-
name: ocr-app
|
4 |
-
env: python
|
5 |
-
plan: free
|
6 |
-
buildCommand: pip install -r requirements.txt
|
7 |
-
startCommand: python app.py
|
8 |
-
envVars:
|
9 |
-
- key: PORT
|
10 |
-
value: 5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,5 +1,15 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
|
|
5 |
opencv-python-headless>=4.8.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Core OCR and ML Framework
|
2 |
+
paddleocr>=2.7.0 # Or your preferred version
|
3 |
+
paddlepaddle>=2.6.0 # Or your preferred version, ensure compatibility with paddleocr
|
4 |
+
|
5 |
+
# Image Processing
|
6 |
opencv-python-headless>=4.8.0
|
7 |
+
Pillow>=9.0.0
|
8 |
+
|
9 |
+
# Web Application Interface
|
10 |
+
gradio>=4.0.0 # Using Gradio instead of Flask/Gunicorn for Hugging Face Spaces
|
11 |
+
|
12 |
+
# Utilities
|
13 |
+
numpy>=1.20.0
|
14 |
+
|
15 |
+
# Add any other specific utility libraries you might need
|
run_ocr.py
DELETED
@@ -1,8 +0,0 @@
|
|
1 |
-
from paddleocr import PaddleOCR
|
2 |
-
|
3 |
-
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
|
4 |
-
img_path = r"C:\Users\KRUNAL\OneDrive\Pictures\Camera imports\2024-08-22 (2)\1000015730.jpg"
|
5 |
-
result = ocr.ocr(img_path, cls=True)
|
6 |
-
|
7 |
-
print(result)
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/style.css
DELETED
@@ -1,179 +0,0 @@
|
|
1 |
-
/* body {
|
2 |
-
font-family: 'Poppins', sans-serif;
|
3 |
-
background: linear-gradient(135deg, #74ebd5, #ACB6E5);
|
4 |
-
margin: 0;
|
5 |
-
padding: 0;
|
6 |
-
min-height: 100vh;
|
7 |
-
display: flex;
|
8 |
-
justify-content: center;
|
9 |
-
align-items: center;
|
10 |
-
}
|
11 |
-
|
12 |
-
.container {
|
13 |
-
background: #ffffff;
|
14 |
-
padding: 40px;
|
15 |
-
border-radius: 20px;
|
16 |
-
box-shadow: 0 10px 40px rgba(0,0,0,0.2);
|
17 |
-
max-width: 700px;
|
18 |
-
width: 90%;
|
19 |
-
animation: fadeIn 1s ease-in;
|
20 |
-
}
|
21 |
-
|
22 |
-
h1 {
|
23 |
-
margin-bottom: 20px;
|
24 |
-
color: #333;
|
25 |
-
font-weight: 600;
|
26 |
-
}
|
27 |
-
|
28 |
-
.upload-form {
|
29 |
-
display: flex;
|
30 |
-
flex-direction: column;
|
31 |
-
gap: 20px;
|
32 |
-
margin-bottom: 30px;
|
33 |
-
}
|
34 |
-
|
35 |
-
input[type="file"] {
|
36 |
-
padding: 10px;
|
37 |
-
background: #f5f5f5;
|
38 |
-
border: 2px dashed #ccc;
|
39 |
-
border-radius: 10px;
|
40 |
-
}
|
41 |
-
|
42 |
-
button {
|
43 |
-
padding: 12px;
|
44 |
-
background: #007bff;
|
45 |
-
color: white;
|
46 |
-
border: none;
|
47 |
-
border-radius: 10px;
|
48 |
-
font-size: 16px;
|
49 |
-
cursor: pointer;
|
50 |
-
transition: background 0.3s;
|
51 |
-
}
|
52 |
-
|
53 |
-
button:hover {
|
54 |
-
background: #0056b3;
|
55 |
-
}
|
56 |
-
|
57 |
-
.preview img {
|
58 |
-
max-width: 100%;
|
59 |
-
max-height: 400px;
|
60 |
-
border-radius: 10px;
|
61 |
-
margin-top: 20px;
|
62 |
-
}
|
63 |
-
|
64 |
-
.output {
|
65 |
-
margin-top: 30px;
|
66 |
-
background: #f9f9f9;
|
67 |
-
padding: 20px;
|
68 |
-
border-radius: 10px;
|
69 |
-
word-break: break-word;
|
70 |
-
}
|
71 |
-
|
72 |
-
.error {
|
73 |
-
margin-top: 20px;
|
74 |
-
color: red;
|
75 |
-
font-weight: bold;
|
76 |
-
}
|
77 |
-
|
78 |
-
@keyframes fadeIn {
|
79 |
-
from { opacity: 0; transform: translateY(30px);}
|
80 |
-
to { opacity: 1; transform: translateY(0);}
|
81 |
-
}
|
82 |
-
|
83 |
-
#result-img {
|
84 |
-
max-width: 100%;
|
85 |
-
height: auto;
|
86 |
-
margin-top: 10px;
|
87 |
-
border: 1px solid #ddd;
|
88 |
-
border-radius: 8px;
|
89 |
-
padding: 4px;
|
90 |
-
}
|
91 |
-
#extracted-text {
|
92 |
-
text-align: left;
|
93 |
-
background: #fff;
|
94 |
-
padding: 15px;
|
95 |
-
border-radius: 8px;
|
96 |
-
border: 1px solid #ddd;
|
97 |
-
}
|
98 |
-
#spinner {
|
99 |
-
|
100 |
-
position: fixed;
|
101 |
-
top: 50%;
|
102 |
-
left: 50%;
|
103 |
-
transform: translate(-50%, -50%);
|
104 |
-
z-index: 1000;
|
105 |
-
} */
|
106 |
-
/* static/style.css */
|
107 |
-
/* static/style.css */
|
108 |
-
body {
|
109 |
-
background: #f0f2f5;
|
110 |
-
font-family: 'Segoe UI', Tahoma, sans-serif;
|
111 |
-
color: #333;
|
112 |
-
margin: 0;
|
113 |
-
padding: 0;
|
114 |
-
}
|
115 |
-
.container {
|
116 |
-
max-width: 600px;
|
117 |
-
margin: 40px auto;
|
118 |
-
background: #fff;
|
119 |
-
border-radius: 8px;
|
120 |
-
padding: 20px;
|
121 |
-
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
|
122 |
-
}
|
123 |
-
h1, h2 {
|
124 |
-
color: #444;
|
125 |
-
margin-bottom: 10px;
|
126 |
-
}
|
127 |
-
p {
|
128 |
-
color: #666;
|
129 |
-
}
|
130 |
-
form {
|
131 |
-
margin-top: 20px;
|
132 |
-
display: flex;
|
133 |
-
gap: 10px;
|
134 |
-
}
|
135 |
-
input[type="file"] {
|
136 |
-
flex: 1;
|
137 |
-
padding: 8px;
|
138 |
-
border: 1px solid #ccc;
|
139 |
-
border-radius: 4px;
|
140 |
-
}
|
141 |
-
button {
|
142 |
-
background-color: #007BFF;
|
143 |
-
color: white;
|
144 |
-
border: none;
|
145 |
-
padding: 8px 16px;
|
146 |
-
border-radius: 4px;
|
147 |
-
cursor: pointer;
|
148 |
-
}
|
149 |
-
button:hover {
|
150 |
-
background-color: #0056b3;
|
151 |
-
}
|
152 |
-
.result, .image-preview {
|
153 |
-
margin-top: 20px;
|
154 |
-
padding: 10px;
|
155 |
-
border-top: 1px solid #e1e1e1;
|
156 |
-
}
|
157 |
-
.result pre {
|
158 |
-
background: #f8f9fa;
|
159 |
-
padding: 10px;
|
160 |
-
border-radius: 4px;
|
161 |
-
white-space: pre-wrap;
|
162 |
-
}
|
163 |
-
.flashes {
|
164 |
-
list-style: none;
|
165 |
-
padding: 10px;
|
166 |
-
background: #ffe0e0;
|
167 |
-
border: 1px solid #ffb3b3;
|
168 |
-
border-radius: 4px;
|
169 |
-
color: #a94442;
|
170 |
-
}
|
171 |
-
.flashes li {
|
172 |
-
margin: 5px 0;
|
173 |
-
}
|
174 |
-
img {
|
175 |
-
max-width: 100%;
|
176 |
-
height: auto;
|
177 |
-
border-radius: 4px;
|
178 |
-
}
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
templates/index.html
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
<!-- templates/index.html -->
|
2 |
-
<!DOCTYPE html>
|
3 |
-
<html>
|
4 |
-
<head>
|
5 |
-
<title>OCR App</title>
|
6 |
-
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
|
7 |
-
</head>
|
8 |
-
<body>
|
9 |
-
<div class="container">
|
10 |
-
<h1>Image Text Extraction</h1>
|
11 |
-
<p>Upload an image to extract text using PaddleOCR.</p>
|
12 |
-
|
13 |
-
{% with messages = get_flashed_messages() %}
|
14 |
-
{% if messages %}
|
15 |
-
<ul class="flashes">
|
16 |
-
{% for message in messages %}
|
17 |
-
<li>{{ message }}</li>
|
18 |
-
{% endfor %}
|
19 |
-
</ul>
|
20 |
-
{% endif %}
|
21 |
-
{% endwith %}
|
22 |
-
|
23 |
-
<form method="POST" enctype="multipart/form-data">
|
24 |
-
<input type="file" name="image" accept="image/*" required>
|
25 |
-
<button type="submit">Extract Text</button>
|
26 |
-
</form>
|
27 |
-
|
28 |
-
{% if extracted_text %}
|
29 |
-
<div class="result">
|
30 |
-
<h2>Extracted Text:</h2>
|
31 |
-
<pre>{{ extracted_text }}</pre>
|
32 |
-
</div>
|
33 |
-
{% endif %}
|
34 |
-
|
35 |
-
{% if image_file %}
|
36 |
-
<div class="image-preview">
|
37 |
-
<h2>Uploaded Image:</h2>
|
38 |
-
<img src="{{ url_for('static', filename='uploads/' + image_file) }}" alt="Uploaded Image">
|
39 |
-
</div>
|
40 |
-
{% endif %}
|
41 |
-
</div>
|
42 |
-
</body>
|
43 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|