Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
-
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
2 |
-
import easyocr
|
3 |
-
from PIL import Image, ImageDraw
|
4 |
-
import numpy as np
|
5 |
import gradio as gr
|
|
|
6 |
import requests
|
7 |
from io import BytesIO
|
|
|
8 |
import json
|
|
|
|
|
9 |
|
10 |
# TrOCR model for recognition
|
11 |
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
|
@@ -27,16 +27,14 @@ def detect_text_trocr_json(image_file, image_url):
|
|
27 |
if image is None:
|
28 |
return None, "No image provided.", None
|
29 |
|
30 |
-
# Step 1: Detect bounding boxes with EasyOCR
|
31 |
results = reader.readtext(np.array(image))
|
32 |
-
|
33 |
draw = ImageDraw.Draw(image)
|
34 |
words_json = []
|
35 |
-
paragraph_json = []
|
36 |
|
37 |
for bbox, _, conf in results:
|
38 |
-
|
39 |
-
|
|
|
40 |
x_min, y_min = min(x_coords), min(y_coords)
|
41 |
x_max, y_max = max(x_coords), max(y_coords)
|
42 |
|
@@ -54,14 +52,16 @@ def detect_text_trocr_json(image_file, image_url):
|
|
54 |
"confidence": float(conf)
|
55 |
})
|
56 |
|
57 |
-
|
|
|
58 |
|
59 |
output_json = {
|
60 |
"words": words_json,
|
61 |
-
"paragraphs":
|
62 |
}
|
63 |
|
64 |
-
|
|
|
65 |
|
66 |
iface = gr.Interface(
|
67 |
fn=detect_text_trocr_json,
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from PIL import Image, ImageDraw
|
3 |
import requests
|
4 |
from io import BytesIO
|
5 |
+
import numpy as np
|
6 |
import json
|
7 |
+
import easyocr
|
8 |
+
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
9 |
|
10 |
# TrOCR model for recognition
|
11 |
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
|
|
|
27 |
if image is None:
|
28 |
return None, "No image provided.", None
|
29 |
|
|
|
30 |
results = reader.readtext(np.array(image))
|
|
|
31 |
draw = ImageDraw.Draw(image)
|
32 |
words_json = []
|
|
|
33 |
|
34 |
for bbox, _, conf in results:
|
35 |
+
# Convert coordinates to float
|
36 |
+
x_coords = [float(point[0]) for point in bbox]
|
37 |
+
y_coords = [float(point[1]) for point in bbox]
|
38 |
x_min, y_min = min(x_coords), min(y_coords)
|
39 |
x_max, y_max = max(x_coords), max(y_coords)
|
40 |
|
|
|
52 |
"confidence": float(conf)
|
53 |
})
|
54 |
|
55 |
+
# For simplicity, treat words as paragraphs
|
56 |
+
paragraphs_json = words_json.copy()
|
57 |
|
58 |
output_json = {
|
59 |
"words": words_json,
|
60 |
+
"paragraphs": paragraphs_json
|
61 |
}
|
62 |
|
63 |
+
json_str = json.dumps(output_json, indent=2) # now serializable
|
64 |
+
return image, json_str, json_str
|
65 |
|
66 |
iface = gr.Interface(
|
67 |
fn=detect_text_trocr_json,
|