Spaces:
Sleeping
Sleeping
change routes.py
Browse files- Dockerfile +2 -2
- app/routes.py +11 -5
- requirements.txt +0 -1
Dockerfile
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
FROM python:3.11-slim
|
2 |
|
3 |
# install system deps…
|
@@ -24,8 +25,7 @@ COPY . .
|
|
24 |
ENV PORT=7860 \
|
25 |
XDG_CACHE_HOME=/data/.cache \
|
26 |
TRANSFORMERS_CACHE=/data/.cache/huggingface \
|
27 |
-
HF_HOME=/data/.cache/huggingface
|
28 |
-
EASYOCR_MODEL_STORAGE=/data/.EasyOCR
|
29 |
|
30 |
EXPOSE ${PORT}
|
31 |
|
|
|
1 |
+
# syntax=docker/dockerfile:1.4
|
2 |
FROM python:3.11-slim
|
3 |
|
4 |
# install system deps…
|
|
|
25 |
ENV PORT=7860 \
|
26 |
XDG_CACHE_HOME=/data/.cache \
|
27 |
TRANSFORMERS_CACHE=/data/.cache/huggingface \
|
28 |
+
HF_HOME=/data/.cache/huggingface
|
|
|
29 |
|
30 |
EXPOSE ${PORT}
|
31 |
|
app/routes.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
from flask import Blueprint, request, jsonify
|
2 |
from werkzeug.utils import secure_filename
|
3 |
import os
|
4 |
-
import easyocr
|
5 |
import pytesseract # Ensure this is imported
|
6 |
import base64
|
7 |
from huggingface_hub import InferenceApi
|
@@ -20,8 +19,6 @@ from werkzeug.utils import secure_filename
|
|
20 |
# Initialize Flask Blueprint
|
21 |
bp = Blueprint('main', __name__)
|
22 |
|
23 |
-
model_dir = os.getenv('EASYOCR_MODEL_STORAGE', None)
|
24 |
-
|
25 |
# ── OCR via HF Inference API ─────────────────────────────────────────────────
|
26 |
# We're using Microsoft's TrOCR for printed text:
|
27 |
|
@@ -121,11 +118,20 @@ def analyze_image():
|
|
121 |
# )
|
122 |
img = Image.open(path).convert("RGB")
|
123 |
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
-
print(extracted)
|
127 |
|
|
|
128 |
analysis = analyze_text_internal(extracted)
|
|
|
129 |
tags = generate_tags(extracted)
|
130 |
return jsonify({
|
131 |
"extracted_text": extracted,
|
|
|
1 |
from flask import Blueprint, request, jsonify
|
2 |
from werkzeug.utils import secure_filename
|
3 |
import os
|
|
|
4 |
import pytesseract # Ensure this is imported
|
5 |
import base64
|
6 |
from huggingface_hub import InferenceApi
|
|
|
19 |
# Initialize Flask Blueprint
|
20 |
bp = Blueprint('main', __name__)
|
21 |
|
|
|
|
|
22 |
# ── OCR via HF Inference API ─────────────────────────────────────────────────
|
23 |
# We're using Microsoft's TrOCR for printed text:
|
24 |
|
|
|
118 |
# )
|
119 |
img = Image.open(path).convert("RGB")
|
120 |
|
121 |
+
# run OCR pipeline, which returns a list of dicts
|
122 |
+
ocr_results = ocr_pipe(img)
|
123 |
+
# extract the generated text from the first result
|
124 |
+
extracted = ""
|
125 |
+
if isinstance(ocr_results, list) and len(ocr_results) > 0 and "generated_text" in ocr_results[0]:
|
126 |
+
extracted = ocr_results[0]["generated_text"].strip()
|
127 |
+
else:
|
128 |
+
extracted = str(ocr_results)
|
129 |
|
130 |
+
print("OCR extracted text:", extracted)
|
131 |
|
132 |
+
# now analyze the extracted string
|
133 |
analysis = analyze_text_internal(extracted)
|
134 |
+
|
135 |
tags = generate_tags(extracted)
|
136 |
return jsonify({
|
137 |
"extracted_text": extracted,
|
requirements.txt
CHANGED
@@ -32,7 +32,6 @@ openai-whisper==20231106 # pins triton 2.0.0 (now satisfied)
|
|
32 |
|
33 |
###############################################################################
|
34 |
# 6. OCR / CV
|
35 |
-
easyocr==1.7.2
|
36 |
opencv-python-headless==4.11.0.86
|
37 |
pytesseract==0.3.13
|
38 |
scikit-image==0.25.1
|
|
|
32 |
|
33 |
###############################################################################
|
34 |
# 6. OCR / CV
|
|
|
35 |
opencv-python-headless==4.11.0.86
|
36 |
pytesseract==0.3.13
|
37 |
scikit-image==0.25.1
|