ocr for image
Browse files- app.py +69 -0
- requirements.txt +20 -0
app.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
import mimetypes
|
4 |
+
|
5 |
+
# Visual-query endpoint that uploaded files are POSTed to. The source and
# target languages are fixed in the query string (eng_Latn -> kan_Knda).
API_URL = "http://209.20.158.215:7862/v1/visual_query/?src_lang=eng_Latn&tgt_lang=kan_Knda"
|
6 |
+
|
7 |
+
# Upper bound (seconds) on each API call so one stalled upload cannot hang the batch.
_REQUEST_TIMEOUT_SECONDS = 60


def ocr_from_paths(file_paths, query):
    """Send each file to the visual-query API and collect one report per file.

    Args:
        file_paths: A single path string, a list of path strings, or None
            (treated as "no files").
        query: Query text forwarded to the API; "describe the image" is used
            when it is empty or None.

    Returns:
        The per-file reports joined by blank lines. Successful entries start
        with "✅", failed ones with "❌". An empty string when there are no
        files.
    """
    import os

    if file_paths is None:
        file_paths = []
    elif isinstance(file_paths, str):
        file_paths = [file_paths]

    # The query is the same for every file, so build the form payload once.
    data_param = {"query": query or "describe the image"}

    results = []
    for path in file_paths:
        filename = os.path.basename(path)
        mime_type, _ = mimetypes.guess_type(path)
        if not mime_type:
            results.append(f"❌ {filename}: Unsupported file type")
            continue

        # Per-file boundary: any failure (unreadable file, network error,
        # malformed response) is reported for this file and the batch goes on.
        try:
            with open(path, "rb") as f:
                response = requests.post(
                    API_URL,
                    files={"file": (filename, f, mime_type)},
                    data=data_param,
                    # Do NOT set Content-Type here: requests must generate the
                    # multipart boundary header itself.
                    headers={"accept": "application/json"},
                    timeout=_REQUEST_TIMEOUT_SECONDS,
                )

            if response.status_code == 200:
                resp_json = response.json()
                if isinstance(resp_json, dict):
                    extracted_text = (
                        resp_json.get("result")
                        or resp_json.get("text")
                        or str(resp_json)
                    )
                else:
                    # Non-object JSON (list, string, ...) is shown verbatim.
                    extracted_text = str(resp_json)
                results.append(f"✅ {filename}:\n{extracted_text}")
            else:
                results.append(f"❌ {filename}: API Error ({response.status_code})")
        except Exception as e:
            results.append(f"❌ {filename}: {e}")

    return "\n\n".join(results)
|
40 |
+
|
41 |
+
# Gradio front end: a multi-file upload and an optional query box feed
# ocr_from_paths, whose combined report is shown in a read-only text area.
# NOTE(review): the nesting below (output box and button outside the Row) is
# reconstructed from a source whose indentation was lost -- confirm.
with gr.Blocks() as demo:
    gr.Markdown("## Browse & OCR Extract PDFs/Images (Batch)")

    # Inputs share one row: the files on the left, the query on the right.
    with gr.Row():
        file_input = gr.File(
            file_count="multiple",
            file_types=[".pdf", ".png", ".jpg", ".jpeg", ".webp"],
            label="Upload Files",
        )
        query_input = gr.Textbox(
            placeholder="Enter a query string for the API",
            label="Query (optional)",
        )

    output_text = gr.Textbox(
        lines=15,
        interactive=False,
        label="Extracted Text Results",
        placeholder="OCR results will appear here...",
    )

    # Clicking the button runs the batch and writes the report to the output box.
    submit_btn = gr.Button("Extract Text")
    submit_btn.click(
        fn=ocr_from_paths,
        inputs=[file_input, query_input],
        outputs=output_text,
    )

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cached-path
|
2 |
+
smart_open
|
3 |
+
pypdf
|
4 |
+
pypdfium2
|
5 |
+
cryptography
|
6 |
+
lingua-language-detector
|
7 |
+
Pillow
|
8 |
+
ftfy
|
9 |
+
bleach
|
10 |
+
markdown2
|
11 |
+
filelock
|
12 |
+
orjson
|
13 |
+
requests
|
14 |
+
zstandard
|
15 |
+
boto3
|
16 |
+
httpx
|
17 |
+
torch
|
18 |
+
transformers
|
19 |
+
img2pdf
|
20 |
+
beaker-py
|