Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
beca8ab
1
Parent(s):
c7a30f7
parse olmo output
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ from PIL import Image
|
|
3 |
import xml.etree.ElementTree as ET
|
4 |
import os
|
5 |
import torch
|
|
|
6 |
from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline, Qwen2VLForConditionalGeneration
|
7 |
import spaces
|
8 |
|
@@ -259,6 +260,14 @@ def run_hf_ocr(image_path, model_name="RolmOCR"):
|
|
259 |
generated_content = ocr_results[0]["generated_text"]
|
260 |
|
261 |
if isinstance(generated_content, str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
return generated_content
|
263 |
|
264 |
if isinstance(generated_content, list) and generated_content:
|
@@ -272,6 +281,14 @@ def run_hf_ocr(image_path, model_name="RolmOCR"):
|
|
272 |
),
|
273 |
None,
|
274 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
return assistant_message
|
276 |
|
277 |
# Fallback if the specific assistant message structure isn't found but there's content
|
@@ -284,11 +301,27 @@ def run_hf_ocr(image_path, model_name="RolmOCR"):
|
|
284 |
and isinstance(generated_content[1], dict)
|
285 |
and "content" in generated_content[1]
|
286 |
):
|
287 |
-
|
288 |
-
|
289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
else:
|
291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
|
293 |
print(f"Unexpected OCR output structure from HF model: {ocr_results}")
|
294 |
return "Error: Could not parse OCR model output. Check console."
|
|
|
3 |
import xml.etree.ElementTree as ET
|
4 |
import os
|
5 |
import torch
|
6 |
+
import json
|
7 |
from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline, Qwen2VLForConditionalGeneration
|
8 |
import spaces
|
9 |
|
|
|
260 |
generated_content = ocr_results[0]["generated_text"]
|
261 |
|
262 |
if isinstance(generated_content, str):
|
263 |
+
# Check if it's JSON format from olmOCR
|
264 |
+
if model_name == "olmOCR":
|
265 |
+
try:
|
266 |
+
json_data = json.loads(generated_content)
|
267 |
+
if "natural_text" in json_data:
|
268 |
+
return json_data["natural_text"]
|
269 |
+
except (json.JSONDecodeError, KeyError, TypeError):
|
270 |
+
pass
|
271 |
return generated_content
|
272 |
|
273 |
if isinstance(generated_content, list) and generated_content:
|
|
|
281 |
),
|
282 |
None,
|
283 |
):
|
284 |
+
# Check if it's JSON format from olmOCR
|
285 |
+
if model_name == "olmOCR":
|
286 |
+
try:
|
287 |
+
json_data = json.loads(assistant_message)
|
288 |
+
if "natural_text" in json_data:
|
289 |
+
return json_data["natural_text"]
|
290 |
+
except (json.JSONDecodeError, KeyError, TypeError):
|
291 |
+
pass
|
292 |
return assistant_message
|
293 |
|
294 |
# Fallback if the specific assistant message structure isn't found but there's content
|
|
|
301 |
and isinstance(generated_content[1], dict)
|
302 |
and "content" in generated_content[1]
|
303 |
):
|
304 |
+
content = generated_content[1]["content"]
|
305 |
+
# Check if it's JSON format from olmOCR
|
306 |
+
if model_name == "olmOCR":
|
307 |
+
try:
|
308 |
+
json_data = json.loads(content)
|
309 |
+
if "natural_text" in json_data:
|
310 |
+
return json_data["natural_text"]
|
311 |
+
except (json.JSONDecodeError, KeyError, TypeError):
|
312 |
+
pass
|
313 |
+
return content # Assuming second part is assistant
|
314 |
else:
|
315 |
+
content = generated_content[0]["content"]
|
316 |
+
# Check if it's JSON format from olmOCR
|
317 |
+
if model_name == "olmOCR":
|
318 |
+
try:
|
319 |
+
json_data = json.loads(content)
|
320 |
+
if "natural_text" in json_data:
|
321 |
+
return json_data["natural_text"]
|
322 |
+
except (json.JSONDecodeError, KeyError, TypeError):
|
323 |
+
pass
|
324 |
+
return content
|
325 |
|
326 |
print(f"Unexpected OCR output structure from HF model: {ocr_results}")
|
327 |
return "Error: Could not parse OCR model output. Check console."
|