Spaces:
Sleeping
Sleeping
Meet Patel
committed on
Commit
·
2dc3c19
1
Parent(s):
0fae407
Refactor response handling in app.py and various tool modules to improve JSON parsing and error management. Introduce utility functions for cleaning and extracting JSON from text, ensuring consistent data handling across quiz, lesson, learning path, and interaction tools. Enhance robustness by accommodating multiple response formats, including strings and dictionaries, to streamline educational interactions.
Browse files- app.py +80 -12
- mcp_server/tools/concept_tools.py +17 -10
- mcp_server/tools/interaction_tools.py +15 -1
- mcp_server/tools/learning_path_tools.py +16 -10
- mcp_server/tools/lesson_tools.py +17 -1
- mcp_server/tools/ocr_tools.py +17 -1
app.py
CHANGED
@@ -420,7 +420,6 @@ with gr.Blocks(title="TutorX Educational AI", theme=gr.themes.Soft()) as demo:
|
|
420 |
async with ClientSession(sse, write) as session:
|
421 |
await session.initialize()
|
422 |
response = await session.call_tool("generate_quiz_tool", {"concept": concept.strip(), "difficulty": difficulty_str})
|
423 |
-
# --- PATCH: Parse quiz JSON for pretty display ---
|
424 |
if hasattr(response, 'content') and isinstance(response.content, list):
|
425 |
for item in response.content:
|
426 |
if hasattr(item, 'text') and item.text:
|
@@ -468,7 +467,22 @@ with gr.Blocks(title="TutorX Educational AI", theme=gr.themes.Soft()) as demo:
|
|
468 |
async with ClientSession(sse, write) as session:
|
469 |
await session.initialize()
|
470 |
response = await session.call_tool("generate_lesson_tool", {"topic": topic, "grade_level": grade, "duration_minutes": duration})
|
471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
|
473 |
gen_lesson_btn.click(
|
474 |
fn=generate_lesson_async,
|
@@ -495,7 +509,22 @@ with gr.Blocks(title="TutorX Educational AI", theme=gr.themes.Soft()) as demo:
|
|
495 |
"concept_ids": [c.strip() for c in concept_ids.split(",") if c.strip()],
|
496 |
"student_level": student_level
|
497 |
})
|
498 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
499 |
except Exception as e:
|
500 |
return {"error": str(e)}
|
501 |
lp_btn.click(
|
@@ -520,7 +549,22 @@ with gr.Blocks(title="TutorX Educational AI", theme=gr.themes.Soft()) as demo:
|
|
520 |
async with ClientSession(sse, write) as session:
|
521 |
await session.initialize()
|
522 |
response = await session.call_tool("text_interaction", {"query": text, "student_id": student_id})
|
523 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
524 |
|
525 |
text_btn.click(
|
526 |
fn=text_interaction_async,
|
@@ -558,23 +602,32 @@ with gr.Blocks(title="TutorX Educational AI", theme=gr.themes.Soft()) as demo:
|
|
558 |
file_path = file
|
559 |
if not file_path or not os.path.exists(file_path):
|
560 |
return {"error": "File not found", "success": False}
|
561 |
-
|
562 |
-
# Upload file to storage API
|
563 |
upload_result = await upload_file_to_storage(file_path)
|
564 |
if not upload_result.get("success"):
|
565 |
return upload_result
|
566 |
-
|
567 |
-
# Get the storage URL from the upload response
|
568 |
storage_url = upload_result.get("storage_url")
|
569 |
if not storage_url:
|
570 |
return {"error": "No storage URL returned from upload", "success": False}
|
571 |
-
|
572 |
-
# Use the storage URL for OCR processing
|
573 |
async with sse_client(SERVER_URL) as (sse, write):
|
574 |
async with ClientSession(sse, write) as session:
|
575 |
await session.initialize()
|
576 |
response = await session.call_tool("mistral_document_ocr", {"document_url": storage_url})
|
577 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
578 |
except Exception as e:
|
579 |
return {"error": f"Error processing document: {str(e)}", "success": False}
|
580 |
doc_ocr_btn.click(
|
@@ -609,7 +662,22 @@ with gr.Blocks(title="TutorX Educational AI", theme=gr.themes.Soft()) as demo:
|
|
609 |
async with ClientSession(sse, write) as session:
|
610 |
await session.initialize()
|
611 |
response = await session.call_tool("check_submission_originality", {"submission": submission, "reference_sources": [reference] if isinstance(reference, str) else reference})
|
612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
613 |
|
614 |
plagiarism_btn.click(
|
615 |
fn=check_plagiarism_async,
|
|
|
420 |
async with ClientSession(sse, write) as session:
|
421 |
await session.initialize()
|
422 |
response = await session.call_tool("generate_quiz_tool", {"concept": concept.strip(), "difficulty": difficulty_str})
|
|
|
423 |
if hasattr(response, 'content') and isinstance(response.content, list):
|
424 |
for item in response.content:
|
425 |
if hasattr(item, 'text') and item.text:
|
|
|
467 |
async with ClientSession(sse, write) as session:
|
468 |
await session.initialize()
|
469 |
response = await session.call_tool("generate_lesson_tool", {"topic": topic, "grade_level": grade, "duration_minutes": duration})
|
470 |
+
if hasattr(response, 'content') and isinstance(response.content, list):
|
471 |
+
for item in response.content:
|
472 |
+
if hasattr(item, 'text') and item.text:
|
473 |
+
try:
|
474 |
+
lesson_data = json.loads(item.text)
|
475 |
+
return lesson_data
|
476 |
+
except Exception:
|
477 |
+
return {"raw": item.text}
|
478 |
+
if isinstance(response, dict):
|
479 |
+
return response
|
480 |
+
if isinstance(response, str):
|
481 |
+
try:
|
482 |
+
return json.loads(response)
|
483 |
+
except Exception:
|
484 |
+
return {"raw": response}
|
485 |
+
return {"raw": str(response)}
|
486 |
|
487 |
gen_lesson_btn.click(
|
488 |
fn=generate_lesson_async,
|
|
|
509 |
"concept_ids": [c.strip() for c in concept_ids.split(",") if c.strip()],
|
510 |
"student_level": student_level
|
511 |
})
|
512 |
+
if hasattr(result, 'content') and isinstance(result.content, list):
|
513 |
+
for item in result.content:
|
514 |
+
if hasattr(item, 'text') and item.text:
|
515 |
+
try:
|
516 |
+
lp_data = json.loads(item.text)
|
517 |
+
return lp_data
|
518 |
+
except Exception:
|
519 |
+
return {"raw": item.text}
|
520 |
+
if isinstance(result, dict):
|
521 |
+
return result
|
522 |
+
if isinstance(result, str):
|
523 |
+
try:
|
524 |
+
return json.loads(result)
|
525 |
+
except Exception:
|
526 |
+
return {"raw": result}
|
527 |
+
return {"raw": str(result)}
|
528 |
except Exception as e:
|
529 |
return {"error": str(e)}
|
530 |
lp_btn.click(
|
|
|
549 |
async with ClientSession(sse, write) as session:
|
550 |
await session.initialize()
|
551 |
response = await session.call_tool("text_interaction", {"query": text, "student_id": student_id})
|
552 |
+
if hasattr(response, 'content') and isinstance(response.content, list):
|
553 |
+
for item in response.content:
|
554 |
+
if hasattr(item, 'text') and item.text:
|
555 |
+
try:
|
556 |
+
data = json.loads(item.text)
|
557 |
+
return data
|
558 |
+
except Exception:
|
559 |
+
return {"raw": item.text}
|
560 |
+
if isinstance(response, dict):
|
561 |
+
return response
|
562 |
+
if isinstance(response, str):
|
563 |
+
try:
|
564 |
+
return json.loads(response)
|
565 |
+
except Exception:
|
566 |
+
return {"raw": response}
|
567 |
+
return {"raw": str(response)}
|
568 |
|
569 |
text_btn.click(
|
570 |
fn=text_interaction_async,
|
|
|
602 |
file_path = file
|
603 |
if not file_path or not os.path.exists(file_path):
|
604 |
return {"error": "File not found", "success": False}
|
|
|
|
|
605 |
upload_result = await upload_file_to_storage(file_path)
|
606 |
if not upload_result.get("success"):
|
607 |
return upload_result
|
|
|
|
|
608 |
storage_url = upload_result.get("storage_url")
|
609 |
if not storage_url:
|
610 |
return {"error": "No storage URL returned from upload", "success": False}
|
|
|
|
|
611 |
async with sse_client(SERVER_URL) as (sse, write):
|
612 |
async with ClientSession(sse, write) as session:
|
613 |
await session.initialize()
|
614 |
response = await session.call_tool("mistral_document_ocr", {"document_url": storage_url})
|
615 |
+
if hasattr(response, 'content') and isinstance(response.content, list):
|
616 |
+
for item in response.content:
|
617 |
+
if hasattr(item, 'text') and item.text:
|
618 |
+
try:
|
619 |
+
data = json.loads(item.text)
|
620 |
+
return data
|
621 |
+
except Exception:
|
622 |
+
return {"raw": item.text}
|
623 |
+
if isinstance(response, dict):
|
624 |
+
return response
|
625 |
+
if isinstance(response, str):
|
626 |
+
try:
|
627 |
+
return json.loads(response)
|
628 |
+
except Exception:
|
629 |
+
return {"raw": response}
|
630 |
+
return {"raw": str(response)}
|
631 |
except Exception as e:
|
632 |
return {"error": f"Error processing document: {str(e)}", "success": False}
|
633 |
doc_ocr_btn.click(
|
|
|
662 |
async with ClientSession(sse, write) as session:
|
663 |
await session.initialize()
|
664 |
response = await session.call_tool("check_submission_originality", {"submission": submission, "reference_sources": [reference] if isinstance(reference, str) else reference})
|
665 |
+
if hasattr(response, 'content') and isinstance(response.content, list):
|
666 |
+
for item in response.content:
|
667 |
+
if hasattr(item, 'text') and item.text:
|
668 |
+
try:
|
669 |
+
data = json.loads(item.text)
|
670 |
+
return data
|
671 |
+
except Exception:
|
672 |
+
return {"raw": item.text}
|
673 |
+
if isinstance(response, dict):
|
674 |
+
return response
|
675 |
+
if isinstance(response, str):
|
676 |
+
try:
|
677 |
+
return json.loads(response)
|
678 |
+
except Exception:
|
679 |
+
return {"raw": response}
|
680 |
+
return {"raw": str(response)}
|
681 |
|
682 |
plagiarism_btn.click(
|
683 |
fn=check_plagiarism_async,
|
mcp_server/tools/concept_tools.py
CHANGED
@@ -8,20 +8,13 @@ import sys
|
|
8 |
import os
|
9 |
from pathlib import Path
|
10 |
import json
|
|
|
11 |
|
12 |
# Add the parent directory to the Python path
|
13 |
current_dir = Path(__file__).parent
|
14 |
parent_dir = current_dir.parent.parent
|
15 |
sys.path.insert(0, str(parent_dir))
|
16 |
|
17 |
-
import sys
|
18 |
-
import os
|
19 |
-
from pathlib import Path
|
20 |
-
|
21 |
-
# Add the parent directory to the Python path
|
22 |
-
current_dir = Path(__file__).parent
|
23 |
-
parent_dir = current_dir.parent
|
24 |
-
sys.path.insert(0, str(parent_dir))
|
25 |
|
26 |
# Import from local resources
|
27 |
from resources.concept_graph import get_concept, get_all_concepts
|
@@ -32,6 +25,20 @@ from mcp_server.model.gemini_flash import GeminiFlash
|
|
32 |
|
33 |
MODEL = GeminiFlash()
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
@mcp.tool()
|
36 |
async def get_concept_tool(concept_id: str = None) -> dict:
|
37 |
"""
|
@@ -46,7 +53,7 @@ async def get_concept_tool(concept_id: str = None) -> dict:
|
|
46 |
)
|
47 |
llm_response = await MODEL.generate_text(prompt)
|
48 |
try:
|
49 |
-
data =
|
50 |
except Exception:
|
51 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
52 |
return data
|
@@ -63,7 +70,7 @@ async def assess_skill_tool(student_id: str, concept_id: str) -> dict:
|
|
63 |
)
|
64 |
llm_response = await MODEL.generate_text(prompt)
|
65 |
try:
|
66 |
-
data =
|
67 |
except Exception:
|
68 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
69 |
return data
|
|
|
8 |
import os
|
9 |
from pathlib import Path
|
10 |
import json
|
11 |
+
import re
|
12 |
|
13 |
# Add the parent directory to the Python path
|
14 |
current_dir = Path(__file__).parent
|
15 |
parent_dir = current_dir.parent.parent
|
16 |
sys.path.insert(0, str(parent_dir))
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# Import from local resources
|
20 |
from resources.concept_graph import get_concept, get_all_concepts
|
|
|
25 |
|
26 |
MODEL = GeminiFlash()
|
27 |
|
28 |
+
def clean_json_trailing_commas(json_text: str) -> str:
|
29 |
+
return re.sub(r',([ \t\r\n]*[}}\]])', r'\1', json_text)
|
30 |
+
|
31 |
+
def extract_json_from_text(text: str):
|
32 |
+
if not text or not isinstance(text, str):
|
33 |
+
return None
|
34 |
+
# Remove code fences
|
35 |
+
text = re.sub(r'^\s*```(?:json)?\s*', '', text, flags=re.IGNORECASE)
|
36 |
+
text = re.sub(r'\s*```\s*$', '', text, flags=re.IGNORECASE)
|
37 |
+
text = text.strip()
|
38 |
+
# Remove trailing commas
|
39 |
+
cleaned = clean_json_trailing_commas(text)
|
40 |
+
return json.loads(cleaned)
|
41 |
+
|
42 |
@mcp.tool()
|
43 |
async def get_concept_tool(concept_id: str = None) -> dict:
|
44 |
"""
|
|
|
53 |
)
|
54 |
llm_response = await MODEL.generate_text(prompt)
|
55 |
try:
|
56 |
+
data = extract_json_from_text(llm_response)
|
57 |
except Exception:
|
58 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
59 |
return data
|
|
|
70 |
)
|
71 |
llm_response = await MODEL.generate_text(prompt)
|
72 |
try:
|
73 |
+
data = extract_json_from_text(llm_response)
|
74 |
except Exception:
|
75 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
76 |
return data
|
mcp_server/tools/interaction_tools.py
CHANGED
@@ -14,6 +14,20 @@ def calculate_similarity(text1: str, text2: str) -> float:
|
|
14 |
"""Calculate the similarity ratio between two texts."""
|
15 |
return 0.0 # No longer used, LLM-driven
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
@mcp.tool()
|
18 |
async def text_interaction(query: str, student_id: str) -> dict:
|
19 |
"""
|
@@ -26,7 +40,7 @@ async def text_interaction(query: str, student_id: str) -> dict:
|
|
26 |
)
|
27 |
llm_response = await MODEL.generate_text(prompt)
|
28 |
try:
|
29 |
-
data =
|
30 |
except Exception:
|
31 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
32 |
return data
|
|
|
14 |
"""Calculate the similarity ratio between two texts."""
|
15 |
return 0.0 # No longer used, LLM-driven
|
16 |
|
17 |
+
def clean_json_trailing_commas(json_text: str) -> str:
|
18 |
+
return re.sub(r',([ \t\r\n]*[}}\]])', r'\1', json_text)
|
19 |
+
|
20 |
+
def extract_json_from_text(text: str):
|
21 |
+
if not text or not isinstance(text, str):
|
22 |
+
return None
|
23 |
+
# Remove code fences
|
24 |
+
text = re.sub(r'^\s*```(?:json)?\s*', '', text, flags=re.IGNORECASE)
|
25 |
+
text = re.sub(r'\s*```\s*$', '', text, flags=re.IGNORECASE)
|
26 |
+
text = text.strip()
|
27 |
+
# Remove trailing commas
|
28 |
+
cleaned = clean_json_trailing_commas(text)
|
29 |
+
return json.loads(cleaned)
|
30 |
+
|
31 |
@mcp.tool()
|
32 |
async def text_interaction(query: str, student_id: str) -> dict:
|
33 |
"""
|
|
|
40 |
)
|
41 |
llm_response = await MODEL.generate_text(prompt)
|
42 |
try:
|
43 |
+
data = extract_json_from_text(llm_response)
|
44 |
except Exception:
|
45 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
46 |
return data
|
mcp_server/tools/learning_path_tools.py
CHANGED
@@ -8,21 +8,13 @@ import sys
|
|
8 |
import os
|
9 |
from pathlib import Path
|
10 |
import json
|
|
|
11 |
|
12 |
# Add the parent directory to the Python path
|
13 |
current_dir = Path(__file__).parent
|
14 |
parent_dir = current_dir.parent.parent
|
15 |
sys.path.insert(0, str(parent_dir))
|
16 |
|
17 |
-
import sys
|
18 |
-
import os
|
19 |
-
from pathlib import Path
|
20 |
-
|
21 |
-
# Add the parent directory to the Python path
|
22 |
-
current_dir = Path(__file__).parent
|
23 |
-
parent_dir = current_dir.parent
|
24 |
-
sys.path.insert(0, str(parent_dir))
|
25 |
-
|
26 |
# Import from local resources
|
27 |
from resources.concept_graph import CONCEPT_GRAPH
|
28 |
|
@@ -146,6 +138,20 @@ def generate_learning_path(concept_ids: List[str], student_level: str = "beginne
|
|
146 |
"generated_at": datetime.utcnow().isoformat() + "Z"
|
147 |
}
|
148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
@mcp.tool()
|
150 |
async def get_learning_path(student_id: str, concept_ids: list, student_level: str = "beginner") -> dict:
|
151 |
"""
|
@@ -158,7 +164,7 @@ async def get_learning_path(student_id: str, concept_ids: list, student_level: s
|
|
158 |
)
|
159 |
llm_response = await MODEL.generate_text(prompt)
|
160 |
try:
|
161 |
-
data =
|
162 |
except Exception:
|
163 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
164 |
return data
|
|
|
8 |
import os
|
9 |
from pathlib import Path
|
10 |
import json
|
11 |
+
import re
|
12 |
|
13 |
# Add the parent directory to the Python path
|
14 |
current_dir = Path(__file__).parent
|
15 |
parent_dir = current_dir.parent.parent
|
16 |
sys.path.insert(0, str(parent_dir))
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
# Import from local resources
|
19 |
from resources.concept_graph import CONCEPT_GRAPH
|
20 |
|
|
|
138 |
"generated_at": datetime.utcnow().isoformat() + "Z"
|
139 |
}
|
140 |
|
141 |
+
def clean_json_trailing_commas(json_text: str) -> str:
|
142 |
+
return re.sub(r',([ \t\r\n]*[}}\]])', r'\1', json_text)
|
143 |
+
|
144 |
+
def extract_json_from_text(text: str):
|
145 |
+
if not text or not isinstance(text, str):
|
146 |
+
return None
|
147 |
+
# Remove code fences
|
148 |
+
text = re.sub(r'^\s*```(?:json)?\s*', '', text, flags=re.IGNORECASE)
|
149 |
+
text = re.sub(r'\s*```\s*$', '', text, flags=re.IGNORECASE)
|
150 |
+
text = text.strip()
|
151 |
+
# Remove trailing commas
|
152 |
+
cleaned = clean_json_trailing_commas(text)
|
153 |
+
return json.loads(cleaned)
|
154 |
+
|
155 |
@mcp.tool()
|
156 |
async def get_learning_path(student_id: str, concept_ids: list, student_level: str = "beginner") -> dict:
|
157 |
"""
|
|
|
164 |
)
|
165 |
llm_response = await MODEL.generate_text(prompt)
|
166 |
try:
|
167 |
+
data = extract_json_from_text(llm_response)
|
168 |
except Exception:
|
169 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
170 |
return data
|
mcp_server/tools/lesson_tools.py
CHANGED
@@ -8,6 +8,22 @@ import json
|
|
8 |
|
9 |
MODEL = GeminiFlash()
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
@mcp.tool()
|
12 |
async def generate_lesson_tool(topic: str, grade_level: int, duration_minutes: int) -> dict:
|
13 |
"""
|
@@ -20,7 +36,7 @@ async def generate_lesson_tool(topic: str, grade_level: int, duration_minutes: i
|
|
20 |
)
|
21 |
llm_response = await MODEL.generate_text(prompt)
|
22 |
try:
|
23 |
-
data =
|
24 |
except Exception:
|
25 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
26 |
return data
|
|
|
8 |
|
9 |
MODEL = GeminiFlash()
|
10 |
|
11 |
+
def clean_json_trailing_commas(json_text: str) -> str:
|
12 |
+
import re
|
13 |
+
return re.sub(r',([ \t\r\n]*[}}\]])', r'\1', json_text)
|
14 |
+
|
15 |
+
def extract_json_from_text(text: str):
|
16 |
+
import re, json
|
17 |
+
if not text or not isinstance(text, str):
|
18 |
+
return None
|
19 |
+
# Remove code fences
|
20 |
+
text = re.sub(r'^\s*```(?:json)?\s*', '', text, flags=re.IGNORECASE)
|
21 |
+
text = re.sub(r'\s*```\s*$', '', text, flags=re.IGNORECASE)
|
22 |
+
text = text.strip()
|
23 |
+
# Remove trailing commas
|
24 |
+
cleaned = clean_json_trailing_commas(text)
|
25 |
+
return json.loads(cleaned)
|
26 |
+
|
27 |
@mcp.tool()
|
28 |
async def generate_lesson_tool(topic: str, grade_level: int, duration_minutes: int) -> dict:
|
29 |
"""
|
|
|
36 |
)
|
37 |
llm_response = await MODEL.generate_text(prompt)
|
38 |
try:
|
39 |
+
data = extract_json_from_text(llm_response)
|
40 |
except Exception:
|
41 |
data = {"llm_raw": llm_response, "error": "Failed to parse LLM output as JSON"}
|
42 |
return data
|
mcp_server/tools/ocr_tools.py
CHANGED
@@ -139,4 +139,20 @@ Document:
|
|
139 |
"success": False,
|
140 |
"error": f"Error processing document with Mistral OCR: {str(e)}",
|
141 |
"document_url": document_url
|
142 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
"success": False,
|
140 |
"error": f"Error processing document with Mistral OCR: {str(e)}",
|
141 |
"document_url": document_url
|
142 |
+
}
|
143 |
+
|
144 |
+
def clean_json_trailing_commas(json_text: str) -> str:
|
145 |
+
import re
|
146 |
+
return re.sub(r',([ \t\r\n]*[}}\]])', r'\1', json_text)
|
147 |
+
|
148 |
+
def extract_json_from_text(text: str):
|
149 |
+
import re, json
|
150 |
+
if not text or not isinstance(text, str):
|
151 |
+
return None
|
152 |
+
# Remove code fences
|
153 |
+
text = re.sub(r'^\s*```(?:json)?\s*', '', text, flags=re.IGNORECASE)
|
154 |
+
text = re.sub(r'\s*```\s*$', '', text, flags=re.IGNORECASE)
|
155 |
+
text = text.strip()
|
156 |
+
# Remove trailing commas
|
157 |
+
cleaned = clean_json_trailing_commas(text)
|
158 |
+
return json.loads(cleaned)
|