Update app.py
Browse files
app.py
CHANGED
@@ -20,26 +20,37 @@ async def extract_data(content, schema_text, progress=gr.Progress()):
|
|
20 |
result = await system.extract_structured_data(content, schema)
|
21 |
|
22 |
extracted_data = json.dumps(result["data"], indent=2)
|
23 |
-
confidence = f"{result['overall_confidence']:.1%}"
|
24 |
metadata = result["extraction_metadata"]
|
25 |
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
27 |
Processing Stages: {metadata['stages_executed']}
|
28 |
-
Estimated Cost: ${metadata['estimated_cost']:.3f}
|
29 |
Processing Time: {metadata['actual_processing_time']:.2f}s
|
30 |
Schema Compliance: {metadata['schema_compliance']:.1%}"""
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
progress(1.0, desc="Complete")
|
37 |
-
return extracted_data,
|
38 |
|
39 |
except json.JSONDecodeError as e:
|
40 |
-
return "",
|
41 |
except Exception as e:
|
42 |
-
return "",
|
43 |
|
44 |
def extract_wrapper(content, schema_text):
    """Synchronously run the async ``extract_data`` coroutine.

    Builds the coroutine for the given content and schema text, drives it
    to completion with ``asyncio.run``, and returns whatever it returns.
    """
    extraction = extract_data(content, schema_text)
    return asyncio.run(extraction)
|
@@ -424,7 +435,6 @@ with gr.Blocks(title="Unstructured to Structured Converter", theme=gr.themes.Def
|
|
424 |
|
425 |
with gr.Column(scale=1):
|
426 |
gr.Markdown("### Results")
|
427 |
-
confidence_output = gr.Textbox(label="Confidence")
|
428 |
metadata_output = gr.Textbox(label="Analysis", lines=8)
|
429 |
status_output = gr.Textbox(label="Status")
|
430 |
|
@@ -451,12 +461,12 @@ with gr.Blocks(title="Unstructured to Structured Converter", theme=gr.themes.Def
|
|
451 |
extract_btn.click(
|
452 |
fn=extract_wrapper,
|
453 |
inputs=[content_input, schema_input],
|
454 |
-
outputs=[output_json,
|
455 |
)
|
456 |
|
457 |
clear_btn.click(
|
458 |
lambda: ("", "", "", "", ""),
|
459 |
-
outputs=[content_input, schema_input, output_json,
|
460 |
)
|
461 |
|
462 |
if __name__ == "__main__":
|
|
|
20 |
result = await system.extract_structured_data(content, schema)
|
21 |
|
22 |
extracted_data = json.dumps(result["data"], indent=2)
|
|
|
23 |
metadata = result["extraction_metadata"]
|
24 |
|
25 |
+
total_expected = len(schema.get('properties', {}))
|
26 |
+
extracted_count = len([k for k, v in result["data"].items() if v is not None and v != ""])
|
27 |
+
completeness = extracted_count / total_expected if total_expected > 0 else 0
|
28 |
+
|
29 |
+
analysis = f"""Fields Extracted: {extracted_count}/{total_expected} ({completeness:.1%})
|
30 |
+
Complexity Tier: {metadata['complexity_tier']}
|
31 |
Processing Stages: {metadata['stages_executed']}
|
|
|
32 |
Processing Time: {metadata['actual_processing_time']:.2f}s
|
33 |
Schema Compliance: {metadata['schema_compliance']:.1%}"""
|
34 |
|
35 |
+
status_flags = result.get("review_flags", [])
|
36 |
+
|
37 |
+
if completeness >= 0.8 and not any(flag in ["incomplete_extraction", "low_quality", "schema_violations"] for flag in status_flags):
|
38 |
+
status = "Success"
|
39 |
+
elif completeness >= 0.5:
|
40 |
+
status = "Partial Success"
|
41 |
+
else:
|
42 |
+
status = "Incomplete"
|
43 |
+
|
44 |
+
if status_flags:
|
45 |
+
analysis += f"\nIssues: {', '.join(status_flags)}"
|
46 |
|
47 |
progress(1.0, desc="Complete")
|
48 |
+
return extracted_data, analysis, status
|
49 |
|
50 |
except json.JSONDecodeError as e:
|
51 |
+
return "", f"Invalid JSON Schema: {str(e)}", "Schema Error"
|
52 |
except Exception as e:
|
53 |
+
return "", f"Extraction Error: {str(e)}", "Error"
|
54 |
|
55 |
def extract_wrapper(content, schema_text):
    """Synchronously run the async ``extract_data`` coroutine.

    Builds the coroutine for the given content and schema text, drives it
    to completion with ``asyncio.run``, and returns whatever it returns.
    """
    extraction = extract_data(content, schema_text)
    return asyncio.run(extraction)
|
|
|
435 |
|
436 |
with gr.Column(scale=1):
|
437 |
gr.Markdown("### Results")
|
|
|
438 |
metadata_output = gr.Textbox(label="Analysis", lines=8)
|
439 |
status_output = gr.Textbox(label="Status")
|
440 |
|
|
|
461 |
extract_btn.click(
|
462 |
fn=extract_wrapper,
|
463 |
inputs=[content_input, schema_input],
|
464 |
+
outputs=[output_json, metadata_output, status_output]
|
465 |
)
|
466 |
|
467 |
clear_btn.click(
|
468 |
lambda: ("", "", "", "", ""),
|
469 |
+
outputs=[content_input, schema_input, output_json, metadata_output, status_output]
|
470 |
)
|
471 |
|
472 |
if __name__ == "__main__":
|