Update app.py
Browse files
app.py
CHANGED
|
@@ -20,26 +20,37 @@ async def extract_data(content, schema_text, progress=gr.Progress()):
|
|
| 20 |
result = await system.extract_structured_data(content, schema)
|
| 21 |
|
| 22 |
extracted_data = json.dumps(result["data"], indent=2)
|
| 23 |
-
confidence = f"{result['overall_confidence']:.1%}"
|
| 24 |
metadata = result["extraction_metadata"]
|
| 25 |
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
Processing Stages: {metadata['stages_executed']}
|
| 28 |
-
Estimated Cost: ${metadata['estimated_cost']:.3f}
|
| 29 |
Processing Time: {metadata['actual_processing_time']:.2f}s
|
| 30 |
Schema Compliance: {metadata['schema_compliance']:.1%}"""
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
progress(1.0, desc="Complete")
|
| 37 |
-
return extracted_data,
|
| 38 |
|
| 39 |
except json.JSONDecodeError as e:
|
| 40 |
-
return "",
|
| 41 |
except Exception as e:
|
| 42 |
-
return "",
|
| 43 |
|
| 44 |
def extract_wrapper(content, schema_text):
|
| 45 |
return asyncio.run(extract_data(content, schema_text))
|
|
@@ -424,7 +435,6 @@ with gr.Blocks(title="Unstructured to Structured Converter", theme=gr.themes.Def
|
|
| 424 |
|
| 425 |
with gr.Column(scale=1):
|
| 426 |
gr.Markdown("### Results")
|
| 427 |
-
confidence_output = gr.Textbox(label="Confidence")
|
| 428 |
metadata_output = gr.Textbox(label="Analysis", lines=8)
|
| 429 |
status_output = gr.Textbox(label="Status")
|
| 430 |
|
|
@@ -451,12 +461,12 @@ with gr.Blocks(title="Unstructured to Structured Converter", theme=gr.themes.Def
|
|
| 451 |
extract_btn.click(
|
| 452 |
fn=extract_wrapper,
|
| 453 |
inputs=[content_input, schema_input],
|
| 454 |
-
outputs=[output_json,
|
| 455 |
)
|
| 456 |
|
| 457 |
clear_btn.click(
|
| 458 |
lambda: ("", "", "", "", ""),
|
| 459 |
-
outputs=[content_input, schema_input, output_json,
|
| 460 |
)
|
| 461 |
|
| 462 |
if __name__ == "__main__":
|
|
|
|
| 20 |
result = await system.extract_structured_data(content, schema)
|
| 21 |
|
| 22 |
extracted_data = json.dumps(result["data"], indent=2)
|
|
|
|
| 23 |
metadata = result["extraction_metadata"]
|
| 24 |
|
| 25 |
+
total_expected = len(schema.get('properties', {}))
|
| 26 |
+
extracted_count = len([k for k, v in result["data"].items() if v is not None and v != ""])
|
| 27 |
+
completeness = extracted_count / total_expected if total_expected > 0 else 0
|
| 28 |
+
|
| 29 |
+
analysis = f"""Fields Extracted: {extracted_count}/{total_expected} ({completeness:.1%})
|
| 30 |
+
Complexity Tier: {metadata['complexity_tier']}
|
| 31 |
Processing Stages: {metadata['stages_executed']}
|
|
|
|
| 32 |
Processing Time: {metadata['actual_processing_time']:.2f}s
|
| 33 |
Schema Compliance: {metadata['schema_compliance']:.1%}"""
|
| 34 |
|
| 35 |
+
status_flags = result.get("review_flags", [])
|
| 36 |
+
|
| 37 |
+
if completeness >= 0.8 and not any(flag in ["incomplete_extraction", "low_quality", "schema_violations"] for flag in status_flags):
|
| 38 |
+
status = "Success"
|
| 39 |
+
elif completeness >= 0.5:
|
| 40 |
+
status = "Partial Success"
|
| 41 |
+
else:
|
| 42 |
+
status = "Incomplete"
|
| 43 |
+
|
| 44 |
+
if status_flags:
|
| 45 |
+
analysis += f"\nIssues: {', '.join(status_flags)}"
|
| 46 |
|
| 47 |
progress(1.0, desc="Complete")
|
| 48 |
+
return extracted_data, analysis, status
|
| 49 |
|
| 50 |
except json.JSONDecodeError as e:
|
| 51 |
+
return "", f"Invalid JSON Schema: {str(e)}", "Schema Error"
|
| 52 |
except Exception as e:
|
| 53 |
+
return "", f"Extraction Error: {str(e)}", "Error"
|
| 54 |
|
| 55 |
def extract_wrapper(content, schema_text):
|
| 56 |
return asyncio.run(extract_data(content, schema_text))
|
|
|
|
| 435 |
|
| 436 |
with gr.Column(scale=1):
|
| 437 |
gr.Markdown("### Results")
|
|
|
|
| 438 |
metadata_output = gr.Textbox(label="Analysis", lines=8)
|
| 439 |
status_output = gr.Textbox(label="Status")
|
| 440 |
|
|
|
|
| 461 |
extract_btn.click(
|
| 462 |
fn=extract_wrapper,
|
| 463 |
inputs=[content_input, schema_input],
|
| 464 |
+
outputs=[output_json, metadata_output, status_output]
|
| 465 |
)
|
| 466 |
|
| 467 |
clear_btn.click(
|
| 468 |
lambda: ("", "", "", "", ""),
|
| 469 |
+
outputs=[content_input, schema_input, output_json, metadata_output, status_output]
|
| 470 |
)
|
| 471 |
|
| 472 |
if __name__ == "__main__":
|