arjunanand13 committed on
Commit
91a9da3
·
verified ·
1 Parent(s): 45102e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -12
app.py CHANGED
@@ -20,26 +20,37 @@ async def extract_data(content, schema_text, progress=gr.Progress()):
20
  result = await system.extract_structured_data(content, schema)
21
 
22
  extracted_data = json.dumps(result["data"], indent=2)
23
- confidence = f"{result['overall_confidence']:.1%}"
24
  metadata = result["extraction_metadata"]
25
 
26
- analysis = f"""Complexity Tier: {metadata['complexity_tier']}
 
 
 
 
 
27
  Processing Stages: {metadata['stages_executed']}
28
- Estimated Cost: ${metadata['estimated_cost']:.3f}
29
  Processing Time: {metadata['actual_processing_time']:.2f}s
30
  Schema Compliance: {metadata['schema_compliance']:.1%}"""
31
 
32
- if result["review_flags"]:
33
- analysis += f"\nReview Flags: {', '.join(result['review_flags'])}"
34
- analysis += f"\nReview Time: {metadata['recommended_review_time']} minutes"
 
 
 
 
 
 
 
 
35
 
36
  progress(1.0, desc="Complete")
37
- return extracted_data, confidence, analysis, "Success"
38
 
39
  except json.JSONDecodeError as e:
40
- return "", "0%", f"Invalid JSON Schema: {str(e)}", "Schema Error"
41
  except Exception as e:
42
- return "", "0%", f"Extraction Error: {str(e)}", "Error"
43
 
44
  def extract_wrapper(content, schema_text):
45
  return asyncio.run(extract_data(content, schema_text))
@@ -424,7 +435,6 @@ with gr.Blocks(title="Unstructured to Structured Converter", theme=gr.themes.Def
424
 
425
  with gr.Column(scale=1):
426
  gr.Markdown("### Results")
427
- confidence_output = gr.Textbox(label="Confidence")
428
  metadata_output = gr.Textbox(label="Analysis", lines=8)
429
  status_output = gr.Textbox(label="Status")
430
 
@@ -451,12 +461,12 @@ with gr.Blocks(title="Unstructured to Structured Converter", theme=gr.themes.Def
451
  extract_btn.click(
452
  fn=extract_wrapper,
453
  inputs=[content_input, schema_input],
454
- outputs=[output_json, confidence_output, metadata_output, status_output]
455
  )
456
 
457
  clear_btn.click(
458
  lambda: ("", "", "", "", ""),
459
- outputs=[content_input, schema_input, output_json, confidence_output, metadata_output]
460
  )
461
 
462
  if __name__ == "__main__":
 
20
  result = await system.extract_structured_data(content, schema)
21
 
22
  extracted_data = json.dumps(result["data"], indent=2)
 
23
  metadata = result["extraction_metadata"]
24
 
25
+ total_expected = len(schema.get('properties', {}))
26
+ extracted_count = len([k for k, v in result["data"].items() if v is not None and v != ""])
27
+ completeness = extracted_count / total_expected if total_expected > 0 else 0
28
+
29
+ analysis = f"""Fields Extracted: {extracted_count}/{total_expected} ({completeness:.1%})
30
+ Complexity Tier: {metadata['complexity_tier']}
31
  Processing Stages: {metadata['stages_executed']}
 
32
  Processing Time: {metadata['actual_processing_time']:.2f}s
33
  Schema Compliance: {metadata['schema_compliance']:.1%}"""
34
 
35
+ status_flags = result.get("review_flags", [])
36
+
37
+ if completeness >= 0.8 and not any(flag in ["incomplete_extraction", "low_quality", "schema_violations"] for flag in status_flags):
38
+ status = "Success"
39
+ elif completeness >= 0.5:
40
+ status = "Partial Success"
41
+ else:
42
+ status = "Incomplete"
43
+
44
+ if status_flags:
45
+ analysis += f"\nIssues: {', '.join(status_flags)}"
46
 
47
  progress(1.0, desc="Complete")
48
+ return extracted_data, analysis, status
49
 
50
  except json.JSONDecodeError as e:
51
+ return "", f"Invalid JSON Schema: {str(e)}", "Schema Error"
52
  except Exception as e:
53
+ return "", f"Extraction Error: {str(e)}", "Error"
54
 
55
  def extract_wrapper(content, schema_text):
56
  return asyncio.run(extract_data(content, schema_text))
 
435
 
436
  with gr.Column(scale=1):
437
  gr.Markdown("### Results")
 
438
  metadata_output = gr.Textbox(label="Analysis", lines=8)
439
  status_output = gr.Textbox(label="Status")
440
 
 
461
  extract_btn.click(
462
  fn=extract_wrapper,
463
  inputs=[content_input, schema_input],
464
+ outputs=[output_json, metadata_output, status_output]
465
  )
466
 
467
  clear_btn.click(
468
  lambda: ("", "", "", "", ""),
469
+ outputs=[content_input, schema_input, output_json, metadata_output, status_output]
470
  )
471
 
472
  if __name__ == "__main__":