arjunanand13 committed
Commit 857328d · verified · 1 Parent(s): 74609eb

Update app.py

Files changed (1):
  1. app.py +184 -241

app.py CHANGED
@@ -16,39 +16,30 @@ async def extract_data(content, schema_text, progress=gr.Progress()):
  progress(0.1, desc="Parsing schema...")
  schema = json.loads(schema_text)

- progress(0.3, desc="Analyzing complexity...")
-
- progress(0.5, desc="Extracting data...")
+ progress(0.5, desc="Processing...")
  result = await system.extract_structured_data(content, schema)

- progress(0.9, desc="Finalizing results...")
-
  extracted_data = json.dumps(result["data"], indent=2)
  confidence = f"{result['overall_confidence']:.1%}"
  metadata = result["extraction_metadata"]

- complexity_info = f"""
- **Schema Analysis:**
- - Complexity Tier: {metadata['complexity_tier']}
- - Processing Stages: {metadata['stages_executed']}
- - Estimated Cost: ${metadata['estimated_cost']:.3f}
- - Processing Time: {metadata['actual_processing_time']:.2f}s
- - Schema Compliance: {metadata['schema_compliance']:.1%}
- """
+ analysis = f"""Complexity Tier: {metadata['complexity_tier']}
+ Processing Stages: {metadata['stages_executed']}
+ Estimated Cost: ${metadata['estimated_cost']:.3f}
+ Processing Time: {metadata['actual_processing_time']:.2f}s
+ Schema Compliance: {metadata['schema_compliance']:.1%}"""

- review_info = ""
  if result["review_flags"]:
- review_info = f"\n**Review Required:** {', '.join(result['review_flags'])}"
- review_info += f"\nEstimated Review Time: {metadata['recommended_review_time']} minutes"
+ analysis += f"\nReview Flags: {', '.join(result['review_flags'])}"
+ analysis += f"\nReview Time: {metadata['recommended_review_time']} minutes"
-
- progress(1.0, desc="Complete!")

- return extracted_data, confidence, complexity_info + review_info, "✅ Success"
+ progress(1.0, desc="Complete")
+ return extracted_data, confidence, analysis, "Success"

  except json.JSONDecodeError as e:
- return "", "0%", f"❌ Invalid JSON Schema: {str(e)}", "❌ Schema Error"
+ return "", "0%", f"Invalid JSON Schema: {str(e)}", "Schema Error"
  except Exception as e:
- return "", "0%", f"❌ Extraction Error: {str(e)}", "❌ Error"
+ return "", "0%", f"Extraction Error: {str(e)}", "Error"

  def extract_wrapper(content, schema_text):
  return asyncio.run(extract_data(content, schema_text))
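For reference, a minimal sketch (not part of this commit) of driving the synchronous wrapper directly, for example from a test script. The tiny schema and sample text below are hypothetical, and the module-level system object that extract_data relies on is assumed to be initialised elsewhere in app.py.

# Sketch only: calling the synchronous wrapper outside the UI.
# The sample schema and content are hypothetical; `system` must already be set up by app.py.
sample_schema = """{
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "email": {"type": "string"}
    }
}"""
sample_content = "Contact: Jane Example, jane@example.com"

extracted, confidence, analysis, status = extract_wrapper(sample_content, sample_schema)
print(status)      # "Success", "Schema Error", or "Error"
print(confidence)  # e.g. "92.5%"
print(extracted)   # JSON string produced by the extractor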
@@ -115,26 +106,28 @@ github_schema = """{

  github_content = """MkDocs Publisher Action

- This is a composite action that builds an MkDocs documentation site and deploys it to GitHub Pages.
- It's designed to be reusable across multiple repositories.
-
- Author: DevRel Team
-
- The action requires:
- - python-version: Python version to use (default: 3.11)
- - requirements-file: Path to requirements file (required)
- - gh-token: GitHub token for deployment (required)
-
- The action outputs the URL where the site was deployed.
-
- The action runs these steps:
- 1. Checkout the repository code using actions/checkout@v4
- 2. Setup Python environment using actions/setup-python@v5
- 3. Install dependencies: pip install -r requirements.txt
- 4. Build the MkDocs site: mkdocs build
- 5. Deploy to GitHub Pages using peaceiris/actions-gh-pages@v3
-
- Branding: Use blue color with book-open icon."""
+ I keep repeating the steps to build and deploy our MkDocs documentation sites to GitHub Pages across different repos. Let's create a reusable composite action to handle this.
+
+ Action Name: MkDocs Publisher
+ Purpose: A simple action to build an MkDocs site and push it to the gh-pages branch. Should be easy to use. Author should be listed as 'DevRel Team'.
+
+ Inputs Needed:
+ python-version: We need to specify the Python version for setting up the environment. Users should be able to choose. Let's make this optional and default it to 3.11. Description: 'The version of Python to set up for building.'
+ requirements-file: Users might have different requirements files (e.g., requirements.txt, docs/requirements.txt). This input should specify the path. It's required. Description: 'Path to the Python requirements file'.
+ gh-token: The GitHub token for pushing to gh-pages. This is absolutely required. Description: 'GitHub token for deployment.'
+
+ Outputs:
+ The action needs to output the URL where the site was deployed. Let's call this output page-url. Its description should be 'The URL of the deployed GitHub Pages site.'
+
+ How it Runs:
+ This will be a composite action (using: composite). Here are the steps involved:
+ Checkout Code: First, we need the repository code. Use the standard actions/checkout@v4.
+ Setup Python: Next, set up the Python environment. Use actions/setup-python@v5.
+ Install Dependencies: Run a command to install the Python packages. The command is pip install -r requirements.txt. Execute this using the bash shell.
+ Build Site: Run the command mkdocs build. Use bash for this too.
+ Deploy to Pages: Use peaceiris/actions-gh-pages@v3 for deployment.
+
+ Branding: For the marketplace look, let's use the color blue and the book-open icon."""

  resume_schema = """{
  "type": "object",
@@ -146,14 +139,27 @@ resume_schema = """{
  "label": {"type": "string"},
  "email": {"type": "string"},
  "phone": {"type": "string"},
- "website": {"type": "string"},
+ "url": {"type": "string"},
  "summary": {"type": "string"},
  "location": {
  "type": "object",
  "properties": {
+ "address": {"type": "string"},
+ "postalCode": {"type": "string"},
  "city": {"type": "string"},
- "region": {"type": "string"},
- "countryCode": {"type": "string"}
+ "countryCode": {"type": "string"},
+ "region": {"type": "string"}
+ }
+ },
+ "profiles": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "network": {"type": "string"},
+ "username": {"type": "string"},
+ "url": {"type": "string"}
+ }
  }
  }
  }
@@ -164,8 +170,8 @@ resume_schema = """{
  "type": "object",
  "properties": {
  "name": {"type": "string"},
- "position": {"type": "string"},
  "location": {"type": "string"},
+ "position": {"type": "string"},
  "startDate": {"type": "string"},
  "endDate": {"type": "string"},
  "highlights": {
@@ -184,8 +190,7 @@ resume_schema = """{
  "area": {"type": "string"},
  "studyType": {"type": "string"},
  "startDate": {"type": "string"},
- "endDate": {"type": "string"},
- "score": {"type": "string"}
+ "endDate": {"type": "string"}
  }
  }
  },
@@ -205,124 +210,114 @@
  }
  }"""

- resume_content = """Sarah Chen - Senior AI Research Scientist
- Email: [email protected] | Phone: +1-555-0123
- Location: Palo Alto, California, United States
- Website: https://sarahchen.ai
-
- SUMMARY
- Experienced AI research scientist with 8+ years in machine learning, deep learning, and natural language processing. Led teams that developed production ML systems serving millions of users.
-
+ resume_content = """John Doe
+ Software Engineer
+ Phone: +1-555-0123
+ Address: 123 Main St, San Francisco, CA 94105, US
+ Website: https://johndoe.dev
+
+ PROFESSIONAL SUMMARY
+ Experienced software engineer with 5+ years developing web applications and distributed systems. Skilled in full-stack development with expertise in Python, JavaScript, and cloud technologies.
+
  WORK EXPERIENCE

- Senior AI Research Scientist | OpenAI | 2021 - Present | San Francisco, CA
- • Led development of GPT-4 training infrastructure, improving training efficiency by 40%
- • Designed novel attention mechanisms for transformer architectures
- • Managed team of 12 researchers across multiple ML projects
-
- Machine Learning Engineer | Google Brain | 2019 - 2021 | Mountain View, CA
- • Developed recommendation systems serving 500M+ users daily
- • Implemented distributed training frameworks for large-scale models
- • Reduced model inference latency by 60% through optimization techniques
-
+ Senior Software Engineer | TechCorp Inc | San Francisco, CA | 2022 - Present
+ - Designed and implemented microservices architecture serving 1M+ users
+ - Led development of real-time data processing pipeline using Apache Kafka
+ - Reduced system latency by 40% through performance optimization
+
+ Software Engineer | StartupXYZ | Palo Alto, CA | 2020 - 2022
+ - Built responsive web applications using React and Node.js
+ - Implemented CI/CD pipelines resulting in 50% faster deployment cycles
+ - Collaborated with cross-functional teams on product development
+
  EDUCATION

- Ph.D. Computer Science | Stanford University | 2013 - 2017 | Stanford, CA
- Dissertation: "Efficient Training of Large-Scale Neural Networks"
- GPA: 3.95/4.0
-
- M.S. Computer Science | MIT | 2011 - 2013 | Cambridge, MA
- Concentration: Artificial Intelligence | GPA: 3.9/4.0
-
- SKILLS
- Programming: Python, C++, JavaScript, CUDA, PyTorch, TensorFlow
- Machine Learning: Deep Learning, NLP, Computer Vision, Reinforcement Learning
- Cloud Platforms: AWS, GCP, Azure, Kubernetes, Docker"""
-
- email_schema = """{
+ Bachelor of Science in Computer Science | Stanford University | 2016 - 2020
+ - Relevant Coursework: Data Structures, Algorithms, Database Systems
+ - Senior Project: Machine Learning Platform for Predictive Analytics
+
+ TECHNICAL SKILLS
+
+ Programming Languages: Python, JavaScript, Java, Go, SQL
+ Web Technologies: React, Node.js, HTML/CSS, REST APIs, GraphQL
+ Cloud & DevOps: AWS, Docker, Kubernetes, Jenkins, Git"""
+
+ citation_schema = """{
  "type": "object",
  "properties": {
- "participants": {
+ "cff-version": {"type": "string"},
+ "message": {"type": "string"},
+ "title": {"type": "string"},
+ "authors": {
  "type": "array",
  "items": {
  "type": "object",
  "properties": {
- "name": {"type": "string"},
- "email": {"type": "string"},
- "role": {"type": "string"},
- "organization": {"type": "string"}
+ "given-names": {"type": "string"},
+ "family-names": {"type": "string"},
+ "affiliation": {"type": "string"},
+ "orcid": {"type": "string"}
  }
  }
  },
- "requirements": {
+ "type": {"type": "string"},
+ "date-published": {"type": "string"},
+ "url": {"type": "string"},
+ "abstract": {"type": "string"},
+ "keywords": {
  "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "id": {"type": "string"},
- "description": {"type": "string"},
- "priority": {"type": "string"},
- "status": {"type": "string"},
- "source_stakeholder": {"type": "string"}
- }
- }
+ "items": {"type": "string"}
  },
- "decisions": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "decision": {"type": "string"},
- "rationale": {"type": "string"},
- "stakeholders_involved": {"type": "array", "items": {"type": "string"}},
- "implementation_impact": {"type": "string"}
- }
- }
- },
- "timeline": {
+ "preferred-citation": {
  "type": "object",
  "properties": {
- "start_date": {"type": "string"},
- "key_milestones": {"type": "array", "items": {"type": "string"}},
- "final_deadline": {"type": "string"}
+ "type": {"type": "string"},
+ "title": {"type": "string"},
+ "authors": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "given-names": {"type": "string"},
+ "family-names": {"type": "string"}
+ }
+ }
+ },
+ "collection-title": {"type": "string"},
+ "volume": {"type": "integer"},
+ "year": {"type": "integer"},
+ "publisher": {
+ "type": "object",
+ "properties": {
+ "name": {"type": "string"}
+ }
+ }
  }
  }
  }
  }"""

- email_content = """From: [email protected]
- Subject: API Rate Limiting Requirements - Final Decision
-
- Hi team,
-
- After our discussion yesterday, I wanted to confirm the final requirements for the API rate limiting feature:
-
- REQ-001: Implement per-user rate limiting at 1000 requests/hour (HIGH priority)
- REQ-002: Add burst capacity of 100 requests/minute (MEDIUM priority)
- REQ-003: Provide rate limit headers in API responses (HIGH priority)
- REQ-004: Create rate limit monitoring dashboard (LOW priority)
-
- Decision: We'll use Redis for rate limiting storage instead of in-memory due to scalability concerns raised by Mike.
- Rationale: Redis provides persistence and can scale across multiple API instances.
-
- Implementation impact: Will require Redis infrastructure setup but provides better long-term scalability.
-
- Timeline:
- - Start development: January 15, 2024
- - Feature complete: February 28, 2024
- - Production deployment: March 15, 2024
-
- Let me know if you have any questions.
-
- Best regards,
- John Smith - Product Manager, Acme Corp
- Sarah Johnson - Lead Engineer, TechCorp
- Mike Brown - DevOps Lead, Acme Corp"""
+ citation_content = """Title: Attention Is All You Need
+ Authors: Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Łukasz Kaiser, Illia Polosukhin
+
+ This paper introduces the Transformer, a novel neural network architecture based solely on attention mechanisms, dispensing with recurrence and convolutions entirely.
+
+ Published in: Advances in Neural Information Processing Systems, Volume 30, 2017
+ Publisher: Curran Associates, Inc.
+ URL: https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf
+
+ The Transformer uses multi-head self-attention to draw global dependencies between input and output. The model achieves state-of-the-art results on machine translation tasks while being more parallelizable and requiring significantly less time to train.
+
+ Keywords: attention mechanism, transformer, neural networks, machine translation, self-attention
+
+ This work has become foundational for modern NLP models including BERT, GPT, and T5."""

  contract_schema = """{
  "type": "object",
  "properties": {
+ "contract_type": {"type": "string"},
  "parties": {
  "type": "array",
  "items": {
@@ -330,179 +325,127 @@ contract_schema = """{
  "properties": {
  "name": {"type": "string"},
  "type": {"type": "string"},
- "address": {"type": "string"},
- "role": {"type": "string"}
+ "address": {"type": "string"}
  }
  }
  },
- "contract_details": {
- "type": "object",
- "properties": {
- "contract_value": {"type": "string"},
- "currency": {"type": "string"},
- "payment_terms": {"type": "string"},
- "contract_duration": {"type": "string"},
- "start_date": {"type": "string"},
- "end_date": {"type": "string"}
- }
- },
- "key_terms": {
- "type": "object",
- "properties": {
- "liability_cap": {"type": "string"},
- "termination_clause": {"type": "string"},
- "intellectual_property": {"type": "string"},
- "confidentiality_period": {"type": "string"}
- }
- },
+ "contract_value": {"type": "string"},
+ "payment_terms": {"type": "string"},
+ "duration": {"type": "string"},
+ "start_date": {"type": "string"},
  "deliverables": {
  "type": "array",
  "items": {
  "type": "object",
  "properties": {
  "name": {"type": "string"},
- "description": {"type": "string"},
  "deadline": {"type": "string"},
- "acceptance_criteria": {"type": "string"}
+ "description": {"type": "string"}
  }
  }
+ },
+ "key_terms": {
+ "type": "object",
+ "properties": {
+ "liability_cap": {"type": "string"},
+ "termination_notice": {"type": "string"},
+ "intellectual_property": {"type": "string"}
+ }
+ }
  }
  }
  }"""

  contract_content = """SOFTWARE DEVELOPMENT AGREEMENT

- This Software Development Agreement ("Agreement") is entered into on January 1, 2024, between:
-
- TechCorp Inc., a Delaware corporation with offices at 123 Silicon Valley Blvd, San Francisco, CA 94105 ("Client")
-
- AND
-
- DevSolutions LLC, a California limited liability company with offices at 456 Innovation Drive, Palo Alto, CA 94301 ("Developer")
-
- CONTRACT TERMS:
- - Total Contract Value: $2,500,000 USD
- - Payment Terms: Net 30 days
- - Contract Duration: 18 months
- - Start Date: January 15, 2024
- - End Date: July 15, 2025
-
- KEY PROVISIONS:
- - Liability Cap: Limited to total contract value ($2.5M)
- - Termination: Either party may terminate with 90 days written notice
- - Intellectual Property: All developed IP remains with Client
- - Confidentiality: 5-year confidentiality period post-contract
+ This Agreement is made between:
+ Client: TechCorp Inc., 123 Business Ave, San Francisco, CA 94105
+ Contractor: DevStudio LLC, 456 Developer St, Austin, TX 78701
+
+ Contract Value: $150,000
+ Payment Terms: 50% upfront, 50% upon completion
+ Duration: 6 months
+ Start Date: January 1, 2024

  DELIVERABLES:
- 1. API Platform Development
- - Complete REST API platform with authentication
- - Deadline: June 1, 2024
- - Acceptance: Must pass security audit and performance tests
+ 1. Web Application Development
+ - Complete e-commerce platform with user authentication
+ - Deadline: March 15, 2024

- 2. Mobile Application
+ 2. Mobile App Development
  - iOS and Android applications
- - Deadline: October 1, 2024
- - Acceptance: App store approval and user acceptance testing
+ - Deadline: May 1, 2024

- 3. Documentation & Training
- - Complete technical documentation and user training
- - Deadline: December 1, 2024
- - Acceptance: Training completion by 95% of users"""
+ 3. API Integration
+ - Third-party payment processing integration
+ - Deadline: April 15, 2024

+ KEY TERMS:
+ - Liability is capped at the total contract value
+ - Either party may terminate with 30 days written notice
+ - All intellectual property developed under this agreement belongs to the Client
+ - Contractor agrees to maintain confidentiality of all proprietary information"""
+
- with gr.Blocks(title="Unstructured to Structured JSON Converter", theme=gr.themes.Soft()) as app:
- gr.Markdown("""
- # 🔄 Unstructured to Structured JSON Converter
-
- **A production-ready system for extracting structured data from unstructured text following complex JSON schemas.**
-
- ✨ **Key Features:**
- - Supports unlimited schema complexity (6+ levels, 250+ fields, 500+ enums)
- - Handles large documents (50+ pages, 10MB+ files)
- - Dynamic resource allocation ($0.01-$5.00 based on complexity)
- - Confidence-based quality assessment with human review routing
-
- 📊 **Performance:** 97-99% time savings vs manual processing with 85-95% accuracy
- """)
+ with gr.Blocks(title="Unstructured to Structured Converter", theme=gr.themes.Default()) as app:
+ gr.Markdown("# Unstructured to Structured JSON Converter")
+ gr.Markdown("Convert any unstructured text into structured JSON following complex schemas")

  with gr.Row():
- with gr.Column(scale=1):
- gr.Markdown("### 📝 Input Content")
+ with gr.Column():
+ gr.Markdown("### Input Content")
  content_input = gr.Textbox(
- label="Unstructured Text Content",
- placeholder="Paste your document content here...",
- lines=15,
- max_lines=25
+ label="Document Content",
+ placeholder="Enter your unstructured text here...",
+ lines=12,
+ max_lines=20
  )

- with gr.Column(scale=1):
- gr.Markdown("### 🗂️ JSON Schema")
+ with gr.Column():
+ gr.Markdown("### JSON Schema")
  schema_input = gr.Textbox(
- label="Target JSON Schema",
- placeholder="Paste your JSON schema here...",
- lines=15,
- max_lines=25,
+ label="Target Schema",
+ placeholder="Enter your JSON schema here...",
+ lines=12,
+ max_lines=20,
  value=github_schema
  )

  with gr.Row():
- extract_btn = gr.Button("🚀 Extract Structured Data", variant="primary", size="lg")
- clear_btn = gr.Button("🗑️ Clear", variant="secondary")
+ extract_btn = gr.Button("Extract Data", variant="primary")
+ clear_btn = gr.Button("Clear")

  with gr.Row():
  with gr.Column(scale=2):
- gr.Markdown("### 📤 Extracted JSON Data")
+ gr.Markdown("### Extracted Data")
  output_json = gr.Textbox(
- label="Structured Output",
- lines=20,
- max_lines=30,
+ label="JSON Output",
+ lines=15,
  show_copy_button=True
  )

  with gr.Column(scale=1):
- gr.Markdown("### 📊 Analysis Results")
- confidence_output = gr.Textbox(label="Overall Confidence", interactive=False)
- metadata_output = gr.Textbox(
- label="Processing Metadata",
- lines=12,
- interactive=False
- )
- status_output = gr.Textbox(label="Status", interactive=False)
+ gr.Markdown("### Results")
+ confidence_output = gr.Textbox(label="Confidence")
+ metadata_output = gr.Textbox(label="Analysis", lines=8)
+ status_output = gr.Textbox(label="Status")

- gr.Markdown("### 🎯 Example Test Cases")
+ gr.Markdown("### Test Cases")
  gr.Examples(
  examples=[
  [github_content, github_schema],
  [resume_content, resume_schema],
- [email_content, email_schema],
+ [citation_content, citation_schema],
  [contract_content, contract_schema]
  ],
  inputs=[content_input, schema_input],
- label="Click any example to load it:",
- examples_per_page=4
+ label="Select a test case:"
  )

  gr.Markdown("""
- ### 🔧 How It Works
-
- 1. **Schema Analysis**: Analyzes complexity (depth, fields, objects) and creates optimal extraction plan
- 2. **Document Processing**: Handles large documents with semantic chunking and context preservation
- 3. **Multi-Stage Extraction**: Uses hierarchical processing with dynamic model selection
- 4. **Quality Assessment**: Provides confidence scores and flags uncertain fields for human review
-
- ### 📈 Complexity Tiers
-
- | Tier | Depth | Fields | Cost | Time | Use Case |
- |------|-------|--------|------|------|----------|
- | **1 (Simple)** | ≤2 levels | ≤20 | $0.01-0.05 | 5-15s | Forms, basic extraction |
- | **2 (Medium)** | ≤4 levels | ≤100 | $0.08-0.25 | 15-45s | API docs, structured reports |
- | **3 (Complex)** | >4 levels | >100 | $0.30-2.00 | 45-120s | Legal docs, research papers |
-
- ### 🎓 Schema Examples
-
- **GitHub Actions** (Medium): Action metadata with inputs/outputs
- **Resume/CV** (Complex): Personal profile with work history and skills
- **Email Chains** (Complex): Requirements extraction from stakeholder communications
- **Legal Contracts** (Complex): Contract terms, parties, and deliverables
+ ### System Features
+ - **Schema Complexity**: Supports 6+ levels nesting, 250+ fields, unlimited enums
+ - **Document Size**: Handles 50+ page documents and 10MB+ files
+ - **Dynamic Scaling**: Cost ranges from $0.01 to $5.00 based on complexity
+ - **Quality Assurance**: Confidence scoring with human review routing
  """)

  extract_btn.click(