Spaces:

SauravCh11
/

PassportExtrator

Runtime error

App Files Files Community

Sandy2636 committed 16 days ago

Commit

ba14e67

1 Parent(s): e08f157

Add application file

Browse files

Files changed (3) hide show

.gradio/certificate.pem +31 -0
app.py +390 -198
old_app.py +267 -0

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

app.py CHANGED Viewed

@@ -1,70 +1,105 @@
 import gradio as gr
-import requests
 import base64
-import os
 import json
-import mimetypes
 # --- Configuration ---
-OPENROUTER_API_KEY = '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'
-IMAGE_MODEL = "opengvlab/internvl3-14b:free"
 OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
-# --- Application State ---
-current_batch = []
 # --- Helper Functions ---
def generate_extraction_prompt(doc_type_provided_by_user):
    """Build the OCR / information-extraction prompt for one document image.

    Args:
        doc_type_provided_by_user: Document type label chosen by the user
            (for example ``"passport_front"``). It is interpolated verbatim
            into the prompt text.

    Returns:
        The full instruction prompt as a string. The prompt asks the model
        for a single JSON object with the keys ``document_type_provided``,
        ``document_type_detected``, ``extracted_fields``, ``mrz_data``,
        ``multilingual_info`` and ``full_text_ocr``.
    """
    # Doubled braces below are f-string escapes for literal "{" and "}".
    return f"""You are an advanced OCR and information extraction AI.
The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
Your task is to meticulously analyze this image and extract all relevant information.
Output Format Instructions:
Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
The JSON object should have the following top-level keys:
- "document_type_provided": (string) The type provided by the user: "{doc_type_provided_by_user}".
- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
    - For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
    - For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
    - For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
    - "raw_mrz_lines": (array of strings) Each line of the MRZ.
    - "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
    If no MRZ, this field should be null.
- "multilingual_info": (array of objects or null) For any text segments not in English:
    - Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
    If no non-English text, this field can be null or an empty array.
- "full_text_ocr": (string) Concatenation of all text found on the document.
Extraction Guidelines:
1.  Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
2.  Extract all visible text, including small print, stamps, and handwritten annotations if legible.
3.  For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
4.  If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
5.  If the document is multi-page and only one page is provided, note this if apparent.
Ensure the entire output strictly adheres to the JSON format.
"""
-def process_single_image_with_openrouter(image_path, doc_type):
     if not OPENROUTER_API_KEY:
-        return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
     try:
-        with open(image_path, "rb") as f:
-            encoded_image_bytes = f.read()
-            encoded_image_string = base64.b64encode(encoded_image_bytes).decode("utf-8")
-        mime_type, _ = mimetypes.guess_type(image_path)
-        if not mime_type:
-            ext = os.path.splitext(image_path)[1].lower()
-            if ext == ".png": mime_type = "image/png"
-            elif ext in [".jpg", ".jpeg"]: mime_type = "image/jpeg"
-            elif ext == ".webp": mime_type = "image/webp"
-            else: mime_type = "image/jpeg"
-        data_url = f"data:{mime_type};base64,{encoded_image_string}"
-        prompt_text = generate_extraction_prompt(doc_type)
         payload = {
             "model": IMAGE_MODEL,
             "messages": [
@@ -76,192 +111,349 @@ def process_single_image_with_openrouter(image_path, doc_type):
                     ]
                 }
             ],
-            "max_tokens": 3000,
             "temperature": 0.1,
         }
         headers = {
             "Authorization": f"Bearer {OPENROUTER_API_KEY}",
             "Content-Type": "application/json",
-            "HTTP-Referer": "https://huggingface.co/spaces/YOUR_SPACE_NAME",
-            "X-Title": "Gradio Document Extractor"
         }
-        print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
-        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120)
         response.raise_for_status()
         result = response.json()
-        print(f"Received response from OpenRouter. Status: {response.status_code}")
         if "choices" in result and result["choices"]:
-            content_text = result["choices"][0]["message"]["content"]
-            clean_content = content_text.strip()
-            if clean_content.startswith("```json"):
-                clean_content = clean_content[7:]
-                if clean_content.endswith("```"):
-                    clean_content = clean_content[:-3]
-            elif clean_content.startswith("`") and clean_content.endswith("`"):
-                 clean_content = clean_content[1:-1]
-            try:
-                parsed_json = json.loads(clean_content)
-                if "document_type_provided" not in parsed_json:
-                    parsed_json["document_type_provided"] = doc_type
-                return parsed_json
-            except json.JSONDecodeError as e:
-                print(f"JSONDecodeError: {e}. Raw content was:\n{content_text}")
-                return {
-                    "error": "Failed to parse LLM output as JSON.",
-                    "raw_content_from_llm": content_text,
-                    "document_type_provided": doc_type
-                }
         else:
-            print(f"No 'choices' in API response: {result}")
-            return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}
     except requests.exceptions.Timeout:
-        print(f"API Request Timeout for {os.path.basename(image_path)}")
-        return {"error": "API request timed out.", "document_type_provided": doc_type}
     except requests.exceptions.RequestException as e:
         error_message = f"API Request Error: {str(e)}"
-        if e.response is not None:
             error_message += f" Status: {e.response.status_code}, Response: {e.response.text}"
-        print(error_message)
-        return {"error": error_message, "document_type_provided": doc_type}
     except Exception as e:
-        print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
-        return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
def add_document_to_batch_ui(image_filepath, doc_type_selection):
    """Queue one uploaded document and return the refreshed batch table.

    Args:
        image_filepath: Path of the uploaded image, or a falsy value when
            nothing was uploaded.
        doc_type_selection: Document type label selected by the user, or a
            falsy value when nothing was selected.

    Returns:
        A ``(rows, status)`` tuple: ``rows`` is a list of
        ``[filename, document type]`` pairs for every queued document and
        ``status`` is a human-readable message about the outcome.
    """
    # The list is mutated in place, so no ``global`` declaration is needed.
    if image_filepath and doc_type_selection:
        filename = os.path.basename(image_filepath)
        current_batch.append(
            {"path": image_filepath, "type": doc_type_selection, "filename": filename}
        )
        status = f"Added '{filename}' as '{doc_type_selection}'."
    else:
        status = "Failed to add: Image or document type missing."

    # The table is rebuilt on both paths so the UI always shows the full batch.
    batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
    return batch_display_data, status
def _person_identifier(result_item):
    """Return a grouping key for one extraction result, or None if it has none."""
    if not isinstance(result_item, dict):
        return None
    fields = result_item.get("extracted_fields")
    if not isinstance(fields, dict):
        return None

    passport_no = (
        fields.get("Document Number")
        or fields.get("Passport Number")
        or fields.get("passport_number")
    )
    name = fields.get("Given Names") or fields.get("Given Name") or fields.get("Name")
    surname = fields.get("Surname") or fields.get("Family Name")
    dob = fields.get("Date of Birth") or fields.get("DOB")

    def compact(value):
        # Spaces are removed so that "AB 123" and "AB123" produce the same key.
        return str(value).replace(" ", "")

    # A document number is the strongest identifier, so it takes precedence.
    if passport_no:
        return f"passport_{compact(passport_no).lower()}"
    if name and surname and dob:
        return f"{compact(name).lower()}_{compact(surname).lower()}_{compact(dob)}"
    if name and surname:
        return f"{compact(name).lower()}_{compact(surname).lower()}"
    return None


def process_batch_ui():
    """Run extraction on every queued document and group results by person.

    Returns:
        A ``(output, status)`` tuple. ``output`` is a dict: an error or
        message dict when the API key is missing or the batch is empty,
        otherwise a dict with the keys ``summary``, ``grouped_by_person``
        and ``unidentified_documents_or_errors``. ``status`` is a
        human-readable status line.
    """
    if not OPENROUTER_API_KEY:
        return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
    if not current_batch:
        return {"message": "Batch is empty. Add documents first."}, "Batch is empty."

    all_results = []
    status_updates = []
    total = len(current_batch)
    for position, item in enumerate(current_batch, start=1):
        print(f"Processing document {position}/{total}: {item['filename']} ({item['type']})...")
        extracted_data = process_single_image_with_openrouter(item["path"], item["type"])
        all_results.append(extracted_data)
        if "error" in extracted_data:
            status_updates.append(f"Error processing {item['filename']}: {extracted_data['error']}")
        else:
            status_updates.append(f"Successfully processed {item['filename']}.")

    grouped_by_person = {}
    unidentified_docs = []
    for result_item in all_results:
        doc_id = _person_identifier(result_item)
        if doc_id is None:
            # Errors and results without usable identity fields end up here.
            unidentified_docs.append(result_item)
            continue
        group = grouped_by_person.setdefault(
            doc_id, {"person_identifier": doc_id, "documents": []}
        )
        group["documents"].append(result_item)

    final_structured_output = {
        "summary": f"Processed {total} documents.",
        "grouped_by_person": list(grouped_by_person.values()),
        "unidentified_documents_or_errors": unidentified_docs,
    }
    final_status = "Batch processing complete. " + " | ".join(status_updates)
    print(final_status)
    return final_structured_output, final_status
def clear_batch_ui():
    """Empty the document batch.

    Returns:
        A ``(rows, status)`` tuple: an empty row list for the batch table
        and a confirmation message.
    """
    global current_batch
    # Rebinding (rather than clearing in place) keeps the original semantics:
    # any reference to the previous list is left untouched.
    current_batch = []
    return [], "Batch cleared successfully."
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
     gr.Markdown(
-        "**Instructions:**\n"
-        "1. Upload a document image (e.g., passport front/back, photo, hotel reservation).\n"
-        "2. Select the correct document type.\n"
-        "3. Click 'Add Document to Current Batch'. Repeat for all documents of a person or a related set.\n"
-        "4. Review the batch. Click 'Clear Entire Batch' to start over.\n"
-        "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
-        "6. View the extracted information in JSON format below."
     )
     if not OPENROUTER_API_KEY:
-        gr.Markdown(
-            "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
-            "API calls will fail. Please set it and restart this application.</h3>"
-        )
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### Step 1: Add Document")
-            image_input = gr.Image(
-                label="Upload Document Image",
-                type="filepath",
-                sources=["upload"],
-                height=300
-            )
-            doc_type_choices = [
-                'passport_front', 'passport_back', 'national_id_front', 'national_id_back',
-                'drivers_license_front', 'drivers_license_back', 'visa_sticker',
-                'photo', 'hotel_reservation', 'boarding_pass', 'utility_bill', 'other_document'
-            ]
-            doc_type_input = gr.Dropdown(
-                label="Select Document Type",
-                choices=doc_type_choices,
-                value='passport_front',
-                filterable=True
-            )
-            add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
-        with gr.Column(scale=2):
-            gr.Markdown("### Step 2: Review Current Batch")
-            batch_dataframe = gr.Dataframe(
-                headers=["Filename", "Document Type"],
-                datatype=["str", "str"],
-                row_count=1,  # Changed: Start with 1 row, should grow dynamically
-                col_count=2,  # Changed: Simpler integer for fixed columns
-                wrap=True
-            )
-            clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
-    gr.Markdown("### Step 3: Process Batch")
-    process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
-    status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
-    gr.Markdown("### Step 4: View Results")
-    output_json_display = gr.JSON(label="Extracted Information (JSON Format)")
-    add_button.click(
-        fn=add_document_to_batch_ui,
-        inputs=[image_input, doc_type_input],
-        outputs=[batch_dataframe, status_message_textbox]
-    ).then(lambda: None, outputs=image_input)
-    clear_batch_button.click(
-        fn=clear_batch_ui,
-        inputs=[],
-        outputs=[batch_dataframe, status_message_textbox]
     )
     process_button.click(
-        fn=process_batch_ui,
-        inputs=[],
-        outputs=[output_json_display, status_message_textbox]
     )
 if __name__ == "__main__":
-    if not OPENROUTER_API_KEY:
-        print("ERROR: The OPENROUTER_API_KEY environment variable is not set.")
-        print("Please set it before running the application, e.g.:")
-        print("  export OPENROUTER_API_KEY='your_openrouter_key_here'")
-        print("The application will launch, but API calls will fail.")
-    demo.launch(share=True) # Added share=True

 import gradio as gr
 import base64
+import requests
 import json
+import re
+import os
+import uuid
+from datetime import datetime
 # --- Configuration ---
+# IMPORTANT: Set your OPENROUTER_API_KEY as a Hugging Face Space Secret
+OPENROUTER_API_KEY = "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"
+IMAGE_MODEL = "opengvlab/internvl3-14b:free" # Using the free tier model as specified
 OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
+# --- Global State (managed within Gradio's session if possible, or module-level for simplicity here) ---
+# This will be reset each time the processing function is called.
+# For a multi-user or more robust app, session state or a proper backend DB would be needed.
+processed_files_data = [] # Stores dicts for each file's details and status
+person_profiles = {}      # Stores dicts for each identified person and their documents
 # --- Helper Functions ---
def extract_json_from_text(text):
    """Extract a JSON object from model output text.

    Candidates are tried in this order: a fenced ```json block, the text
    with a single pair of wrapping backticks removed, the text as-is, and
    finally the substring from the first "{" to the last "}".

    Args:
        text: Raw text returned by the model.

    Returns:
        The parsed JSON object as a dict. On failure a dict with an
        ``"error"`` message (and ``"original_text"`` where applicable) is
        returned instead; this function never raises for bad input and
        never returns a non-dict value.
    """
    if not text:
        return {"error": "Empty text provided for JSON extraction."}
    if not isinstance(text, str):
        # The regex and string handling below require a string.
        return {
            "error": f"Expected text for JSON extraction, got {type(text).__name__}.",
            "original_text": repr(text),
        }

    fenced = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
    if fenced:
        json_str = fenced.group(1)
    else:
        json_str = text.strip()
        if json_str.startswith("`") and json_str.endswith("`"):
            json_str = json_str[1:-1]

    try:
        parsed = json.loads(json_str)
    except json.JSONDecodeError as exc:
        failure = f"Invalid JSON structure: {exc}"
    else:
        if isinstance(parsed, dict):
            return parsed
        # Valid JSON that is not an object (list, number, ...) would break
        # callers that use `"error" in result` and `.get(...)`.
        failure = (
            "Invalid JSON structure: expected a JSON object, "
            f"got {type(parsed).__name__}"
        )

    # Fallback: the model may have wrapped the object in explanatory prose.
    first_brace = json_str.find("{")
    last_brace = json_str.rfind("}")
    if first_brace == -1 or last_brace <= first_brace:
        return {"error": failure, "original_text": text}
    try:
        # A successfully parsed string that starts with "{" is always an object.
        return json.loads(json_str[first_brace:last_brace + 1])
    except json.JSONDecodeError as exc:
        return {
            "error": f"Invalid JSON structure after attempting substring: {exc}",
            "original_text": text,
        }
def get_ocr_prompt():
    """Return the fixed OCR / information-extraction prompt sent with each image.

    The prompt asks the model for a single JSON object with the top-level
    keys ``document_type_detected``, ``extracted_fields``, ``mrz_data`` and
    ``full_text_ocr``.
    """
    # Plain string on purpose: the text has no placeholders, and keeping it
    # free of f-string parsing means a literal "{" can be added safely later.
    return """You are an advanced OCR and information extraction AI.
Your task is to meticulously analyze this image and extract all relevant information.
Output Format Instructions:
Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
The JSON object should have the following top-level keys:
- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Bank Statement", "Photo of a person").
- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
    - For passports/IDs: "Surname", "Given Names", "Full Name", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
    - For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date".
    - For bank statements: "Account Holder Name", "Account Number", "Bank Name", "Statement Period", "Ending Balance".
    - For photos: "Description" (e.g., "Portrait of a person", "Group photo at a location"), "People Present" (array of strings if multiple).
- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
    - "raw_mrz_lines": (array of strings) Each line of the MRZ.
    - "parsed_mrz": (object) Key-value pairs of parsed MRZ fields.
    If no MRZ, this field should be null.
- "full_text_ocr": (string) Concatenation of all text found on the document.
Extraction Guidelines:
1.  Prioritize accuracy.
2.  Extract all visible text. Include "Full Name" by combining given and surnames if possible.
3.  For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
Ensure the entire output strictly adheres to the JSON format.
"""
+def call_openrouter_ocr(image_filepath):
     if not OPENROUTER_API_KEY:
+        return {"error": "OpenRouter API Key not configured."}
     try:
+        with open(image_filepath, "rb") as f:
+            encoded_image = base64.b64encode(f.read()).decode("utf-8")
+        # Basic MIME type guessing, default to jpeg
+        mime_type = "image/jpeg"
+        if image_filepath.lower().endswith(".png"):
+            mime_type = "image/png"
+        elif image_filepath.lower().endswith(".webp"):
+            mime_type = "image/webp"
+        data_url = f"data:{mime_type};base64,{encoded_image}"
+        prompt_text = get_ocr_prompt()
         payload = {
             "model": IMAGE_MODEL,
             "messages": [
                     ]
                 }
             ],
+            "max_tokens": 3500, # Increased for detailed JSON
             "temperature": 0.1,
         }
         headers = {
             "Authorization": f"Bearer {OPENROUTER_API_KEY}",
             "Content-Type": "application/json",
+            "HTTP-Referer": "https://huggingface.co/spaces/DoClassifier", # Optional: Update with your Space URL
+            "X-Title": "DoClassifier Processor" # Optional
         }
+        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=180) # 3 min timeout
         response.raise_for_status()
         result = response.json()
         if "choices" in result and result["choices"]:
+            raw_content = result["choices"][0]["message"]["content"]
+            return extract_json_from_text(raw_content)
         else:
+            return {"error": "No 'choices' in API response from OpenRouter.", "details": result}
     except requests.exceptions.Timeout:
+        return {"error": "API request timed out."}
     except requests.exceptions.RequestException as e:
         error_message = f"API Request Error: {str(e)}"
+        if hasattr(e, 'response') and e.response is not None:
             error_message += f" Status: {e.response.status_code}, Response: {e.response.text}"
+        return {"error": error_message}
     except Exception as e:
+        return {"error": f"An unexpected error occurred during OCR: {str(e)}"}
def extract_entities_from_ocr(ocr_json):
    """Pull the identity fields used for person matching out of an OCR result.

    Field names inside ``extracted_fields`` are matched case-insensitively.
    When no full-name style field is present, "Given Names" and "Surname"
    (or "Family Name") are combined into a name if both are available.

    Args:
        ocr_json: Parsed OCR result; expected to be a dict with an
            ``"extracted_fields"`` dict and optionally
            ``"document_type_detected"``. ``None`` and malformed values are
            tolerated.

    Returns:
        A dict with the keys ``"name"``, ``"dob"``, ``"passport_no"`` (each a
        string or ``None``) and ``"doc_type"``. The passport number has its
        spaces removed and is upper-cased.
    """
    if not isinstance(ocr_json, dict):
        # Covers None and non-object JSON; nothing can be read from it.
        return {"name": None, "dob": None, "passport_no": None, "doc_type": "Unknown"}

    doc_type = ocr_json.get("document_type_detected", "Unknown")
    fields = ocr_json.get("extracted_fields")
    if not isinstance(fields, dict):
        return {"name": None, "dob": None, "passport_no": None, "doc_type": doc_type}

    # Case-insensitive view of the fields; the first spelling of a key wins.
    by_lower_key = {}
    for field_key, value in fields.items():
        by_lower_key.setdefault(str(field_key).lower(), value)

    def first_value(candidate_keys, transform=str):
        # Candidates are in priority order; empty values are skipped.
        for key in candidate_keys:
            value = by_lower_key.get(key)
            if value:
                result = transform(value)
                if result:
                    return result
        return None

    name = first_value(("full name", "name", "account holder name", "guest name"))
    if name is None:
        # Identity documents often list the name parts only separately.
        given = first_value(("given names", "given name"))
        surname = first_value(("surname", "family name"))
        if given and surname:
            name = f"{given} {surname}"

    return {
        "name": name,
        "dob": first_value(("date of birth", "dob")),
        "passport_no": first_value(
            ("document number", "passport number"),
            transform=lambda value: str(value).replace(" ", "").upper(),
        ),
        "doc_type": doc_type,
    }
def normalize_name(name):
    """Return *name* reduced to its lower-cased alphanumeric characters.

    Spaces, punctuation and case differences are removed so that spelling
    variants of the same name compare equal. Falsy input yields ``""``;
    non-string input is converted with ``str`` first.
    """
    if not name:
        return ""
    return "".join(ch for ch in str(name) if ch.isalnum()).lower()
def get_person_id_and_update_profiles(doc_id, entities, current_persons_data):
    """Assign a document to an existing person profile or create a new one.

    Matching order:
      1. Passport number already recorded on a profile.
      2. Normalized name + date of birth already recorded on a profile. This
         step is also tried when the document has a passport number that no
         profile knows yet, so a passport can join a person first seen on
         another document.
      3. Otherwise a new profile is created.

    Args:
        doc_id: Unique identifier (string) of the document being classified.
        entities: Dict that may contain ``"passport_no"``, ``"name"`` and
            ``"dob"``; missing or falsy values are treated as absent.
        current_persons_data: Dict mapping person keys to profile dicts.
            Updated in place.

    Returns:
        The key of the profile the document was assigned to.
    """
    passport_no = entities.get("passport_no")
    name = entities.get("name")
    dob = entities.get("dob")
    norm_name = normalize_name(name) if name else ""

    def attach(p_key):
        # Record the document and whatever this document adds to the profile,
        # so later documents can match on any of these identifiers.
        profile = current_persons_data[p_key]
        profile.setdefault("doc_ids", set()).add(doc_id)
        if passport_no:
            profile.setdefault("passport_numbers", set()).add(passport_no)
        if name:
            profile.setdefault("names", set()).add(norm_name)
            if not profile.get("canonical_name"):
                # Without a canonical name the display name can only be the
                # passport-based placeholder, so it is replaced as well.
                profile["canonical_name"] = name
                profile["display_name"] = name
        if dob:
            profile.setdefault("dobs", set()).add(dob)
            if not profile.get("canonical_dob"):
                profile["canonical_dob"] = dob
        return p_key

    # 1. Match by passport number (strongest identifier).
    if passport_no:
        for p_key, p_data in current_persons_data.items():
            if passport_no in p_data.get("passport_numbers", set()):
                return attach(p_key)

    # 2. Match by normalized name + DOB (passport unknown or not present).
    if name and dob:
        for p_key, p_data in current_persons_data.items():
            if norm_name in p_data.get("names", set()) and dob in p_data.get("dobs", set()):
                return attach(p_key)

    # 3. No match: create a profile keyed by the best identifier available.
    if passport_no:
        new_person_key = f"person_{passport_no}"
        display_name = name or f"Person (ID: {passport_no})"
    elif name and dob:
        new_person_key = f"person_{norm_name}_{dob}_{str(uuid.uuid4())[:4]}"
        display_name = name
    elif name:
        # Name alone is too weak to match on, so it always starts a new profile.
        new_person_key = f"person_{norm_name}_{str(uuid.uuid4())[:4]}"
        display_name = name
    else:
        # Unclassifiable: no passport number and no name.
        generic_person_key = f"unidentified_person_{str(uuid.uuid4())[:6]}"
        current_persons_data[generic_person_key] = {
            "canonical_name": "Unknown", "canonical_dob": None,
            "names": set(), "dobs": set(), "passport_numbers": set(),
            "doc_ids": {doc_id}, "display_name": f"Unknown Person ({doc_id[:6]})",
        }
        return generic_person_key

    current_persons_data[new_person_key] = {
        "canonical_name": name,
        "canonical_dob": dob or None,
        "names": {norm_name} if name else set(),
        "dobs": {dob} if dob else set(),
        "passport_numbers": {passport_no} if passport_no else set(),
        "doc_ids": {doc_id},
        "display_name": display_name,
    }
    return new_person_key
def format_dataframe_data(current_files_data):
    """Convert per-file records into rows for the status dataframe.

    Row columns, in order: short ID, filename, status, detected type,
    extracted name, extracted DOB, main ID (passport number), person key.

    Args:
        current_files_data: Iterable of file record dicts with the keys
            ``"doc_id"``, ``"filename"`` and ``"status"`` and optionally
            ``"entities"`` and ``"assigned_person_key"``.

    Returns:
        A list of 8-element lists. Values that are missing or ``None`` are
        shown as ``"N/A"``.
    """
    df_rows = []
    for f_data in current_files_data:
        entities = f_data.get("entities") or {}
        # `or "N/A"` rather than a .get default: the keys usually exist with
        # a None value, in which case a .get default would never be used.
        df_rows.append([
            f_data["doc_id"][:8],  # Short ID
            f_data["filename"],
            f_data["status"],
            entities.get("doc_type") or "N/A",
            entities.get("name") or "N/A",
            entities.get("dob") or "N/A",
            entities.get("passport_no") or "N/A",
            f_data.get("assigned_person_key") or "N/A",
        ])
    return df_rows
def format_persons_markdown(current_persons_data, current_files_data):
    """Render the person profiles and their documents as Markdown.

    Args:
        current_persons_data: Dict mapping person keys to profile dicts
            (``"display_name"``, ``"canonical_dob"``, ``"passport_numbers"``,
            ``"doc_ids"``).
        current_files_data: List of file record dicts with the keys
            ``"doc_id"`` and ``"filename"`` and optionally ``"entities"``.

    Returns:
        A Markdown string, or ``"No persons identified yet."`` when there are
        no profiles. Documents are listed in the order of
        ``current_files_data``; passport numbers are sorted, so the output
        is deterministic.
    """
    if not current_persons_data:
        return "No persons identified yet."

    # Index the files once instead of scanning the list for every document.
    files_by_id = {}
    for file_record in current_files_data:
        files_by_id.setdefault(file_record["doc_id"], file_record)

    md_parts = ["## Classified Persons & Documents\n"]
    for p_key, p_data in current_persons_data.items():
        display_name = p_data.get('display_name', p_key)
        md_parts.append(f"### Person: {display_name} (Profile Key: {p_key})")
        if p_data.get("canonical_dob"):
            md_parts.append(f"* DOB: {p_data['canonical_dob']}")
        if p_data.get("passport_numbers"):
            md_parts.append(f"* Passport(s): {', '.join(sorted(p_data['passport_numbers']))}")
        md_parts.append("* Documents:")

        doc_ids_for_person = p_data.get("doc_ids") or set()
        if doc_ids_for_person:
            for doc_id, doc_detail in files_by_id.items():
                if doc_id not in doc_ids_for_person:
                    continue
                # "entities" may be present but None (not processed yet).
                entities = doc_detail.get("entities") or {}
                doc_type = entities.get("doc_type") or "Unknown Type"
                md_parts.append(f"  - {doc_detail['filename']} (`{doc_type}`)")
            # IDs with no matching file record are still reported.
            for doc_id in sorted((d for d in doc_ids_for_person if d not in files_by_id), key=str):
                md_parts.append(f"  - Document ID: {str(doc_id)[:8]} (details not found, unexpected)")
        else:
            md_parts.append("  - No documents currently assigned.")
        md_parts.append("\n---\n")
    return "\n".join(md_parts)
+# --- Main Gradio Processing Function (Generator) ---
+def process_uploaded_files(files_list, progress=gr.Progress(track_tqdm=True)):
+    global processed_files_data, person_profiles # Reset global state for each run
+    processed_files_data = []
+    person_profiles = {}
+    if not OPENROUTER_API_KEY:
+        yield (
+            [["N/A", "ERROR", "OpenRouter API Key not configured.", "N/A", "N/A", "N/A", "N/A", "N/A"]],
+            "Error: OpenRouter API Key not configured. Please set it in Space Secrets.",
+            "{}", "API Key Missing. Processing halted."
+        )
+        return
+    if not files_list:
+        yield ([], "No files uploaded.", "{}", "Upload files to begin.")
+        return
+    # Initialize processed_files_data
+    for i, file_obj in enumerate(files_list):
+        doc_uid = str(uuid.uuid4())
+        processed_files_data.append({
+            "doc_id": doc_uid,
+            "filename": os.path.basename(file_obj.name), # file_obj.name is the temp path
+            "filepath": file_obj.name,
+            "status": "Queued",
+            "ocr_json": None,
+            "entities": None,
+            "assigned_person_key": None
+        })
+    initial_df_data = format_dataframe_data(processed_files_data)
+    initial_persons_md = format_persons_markdown(person_profiles, processed_files_data)
+    yield (initial_df_data, initial_persons_md, "{}", f"Initialized. Found {len(files_list)} files.")
+    # Iterate and process each file
+    for i, file_data_item in enumerate(progress.tqdm(processed_files_data, desc="Processing Documents")):
+        current_doc_id = file_data_item["doc_id"]
+        current_filename = file_data_item["filename"]
+        # 1. OCR Processing
+        file_data_item["status"] = "OCR in Progress..."
+        df_data = format_dataframe_data(processed_files_data)
+        persons_md = format_persons_markdown(person_profiles, processed_files_data) # No change yet
+        yield (df_data, persons_md, "{}", f"({i+1}/{len(processed_files_data)}) OCR for: {current_filename}")
+        ocr_result = call_openrouter_ocr(file_data_item["filepath"])
+        file_data_item["ocr_json"] = ocr_result # Store full JSON
+        if "error" in ocr_result:
+            file_data_item["status"] = f"OCR Error: {ocr_result['error'][:50]}..." # Truncate long errors
+            df_data = format_dataframe_data(processed_files_data)
+            yield (df_data, persons_md, json.dumps(ocr_result, indent=2), f"({i+1}/{len(processed_files_data)}) OCR Error on {current_filename}")
+            continue # Move to next file
+        file_data_item["status"] = "OCR Done. Extracting Entities..."
+        df_data = format_dataframe_data(processed_files_data)
+        yield (df_data, persons_md, json.dumps(ocr_result, indent=2), f"({i+1}/{len(processed_files_data)}) OCR Done for {current_filename}")
+        # 2. Entity Extraction
+        entities = extract_entities_from_ocr(ocr_result)
+        file_data_item["entities"] = entities
+        file_data_item["status"] = "Entities Extracted. Classifying..."
+        df_data = format_dataframe_data(processed_files_data) # Now entities will show up
+        yield (df_data, persons_md, json.dumps(ocr_result, indent=2), f"({i+1}/{len(processed_files_data)}) Entities for {current_filename}")
+        # 3. Person Classification / Linking
+        person_key = get_person_id_and_update_profiles(current_doc_id, entities, person_profiles)
+        file_data_item["assigned_person_key"] = person_key
+        file_data_item["status"] = "Classified"
+        df_data = format_dataframe_data(processed_files_data)
+        persons_md = format_persons_markdown(person_profiles, processed_files_data) # Now persons_md updates
+        yield (df_data, persons_md, json.dumps(ocr_result, indent=2), f"({i+1}/{len(processed_files_data)}) Classified {current_filename} -> {person_key}")
+    final_df_data = format_dataframe_data(processed_files_data)
+    final_persons_md = format_persons_markdown(person_profiles, processed_files_data)
+    yield (final_df_data, final_persons_md, "{}", f"All {len(processed_files_data)} documents processed.")
+# --- Gradio UI Layout ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # Page layout: header, upload column, per-document status table, OCR JSON
+    # viewer, and the per-person summary. Components are created in display order.
+    gr.Markdown("# 📄 Intelligent Document Processor & Classifier")
     gr.Markdown(
+        "**Upload multiple documents (images of passports, bank statements, hotel reservations, photos, etc.). "
+        "The system will perform OCR, attempt to extract key entities, and classify documents by the person they belong to.**\n"
+        "Ensure `OPENROUTER_API_KEY` is set as a Secret in your Hugging Face Space."
     )
+    # Show a prominent banner when no API key is configured.
     if not OPENROUTER_API_KEY:
+        gr.Markdown("<h3 style='color:red;'>⚠️ ERROR: `OPENROUTER_API_KEY` is not set in Space Secrets! OCR will fail.</h3>")
     with gr.Row():
         with gr.Column(scale=1):
+            files_input = gr.Files(label="Upload Document Images (Bulk)", file_count="multiple", type="filepath")
+            process_button = gr.Button("Process Uploaded Documents", variant="primary")
+            overall_status_textbox = gr.Textbox(label="Overall Progress", interactive=False, lines=1)
+    gr.Markdown("---")
+    gr.Markdown("## Document Processing Details")
+    # "ID", "Filename", "Status", "Detected Type", "Extracted Name", "Extracted DOB", "Main ID", "Person Key"
+    dataframe_headers = ["Doc ID (short)", "Filename", "Status", "Detected Type", "Name", "DOB", "Passport No.", "Assigned Person Key"]
+    document_status_df = gr.Dataframe(
+        headers=dataframe_headers,
+        datatype=["str"] * len(dataframe_headers), # All as strings for display simplicity
+        label="Individual Document Status & Extracted Entities",
+        row_count=(0, "dynamic"), # Start empty, dynamically grows
+        col_count=(len(dataframe_headers), "fixed"),
+        wrap=True
     )
+    ocr_json_output = gr.Code(label="Selected Document OCR JSON", language="json", interactive=False)
+    gr.Markdown("---")
+    person_classification_output_md = gr.Markdown("## Classified Persons & Documents\nNo persons identified yet.")
+    # Event Handlers
+    # The processing generator streams its 4-tuple into these four outputs, in order.
     process_button.click(
+        fn=process_uploaded_files,
+        inputs=[files_input],
+        outputs=[
+            document_status_df,
+            person_classification_output_md,
+            ocr_json_output, # Temporarily show last OCR here, better if select event works
+            overall_status_textbox
+        ]
     )
+    @document_status_df.select(inputs=None, outputs=ocr_json_output, show_progress="hidden")
+    def display_selected_ocr(evt: gr.SelectData):
+        """Return the stored OCR JSON text for the table row that was selected.
+
+        Looks the row up by position in the module-level ``processed_files_data``
+        and returns a JSON string; a fallback message is returned when the row
+        has no OCR data or the index is past the end of the list.
+        """
+        if evt.index is None or evt.index[0] is None: # evt.index is (row, col)
+            return "{}" # Nothing selected or invalid selection
+        selected_row_index = evt.index[0]
+        # Table rows are assumed to be in the same order as processed_files_data.
+        if selected_row_index < len(processed_files_data):
+            selected_doc_data = processed_files_data[selected_row_index]
+            if selected_doc_data and selected_doc_data["ocr_json"]:
+                return json.dumps(selected_doc_data["ocr_json"], indent=2)
+        return "{ \"message\": \"No OCR data found for selected row or selection out of bounds.\" }"
 if __name__ == "__main__":
+    demo.queue().launch(debug=True, share=True) # queue() is used for the long-running handlers; NOTE(review): share=True is reportedly unsupported on Hugging Face Spaces - confirm whether it is needed

old_app.py ADDED Viewed

	@@ -0,0 +1,267 @@

+import gradio as gr
+import requests
+import base64
+import os
+import json
+import mimetypes
+# --- Configuration ---
+OPENROUTER_API_KEY = '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'
+IMAGE_MODEL = "opengvlab/internvl3-14b:free"
+OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
+# --- Application State ---
+current_batch = []
+# --- Helper Functions ---
+def generate_extraction_prompt(doc_type_provided_by_user):
+    """Build the OCR/extraction instruction prompt for one document image.
+
+    Args:
+        doc_type_provided_by_user: Document type label chosen by the user; it is
+            interpolated verbatim into the prompt text in two places.
+
+    Returns:
+        The prompt string asking the model for a single JSON object with the
+        keys listed in the template below.
+    """
+    # Doubled braces in the template are literal braces in the rendered prompt.
+    prompt = f"""You are an advanced OCR and information extraction AI.
+The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
+Your task is to meticulously analyze this image and extract all relevant information.
+Output Format Instructions:
+Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
+The JSON object should have the following top-level keys:
+- "document_type_provided": (string) The type provided by the user: "{doc_type_provided_by_user}".
+- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
+- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
+    - For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
+    - For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
+    - For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
+- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
+    - "raw_mrz_lines": (array of strings) Each line of the MRZ.
+    - "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
+    If no MRZ, this field should be null.
+- "multilingual_info": (array of objects or null) For any text segments not in English:
+    - Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
+    If no non-English text, this field can be null or an empty array.
+- "full_text_ocr": (string) Concatenation of all text found on the document.
+Extraction Guidelines:
+1.  Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
+2.  Extract all visible text, including small print, stamps, and handwritten annotations if legible.
+3.  For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
+4.  If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
+5.  If the document is multi-page and only one page is provided, note this if apparent.
+Ensure the entire output strictly adheres to the JSON format.
+"""
+    return prompt
+def process_single_image_with_openrouter(image_path, doc_type):
+    if not OPENROUTER_API_KEY:
+        return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
+    try:
+        with open(image_path, "rb") as f:
+            encoded_image_bytes = f.read()
+            encoded_image_string = base64.b64encode(encoded_image_bytes).decode("utf-8")
+        mime_type, _ = mimetypes.guess_type(image_path)
+        if not mime_type:
+            ext = os.path.splitext(image_path)[1].lower()
+            if ext == ".png": mime_type = "image/png"
+            elif ext in [".jpg", ".jpeg"]: mime_type = "image/jpeg"
+            elif ext == ".webp": mime_type = "image/webp"
+            else: mime_type = "image/jpeg"
+        data_url = f"data:{mime_type};base64,{encoded_image_string}"
+        prompt_text = generate_extraction_prompt(doc_type)
+        payload = {
+            "model": IMAGE_MODEL,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt_text},
+                        {"type": "image_url", "image_url": {"url": data_url}}
+                    ]
+                }
+            ],
+            "max_tokens": 3000,
+            "temperature": 0.1,
+        }
+        headers = {
+            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "https://huggingface.co/spaces/Passport_Extractor",
+            "X-Title": "Document Classifier"
+        }
+        print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
+        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120)
+        response.raise_for_status()
+        result = response.json()
+        print(f"Received response from OpenRouter. Status: {response.status_code}")
+        if "choices" in result and result["choices"]:
+            content_text = result["choices"][0]["message"]["content"]
+            clean_content = content_text.strip()
+            if clean_content.startswith("```json"):
+                clean_content = clean_content[7:]
+                if clean_content.endswith("```"):
+                    clean_content = clean_content[:-3]
+            elif clean_content.startswith("`") and clean_content.endswith("`"):
+                 clean_content = clean_content[1:-1]
+            try:
+                parsed_json = json.loads(clean_content)
+                if "document_type_provided" not in parsed_json:
+                    parsed_json["document_type_provided"] = doc_type
+                return parsed_json
+            except json.JSONDecodeError as e:
+                print(f"JSONDecodeError: {e}. Raw content was:\n{content_text}")
+                return {
+                    "error": "Failed to parse LLM output as JSON.",
+                    "raw_content_from_llm": content_text,
+                    "document_type_provided": doc_type
+                }
+        else:
+            print(f"No 'choices' in API response: {result}")
+            return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}
+    except requests.exceptions.Timeout:
+        print(f"API Request Timeout for {os.path.basename(image_path)}")
+        return {"error": "API request timed out.", "document_type_provided": doc_type}
+    except requests.exceptions.RequestException as e:
+        error_message = f"API Request Error: {str(e)}"
+        if e.response is not None:
+            error_message += f" Status: {e.response.status_code}, Response: {e.response.text}"
+        print(error_message)
+        return {"error": error_message, "document_type_provided": doc_type}
+    except Exception as e:
+        print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
+        return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
+def add_document_to_batch_ui(image_filepath, doc_type_selection):
+    global current_batch
+    if image_filepath and doc_type_selection:
+        filename = os.path.basename(image_filepath)
+        current_batch.append({"path": image_filepath, "type": doc_type_selection, "filename": filename})
+        batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
+        return batch_display_data, f"Added '{filename}' as '{doc_type_selection}'."
+    batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
+    return batch_display_data, "Failed to add: Image or document type missing."
+def process_batch_ui():
+    global current_batch
+    if not OPENROUTER_API_KEY:
+        return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
+    if not current_batch:
+        return {"message": "Batch is empty. Add documents first."}, "Batch is empty."
+    all_results = []
+    status_updates = []
+    for i, item_to_process in enumerate(current_batch):
+        status_msg = f"Processing document {i+1}/{len(current_batch)}: {item_to_process['filename']} ({item_to_process['type']})..."
+        print(status_msg)
+        extracted_data = process_single_image_with_openrouter(item_to_process["path"], item_to_process["type"])
+        all_results.append(extracted_data)
+        if "error" in extracted_data:
+            status_updates.append(f"Error processing {item_to_process['filename']}: {extracted_data['error']}")
+        else:
+            status_updates.append(f"Successfully processed {item_to_process['filename']}.")
+    grouped_by_person = {}
+    unidentified_docs = []
+    for result_item in all_results:
+        doc_id = None
+        if isinstance(result_item, dict) and "extracted_fields" in result_item and isinstance(result_item["extracted_fields"], dict):
+            fields = result_item["extracted_fields"]
+            passport_no = fields.get("Document Number") or fields.get("Passport Number") or fields.get("passport_number")
+            name = fields.get("Given Names") or fields.get("Given Name") or fields.get("Name")
+            surname = fields.get("Surname") or fields.get("Family Name")
+            dob = fields.get("Date of Birth") or fields.get("DOB")
+            if passport_no:
+                doc_id = f"passport_{str(passport_no).replace(' ', '').lower()}"
+            elif name and surname and dob:
+                doc_id = f"{str(name).replace(' ', '').lower()}_{str(surname).replace(' ', '').lower()}_{str(dob).replace(' ', '')}"
+            elif name and surname:
+                 doc_id = f"{str(name).replace(' ', '').lower()}_{str(surname).replace(' ', '').lower()}"
+        if doc_id:
+            if doc_id not in grouped_by_person:
+                grouped_by_person[doc_id] = {"person_identifier": doc_id, "documents": []}
+            grouped_by_person[doc_id]["documents"].append(result_item)
+        else:
+            unidentified_docs.append(result_item)
+    final_structured_output = {
+        "summary": f"Processed {len(current_batch)} documents.",
+        "grouped_by_person": list(grouped_by_person.values()) if grouped_by_person else [],
+        "unidentified_documents_or_errors": unidentified_docs
+    }
+    final_status = "Batch processing complete. " + " | ".join(status_updates)
+    print(final_status)
+    return final_structured_output, final_status
+def clear_batch_ui():
+    global current_batch
+    current_batch = []
+    return [], "Batch cleared successfully."
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # Page layout follows the four numbered steps: add a document, review the
+    # batch, process it, view the JSON result.
+    gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
+    gr.Markdown(
+        "**Instructions:**\n"
+        "1. Upload a document image (e.g., passport front/back, photo, hotel reservation).\n"
+        "2. Select the correct document type.\n"
+        "3. Click 'Add Document to Current Batch'. Repeat for all documents of a person or a related set.\n"
+        "4. Review the batch. Click 'Clear Entire Batch' to start over.\n"
+        "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
+        "6. View the extracted information in JSON format below."
+    )
+    # Warn in the page itself when no API key is configured.
+    if not OPENROUTER_API_KEY:
+        gr.Markdown(
+            "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
+            "API calls will fail. Please set it and restart this application.</h3>"
+        )
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### Step 1: Add Document")
+            image_input = gr.Image(
+                label="Upload Document Image",
+                type="filepath",
+                sources=["upload"],
+                height=300
+            )
+            # These labels are passed to the model as the user-provided document type.
+            doc_type_choices = [
+                'passport_front', 'passport_back', 'national_id_front', 'national_id_back',
+                'drivers_license_front', 'drivers_license_back', 'visa_sticker',
+                'photo', 'hotel_reservation', 'boarding_pass', 'utility_bill', 'other_document'
+            ]
+            doc_type_input = gr.Dropdown(
+                label="Select Document Type",
+                choices=doc_type_choices,
+                value='passport_front',
+                filterable=True
+            )
+            add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
+        with gr.Column(scale=2):
+            gr.Markdown("### Step 2: Review Current Batch")
+            batch_dataframe = gr.Dataframe(
+                headers=["Filename", "Document Type"],
+                datatype=["str", "str"],
+                row_count=1,  # start with one row; intended to grow as documents are added
+                col_count=2,  # plain integer column count
+                wrap=True
+            )
+            clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
+    gr.Markdown("### Step 3: Process Batch")
+    process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
+    status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
+    gr.Markdown("### Step 4: View Results")
+    output_json_display = gr.JSON(label="Extracted Information (JSON Format)")
+    # Adding a document refreshes the batch table and status, then resets the image input.
+    add_button.click(
+        fn=add_document_to_batch_ui,
+        inputs=[image_input, doc_type_input],
+        outputs=[batch_dataframe, status_message_textbox]
+    ).then(lambda: None, outputs=image_input)
+    clear_batch_button.click(
+        fn=clear_batch_ui,
+        inputs=[],
+        outputs=[batch_dataframe, status_message_textbox]
+    )
+    # Processing takes no inputs: it works on the module-level batch.
+    process_button.click(
+        fn=process_batch_ui,
+        inputs=[],
+        outputs=[output_json_display, status_message_textbox]
+    )
+if __name__ == "__main__":
+    # Print a console warning when no key is configured; the app still launches.
+    if not OPENROUTER_API_KEY:
+        print("ERROR: The OPENROUTER_API_KEY environment variable is not set.")
+        print("Please set it before running the application, e.g.:")
+        print("  export OPENROUTER_API_KEY='your_openrouter_key_here'")
+        print("The application will launch, but API calls will fail.")
+    demo.launch(share=True) # share=True requests a public share link