Spaces:

SauravCh11
/

PassportExtrator

Runtime error

App Files Files Community

Sandy2636 committed 25 days ago

Commit

3d827ec

1 Parent(s): 0a8e31d

Update space

Browse files

Files changed (1) hide show

app.py +329 -72

app.py CHANGED Viewed

@@ -1,91 +1,348 @@
 import gradio as gr
-import base64
 import requests
-from PIL import Image
-import io
-import os
-# SECURITY: a credential must never be committed to the repository. The key is
-# read from the OPENROUTER_API_KEY environment variable; it is an empty string
-# when the variable is unset, in which case the Authorization header is invalid.
-API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
-# Model identifier sent in the "model" field of the request payload.
-IMAGE_MODEL = "OpenGVLab/InternVL3-14B"
-def extract_passport_info(images, document_type):
-    """
-    Sends each image to the OpenRouter chat-completions endpoint and collects
-    the model's reply for it.
-
-    Every image is re-encoded as JPEG, embedded as a base64 data URL and posted
-    together with a text prompt that names `document_type`.
-
-    Returns a list with one dict per image, each holding "document_type" and
-    "extracted_info". "extracted_info" is the model's message content, a
-    "No data extracted" marker when the response has no "choices" key, or an
-    error string when any exception is raised while handling that image.
-    """
-    results = []
-    for image in images:
-        # Convert image to base64
-        # NOTE(review): `image` must expose .save(); presumably a PIL image — confirm against the caller.
-        buffered = io.BytesIO()
-        image.save(buffered, format="JPEG")
-        encoded_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
-        data_url = f"data:image/jpeg;base64,{encoded_image}"
-        # Prompt to extract full passport data
-        prompt = (
-            f"Extract all passport information from the uploaded {document_type} image. "
-            "Include MRZ (if present), full name, passport number, nationality, gender, "
-            "date of birth, date of issue, expiry date, issuing country, and any other text or labels in other languages. "
-            "Return the result in a JSON format."
-        )
-        # OpenRouter Payload
         payload = {
             "model": IMAGE_MODEL,
             "messages": [
                 {
                     "role": "user",
                     "content": [
-                        {"type": "text", "text": prompt},
-                        {"type": "image_url", "image_url": {"url": data_url}},
-                    ],
                 }
             ],
         }
         headers = {
-            "Authorization": f"Bearer {API_KEY}",
-            "Content-Type": "application/json"
         }
-        try:
-            # No timeout is passed, so this call waits for as long as the server takes.
-            response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=payload)
-            result = response.json()
-            print("📡 Status:", response.status_code)
-            print("📡 Raw Result:", result)
-            if "choices" in result:
-                extracted = result["choices"][0]["message"]["content"]
-                results.append({
-                    "document_type": document_type,
-                    "extracted_info": extracted
-                })
-            else:
-                results.append({
-                    "document_type": document_type,
-                    "extracted_info": "❌ No data extracted"
-                })
-        except Exception as e:
-            # Best effort: a failure on one image is recorded and the loop continues.
-            results.append({
-                "document_type": document_type,
-                "extracted_info": f"⚠️ Error: {str(e)}"
-            })
-    return results
-# Gradio UI
-# A single gr.Interface: the image input and the document-type dropdown are
-# passed to extract_passport_info, and its return value is rendered as JSON.
-# NOTE(review): confirm that gr.Image accepts `multiple=True` in the installed Gradio version.
-demo = gr.Interface(
-    fn=extract_passport_info,
-    inputs=[
-        gr.Image(type="pil", label="Upload Passport/Document Images", multiple=True),
-        gr.Dropdown(
-            choices=["passport_front", "passport_back", "photo", "hotel_reservation"],
-            label="Document Type",
-            value="passport_front",
         )
-    ],
-    outputs="json",
-    title="Passport & Document Info Extractor",
-    description="Upload one or more document images. Extracted information will include MRZ and all available text, structured in JSON format.",
-)
-# Start the web server only when the file is executed as a script.
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import requests
+import base64
+import os
+import json
+import mimetypes
+# --- Configuration ---
+# IMPORTANT: Set your OPENROUTER_API_KEY as an environment variable
+# For example, in your terminal: export OPENROUTER_API_KEY='your_key_here'
+# SECURITY: the key is read from the environment and must never be written into
+# this file. It is an empty string when the variable is unset, which is what the
+# `if not OPENROUTER_API_KEY` checks elsewhere in this file test for.
+OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "").strip()
+IMAGE_MODEL = "opengvlab/internvl3-14b:free"
+OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
+# --- Application State ---
+# Global list to store documents in the current batch
+# Each item: {"path": "image_file_path", "type": "document_type_string", "filename": "display_filename"}
+# NOTE(review): this is module-level state, so it is shared by every caller in
+# this process; consider per-session state before exposing the app to several users.
+current_batch = []
+# --- Helper Functions ---
+def generate_extraction_prompt(doc_type_provided_by_user):
+    """
+    Builds the instruction text that accompanies an image sent to the LLM.
+
+    The user-selected document type is interpolated into the text twice: once
+    in the task description and once as the expected value of the
+    "document_type_provided" key. The text asks the model to answer with one
+    JSON object and lists the top-level keys that object should contain.
+
+    Returns the finished prompt as a single string.
+    """
+    # Doubled braces ({{ }}) below are f-string escapes for literal braces.
+    return f"""You are an advanced OCR and information extraction AI.
+The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
+Your task is to meticulously analyze this image and extract all relevant information.
+Output Format Instructions:
+Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
+The JSON object should have the following top-level keys:
+- "document_type_provided": (string) The type provided by the user: "{doc_type_provided_by_user}".
+- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
+- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
+    - For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
+    - For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
+    - For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
+- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
+    - "raw_mrz_lines": (array of strings) Each line of the MRZ.
+    - "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
+    If no MRZ, this field should be null.
+- "multilingual_info": (array of objects or null) For any text segments not in English:
+    - Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
+    If no non-English text, this field can be null or an empty array.
+- "full_text_ocr": (string) Concatenation of all text found on the document.
+Extraction Guidelines:
+1.  Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
+2.  Extract all visible text, including small print, stamps, and handwritten annotations if legible.
+3.  For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
+4.  If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
+5.  If the document is multi-page and only one page is provided, note this if apparent.
+Ensure the entire output strictly adheres to the JSON format.
+"""
+def _strip_markdown_fence(text):
+    """
+    Removes a Markdown code fence (with or without a "json" language tag) or a
+    pair of single backticks wrapped around the model's reply, if present.
+    """
+    cleaned = text.strip()
+    if cleaned.startswith("```"):
+        cleaned = cleaned[3:]
+        # An optional language tag directly follows the opening fence.
+        if cleaned[:4].lower() == "json":
+            cleaned = cleaned[4:]
+        if cleaned.endswith("```"):
+            cleaned = cleaned[:-3]
+    elif cleaned.startswith("`") and cleaned.endswith("`"): # Single backtick
+        cleaned = cleaned[1:-1]
+    return cleaned.strip()
+def process_single_image_with_openrouter(image_path, doc_type):
+    """
+    Encodes an image, sends it to OpenRouter with a generated prompt,
+    and attempts to parse the JSON response from the LLM.
+
+    image_path: path of the image file to read and embed as a base64 data URL.
+    doc_type: document type chosen by the user; it is placed in the prompt and
+        echoed back under "document_type_provided".
+
+    Returns a dict. On success it is the JSON object produced by the model
+    (with "document_type_provided" added when the model left it out). On any
+    failure it is a dict with an "error" key; no exception escapes this function.
+    """
+    if not OPENROUTER_API_KEY:
+        return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
+    try:
+        with open(image_path, "rb") as f:
+            encoded_image_string = base64.b64encode(f.read()).decode("utf-8")
+        mime_type, _ = mimetypes.guess_type(image_path)
+        if not mime_type:
+            # Fallback, try to infer from extension or default to common types
+            ext = os.path.splitext(image_path)[1].lower()
+            known_types = {
+                ".png": "image/png",
+                ".jpg": "image/jpeg",
+                ".jpeg": "image/jpeg",
+                ".webp": "image/webp",
+            }
+            mime_type = known_types.get(ext, "image/jpeg") # A common default
+        data_url = f"data:{mime_type};base64,{encoded_image_string}"
+        prompt_text = generate_extraction_prompt(doc_type)
         payload = {
             "model": IMAGE_MODEL,
             "messages": [
                 {
                     "role": "user",
                     "content": [
+                        {"type": "text", "text": prompt_text},
+                        {"type": "image_url", "image_url": {"url": data_url}}
+                    ]
                 }
             ],
+            "max_tokens": 3000, # Increased for potentially large JSONs
+            "temperature": 0.1, # Lower temperature for more deterministic output
+            # "response_format": {"type": "json_object"}, # Uncomment if OpenRouter & model fully support this
+                                                         # for guaranteed JSON. Prompt is primary method now.
         }
         headers = {
+            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "https://huggingface.co/spaces/YOUR_SPACE_NAME", # Optional: Replace with your app's URL
+            "X-Title": "Gradio Document Extractor" # Optional: Replace with your app's name
         }
+        print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
+        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120) # 120s timeout
+        response.raise_for_status()  # Raises HTTPError for bad responses (4XX or 5XX)
+        result = response.json()
+        print(f"Received response from OpenRouter. Status: {response.status_code}")
+        if "choices" in result and result["choices"]:
+            content_text = result["choices"][0]["message"]["content"]
+            # Try to clean up and parse JSON (models sometimes wrap in markdown)
+            clean_content = _strip_markdown_fence(content_text)
+            try:
+                parsed_json = json.loads(clean_content)
+            except json.JSONDecodeError as e:
+                print(f"JSONDecodeError: {e}. Raw content was:\n{content_text}")
+                return {
+                    "error": "Failed to parse LLM output as JSON.",
+                    "raw_content_from_llm": content_text,
+                    "document_type_provided": doc_type
+                }
+            if not isinstance(parsed_json, dict):
+                # Valid JSON that is a list, string or number cannot carry the expected keys.
+                print(f"LLM output is not a JSON object. Raw content was:\n{content_text}")
+                return {
+                    "error": "LLM output was valid JSON but not a JSON object.",
+                    "raw_content_from_llm": content_text,
+                    "document_type_provided": doc_type
+                }
+            # Ensure document_type_provided is in the root, even if LLM missed it
+            parsed_json.setdefault("document_type_provided", doc_type)
+            return parsed_json
+        else:
+            print(f"No 'choices' in API response: {result}")
+            return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}
+    except requests.exceptions.Timeout:
+        print(f"API Request Timeout for {os.path.basename(image_path)}")
+        return {"error": "API request timed out.", "document_type_provided": doc_type}
+    except requests.exceptions.RequestException as e:
+        error_message = f"API Request Error: {str(e)}"
+        if e.response is not None:
+            error_message += f" Status: {e.response.status_code}, Response: {e.response.text}"
+        print(error_message)
+        return {"error": error_message, "document_type_provided": doc_type}
+    except Exception as e:
+        # Deliberate catch-all: the UI expects an error dict rather than an exception.
+        print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
+        return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
+# --- Gradio Interface Callbacks ---
+def add_document_to_batch_ui(image_filepath, doc_type_selection):
+    """
+    Appends the uploaded image and its document type to the global batch.
+
+    Nothing is appended when either argument is empty. In both cases the
+    function returns the batch as rows of [filename, document type] for the
+    Dataframe, followed by a status message describing what happened.
+    """
+    global current_batch
+    if image_filepath and doc_type_selection:
+        filename = os.path.basename(image_filepath)
+        # Note: image_filepath is a temporary path from Gradio.
+        # It should be used relatively quickly. For long-lived state,
+        # you might copy the file or read its content.
+        new_entry = {"path": image_filepath, "type": doc_type_selection, "filename": filename}
+        current_batch.append(new_entry)
+        status = f"Added '{filename}' as '{doc_type_selection}'."
+    else:
+        status = "Failed to add: Image or document type missing."
+    # Prepare display for Dataframe: list of lists
+    rows = []
+    for entry in current_batch:
+        rows.append([entry["filename"], entry["type"]])
+    return rows, status
+def _person_identifier(result_item):
+    """
+    Derives a grouping key for one extraction result, or None when the result
+    has no usable "extracted_fields" dict or none of the known fields are set.
+    """
+    if not isinstance(result_item, dict):
+        return None
+    fields = result_item.get("extracted_fields")
+    if not isinstance(fields, dict):
+        return None
+    # Try common identifiers
+    passport_no = fields.get("Document Number") or fields.get("Passport Number") or fields.get("passport_number")
+    name = fields.get("Given Names") or fields.get("Given Name") or fields.get("Name")
+    surname = fields.get("Surname") or fields.get("Family Name")
+    dob = fields.get("Date of Birth") or fields.get("DOB")
+    if passport_no:
+        return f"passport_{str(passport_no).replace(' ', '').lower()}"
+    if name and surname:
+        name_key = f"{str(name).replace(' ', '').lower()}_{str(surname).replace(' ', '').lower()}"
+        # The date of birth narrows the key when it is available.
+        if dob:
+            return f"{name_key}_{str(dob).replace(' ', '')}"
+        return name_key
+    return None
+def process_batch_ui():
+    """
+    Processes all documents in the current batch and returns combined JSON results.
+
+    Returns a pair (output, status). `output` is a dict with a summary line,
+    the results grouped by a heuristic person identifier, and the results that
+    could not be grouped (including errors). `status` is a one-line message for
+    the status textbox. The batch itself is left untouched.
+    """
+    global current_batch
+    if not OPENROUTER_API_KEY:
+        return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
+    if not current_batch:
+        return {"message": "Batch is empty. Add documents first."}, "Batch is empty."
+    all_results = []
+    status_updates = []
+    for i, item_to_process in enumerate(current_batch):
+        print(f"Processing document {i+1}/{len(current_batch)}: {item_to_process['filename']} ({item_to_process['type']})...")
+        extracted_data = process_single_image_with_openrouter(item_to_process["path"], item_to_process["type"])
+        all_results.append(extracted_data)
+        if "error" in extracted_data:
+            status_updates.append(f"Error processing {item_to_process['filename']}: {extracted_data['error']}")
+        else:
+            status_updates.append(f"Successfully processed {item_to_process['filename']}.")
+    # Attempt to group results by person (heuristic)
+    # This is a basic grouping; more sophisticated logic could be added.
+    grouped_by_person = {}
+    unidentified_docs = []
+    for result_item in all_results:
+        doc_id = _person_identifier(result_item)
+        if doc_id:
+            group = grouped_by_person.setdefault(doc_id, {"person_identifier": doc_id, "documents": []})
+            group["documents"].append(result_item)
+        else:
+            unidentified_docs.append(result_item)
+    final_structured_output = {
+        "summary": f"Processed {len(current_batch)} documents.",
+        "grouped_by_person": list(grouped_by_person.values()), # Convert dict to list for easier iteration in JSON
+        "unidentified_documents_or_errors": unidentified_docs
+    }
+    # Do not clear batch here, let user do it.
+    final_status = "Batch processing complete. " + " | ".join(status_updates)
+    print(final_status)
+    return final_structured_output, final_status # Output JSON and status message
+def clear_batch_ui():
+    """
+    Rebinds the global batch to a new empty list.
+
+    Returns an empty list for the Dataframe and a confirmation message for the
+    status textbox.
+    """
+    global current_batch
+    current_batch = list()
+    empty_table = []
+    confirmation = "Batch cleared successfully."
+    return empty_table, confirmation
+# --- Gradio UI Layout ---
+# The page is built top to bottom: instructions, an optional missing-key warning,
+# a two-column row (upload on the left, batch review on the right), the process
+# button with its status box, and the JSON result viewer.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
+    gr.Markdown(
+        "**Instructions:**\n"
+        "1. Upload a document image (e.g., passport front/back, photo, hotel reservation).\n"
+        "2. Select the correct document type.\n"
+        "3. Click 'Add Document to Current Batch'. Repeat for all documents of a person or a related set.\n"
+        "4. Review the batch. Click 'Clear Entire Batch' to start over.\n"
+        "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
+        "6. View the extracted information in JSON format below."
+    )
+    # Evaluated once while the layout is built, so the warning reflects the key's value at start-up.
+    if not OPENROUTER_API_KEY:
+        gr.Markdown(
+            "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
+            "API calls will fail. Please set it and restart this application.</h3>"
         )
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### Step 1: Add Document")
+            image_input = gr.Image(
+                label="Upload Document Image",
+                type="filepath", # 'filepath' gives a temporary path to the uploaded file
+                sources=["upload"],
+                height=300
+            )
+            # These strings are passed unchanged to the callbacks as the document type.
+            doc_type_choices = [
+                'passport_front', 'passport_back', 'national_id_front', 'national_id_back',
+                'drivers_license_front', 'drivers_license_back', 'visa_sticker',
+                'photo', 'hotel_reservation', 'boarding_pass', 'utility_bill', 'other_document'
+            ]
+            doc_type_input = gr.Dropdown(
+                label="Select Document Type",
+                choices=doc_type_choices,
+                value='passport_front',
+                filterable=True
+            )
+            add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
+        with gr.Column(scale=2):
+            gr.Markdown("### Step 2: Review Current Batch")
+            # Two columns matching the [filename, type] rows returned by add_document_to_batch_ui.
+            batch_dataframe = gr.Dataframe(
+                headers=["Filename", "Document Type"],
+                datatype=["str", "str"],
+                row_count=(0, "dynamic"), # Start with 0 rows, dynamically adjusts
+                col_count=(2, "fixed"),
+                wrap=True,
+                height=380,
+            )
+            clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
+    gr.Markdown("### Step 3: Process Batch")
+    process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
+    status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
+    gr.Markdown("### Step 4: View Results")
+    output_json_display = gr.JSON(label="Extracted Information (JSON Format)")
+    # --- Connect UI elements to functions ---
+    # Each callback returns one value per listed output, in the same order.
+    add_button.click(
+        fn=add_document_to_batch_ui,
+        inputs=[image_input, doc_type_input],
+        outputs=[batch_dataframe, status_message_textbox]
+    ).then(lambda: None, outputs=image_input) # Clear image input after adding
+    clear_batch_button.click(
+        fn=clear_batch_ui,
+        inputs=[],
+        outputs=[batch_dataframe, status_message_textbox]
+    )
+    process_button.click(
+        fn=process_batch_ui,
+        inputs=[],
+        outputs=[output_json_display, status_message_textbox]
+    )
+# Script entry point: print a console hint when the key is missing, then launch regardless.
 if __name__ == "__main__":
+    if not OPENROUTER_API_KEY:
+        print("ERROR: The OPENROUTER_API_KEY environment variable is not set.")
+        print("Please set it before running the application, e.g.:")
+        print("  export OPENROUTER_API_KEY='your_openrouter_key_here'")
+        print("The application will launch, but API calls will fail.")
+    demo.launch()