Spaces:

SauravCh11
/

PassportExtrator

Runtime error

App Files Files Community

Sandy2636 committed on 18 days ago

Commit

e08f157

1 Parent(s): b3819fe

Update space

Browse files

Files changed (2) hide show

app.py +18 -88
requirements.txt +4 -4

app.py CHANGED Viewed

@@ -6,24 +6,16 @@ import json
 import mimetypes
 # --- Configuration ---
 # IMPORTANT: the OpenRouter key is a secret. It is read from the
 # OPENROUTER_API_KEY environment variable and must never be committed.
 # For example, in your terminal: export OPENROUTER_API_KEY='your_key_here'
 # NOTE(review): a literal key was previously stored on this line; treat it
 # as compromised and revoke it with the provider.
 OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
 IMAGE_MODEL = "opengvlab/internvl3-14b:free"
 OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
 # --- Application State ---
 # Global list to store documents in the current batch
 # Each item: {"path": "image_file_path", "type": "document_type_string", "filename": "display_filename"}
 current_batch = []
 # --- Helper Functions ---
 def generate_extraction_prompt(doc_type_provided_by_user):
-    """
-    Generates a detailed prompt for the LLM to extract information
-    and structure it as a JSON object.
-    """
     prompt = f"""You are an advanced OCR and information extraction AI.
 The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
 Your task is to meticulously analyze this image and extract all relevant information.
@@ -58,30 +50,21 @@ Ensure the entire output strictly adheres to the JSON format.
     return prompt
 def process_single_image_with_openrouter(image_path, doc_type):
-    """
-    Encodes an image, sends it to OpenRouter with a generated prompt,
-    and attempts to parse the JSON response from the LLM.
-    """
     if not OPENROUTER_API_KEY:
         return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
     try:
         with open(image_path, "rb") as f:
             encoded_image_bytes = f.read()
             encoded_image_string = base64.b64encode(encoded_image_bytes).decode("utf-8")
         mime_type, _ = mimetypes.guess_type(image_path)
         if not mime_type:
-            # Fallback, try to infer from extension or default to common types
             ext = os.path.splitext(image_path)[1].lower()
             if ext == ".png": mime_type = "image/png"
-            elif ext == ".jpg" or ext == ".jpeg": mime_type = "image/jpeg"
             elif ext == ".webp": mime_type = "image/webp"
-            else: mime_type = "image/jpeg" # A common default
         data_url = f"data:{mime_type};base64,{encoded_image_string}"
         prompt_text = generate_extraction_prompt(doc_type)
         payload = {
             "model": IMAGE_MODEL,
             "messages": [
@@ -93,40 +76,31 @@ def process_single_image_with_openrouter(image_path, doc_type):
                     ]
                 }
             ],
-            "max_tokens": 3000, # Increased for potentially large JSONs
-            "temperature": 0.1, # Lower temperature for more deterministic output
-            # "response_format": {"type": "json_object"}, # Uncomment if OpenRouter & model fully support this
-                                                         # for guaranteed JSON. Prompt is primary method now.
         }
         headers = {
             "Authorization": f"Bearer {OPENROUTER_API_KEY}",
             "Content-Type": "application/json",
-            "HTTP-Referer": "https://huggingface.co/spaces/YOUR_SPACE_NAME", # Optional: Replace with your app's URL
-            "X-Title": "Gradio Document Extractor" # Optional: Replace with your app's name
         }
         print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
-        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120) # 120s timeout
-        response.raise_for_status()  # Raises HTTPError for bad responses (4XX or 5XX)
         result = response.json()
         print(f"Received response from OpenRouter. Status: {response.status_code}")
         if "choices" in result and result["choices"]:
             content_text = result["choices"][0]["message"]["content"]
-            # Try to clean up and parse JSON (models sometimes wrap in markdown)
             clean_content = content_text.strip()
             if clean_content.startswith("```json"):
                 clean_content = clean_content[7:]
                 if clean_content.endswith("```"):
                     clean_content = clean_content[:-3]
-            elif clean_content.startswith("`") and clean_content.endswith("`"): # Single backtick
                  clean_content = clean_content[1:-1]
             try:
                 parsed_json = json.loads(clean_content)
-                # Ensure document_type_provided is in the root, even if LLM missed it
                 if "document_type_provided" not in parsed_json:
                     parsed_json["document_type_provided"] = doc_type
                 return parsed_json
@@ -140,7 +114,6 @@ def process_single_image_with_openrouter(image_path, doc_type):
         else:
             print(f"No 'choices' in API response: {result}")
             return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}
     except requests.exceptions.Timeout:
         print(f"API Request Timeout for {os.path.basename(image_path)}")
         return {"error": "API request timed out.", "document_type_provided": doc_type}
@@ -154,100 +127,69 @@ def process_single_image_with_openrouter(image_path, doc_type):
         print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
         return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
-# --- Gradio Interface Callbacks ---
 def add_document_to_batch_ui(image_filepath, doc_type_selection):
     """Add an uploaded image and its document type to the current batch.

     Args:
         image_filepath: Path of the uploaded image; a falsy value means
             nothing was uploaded.
         doc_type_selection: Document type chosen by the user; a falsy value
             means nothing was selected.

     Returns:
         A 2-tuple ``(rows, status)``: ``rows`` is a list of
         ``[filename, type]`` pairs for every batch entry, and ``status`` is
         a message saying whether the document was added.
     """
     global current_batch
     if not (image_filepath and doc_type_selection):
         # Invalid input: leave the batch untouched and report the failure.
         status = "Failed to add: Image or document type missing."
     else:
         display_name = os.path.basename(image_filepath)
         # The stored path is whatever the caller supplied; only the base
         # name is shown in the table.
         entry = {"path": image_filepath, "type": doc_type_selection, "filename": display_name}
         current_batch.append(entry)
         status = f"Added '{display_name}' as '{doc_type_selection}'."
     # Table rows are rebuilt from the whole batch on every call.
     rows = []
     for doc in current_batch:
         rows.append([doc["filename"], doc["type"]])
     return rows, status
 def process_batch_ui():
     """Process every document in the current batch and combine the results.

     Each batch entry is passed to ``process_single_image_with_openrouter``.
     Results are then grouped by a heuristic person identifier built from the
     extracted fields (document/passport number first, otherwise given name +
     surname, with date of birth appended when present).

     Returns:
         A 2-tuple ``(output, status)``. ``output`` is a dict with the keys
         ``summary``, ``grouped_by_person`` and
         ``unidentified_documents_or_errors``; when the API key is missing or
         the batch is empty it is a short error/message dict instead.
         ``status`` is a human-readable status string.
     """
     global current_batch
     if not OPENROUTER_API_KEY:
         return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
     if not current_batch:
         return {"message": "Batch is empty. Add documents first."}, "Batch is empty."

     all_results = []
     status_updates = []
     for index, doc in enumerate(current_batch):
         progress = f"Processing document {index+1}/{len(current_batch)}: {doc['filename']} ({doc['type']})..."
         print(progress)
         outcome = process_single_image_with_openrouter(doc["path"], doc["type"])
         all_results.append(outcome)
         note = (
             f"Error processing {doc['filename']}: {outcome['error']}"
             if "error" in outcome
             else f"Successfully processed {doc['filename']}."
         )
         status_updates.append(note)

     # Heuristic grouping by person; results without a usable identifier
     # (including error results) are collected separately.
     people = {}
     leftovers = []
     for outcome in all_results:
         fields = outcome.get("extracted_fields") if isinstance(outcome, dict) else None
         identifier = None
         if isinstance(fields, dict):
             # Try the common spellings of each identifying field.
             passport_no = fields.get("Document Number") or fields.get("Passport Number") or fields.get("passport_number")
             given = fields.get("Given Names") or fields.get("Given Name") or fields.get("Name")
             family = fields.get("Surname") or fields.get("Family Name")
             birth = fields.get("Date of Birth") or fields.get("DOB")
             if passport_no:
                 identifier = "passport_" + str(passport_no).replace(' ', '').lower()
             elif given and family:
                 parts = [str(given).replace(' ', '').lower(), str(family).replace(' ', '').lower()]
                 if birth:
                     # The date of birth is stripped of spaces but not lower-cased.
                     parts.append(str(birth).replace(' ', ''))
                 identifier = "_".join(parts)
         if not identifier:
             leftovers.append(outcome)
             continue
         if identifier not in people:
             people[identifier] = {"person_identifier": identifier, "documents": []}
         people[identifier]["documents"].append(outcome)

     final_structured_output = {
         "summary": f"Processed {len(current_batch)} documents.",
         # A list is easier to iterate in the JSON output than a dict.
         "grouped_by_person": list(people.values()),
         "unidentified_documents_or_errors": leftovers,
     }
     final_status = "Batch processing complete. " + " | ".join(status_updates)
     print(final_status)
     return final_structured_output, final_status
 def clear_batch_ui():
     """Clear the current batch.

     Rebinds the module-level ``current_batch`` to a new empty list.

     Returns:
         A 2-tuple: an empty list for the batch table and a status message.
     """
     global current_batch
     current_batch = list()
     empty_table, message = [], "Batch cleared successfully."
     return empty_table, message
-# --- Gradio UI Layout ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
     gr.Markdown(
@@ -259,19 +201,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
         "6. View the extracted information in JSON format below."
     )
     if not OPENROUTER_API_KEY:
         gr.Markdown(
             "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
             "API calls will fail. Please set it and restart this application.</h3>"
         )
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### Step 1: Add Document")
             image_input = gr.Image(
                 label="Upload Document Image",
-                type="filepath", # 'filepath' gives a temporary path to the uploaded file
                 sources=["upload"],
                 height=300
             )
@@ -287,40 +227,31 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 filterable=True
             )
             add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
         with gr.Column(scale=2):
             gr.Markdown("### Step 2: Review Current Batch")
             batch_dataframe = gr.Dataframe(
                 headers=["Filename", "Document Type"],
                 datatype=["str", "str"],
-                row_count=(0, "dynamic"),
-                col_count=(2, "fixed"),
                 wrap=True
-                # Removed: height=380 from here
             )
             clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
     gr.Markdown("### Step 3: Process Batch")
     process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
     status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
     gr.Markdown("### Step 4: View Results")
     output_json_display = gr.JSON(label="Extracted Information (JSON Format)")
-    # --- Connect UI elements to functions ---
     add_button.click(
         fn=add_document_to_batch_ui,
         inputs=[image_input, doc_type_input],
         outputs=[batch_dataframe, status_message_textbox]
-    ).then(lambda: None, outputs=image_input) # Clear image input after adding
     clear_batch_button.click(
         fn=clear_batch_ui,
         inputs=[],
         outputs=[batch_dataframe, status_message_textbox]
     )
     process_button.click(
         fn=process_batch_ui,
         inputs=[],
@@ -333,5 +264,4 @@ if __name__ == "__main__":
         print("Please set it before running the application, e.g.:")
         print("  export OPENROUTER_API_KEY='your_openrouter_key_here'")
         print("The application will launch, but API calls will fail.")
-    demo.launch()

 import mimetypes
 # --- Configuration ---
 # The OpenRouter key is a secret: read it from the OPENROUTER_API_KEY
 # environment variable instead of storing it in the source file.
 # NOTE(review): a literal key was previously stored on this line; treat it
 # as compromised and revoke it with the provider.
 OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
 IMAGE_MODEL = "opengvlab/internvl3-14b:free"
 OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
 # --- Application State ---
 # Module-level list of the documents queued in the current batch.
 current_batch = []
 # --- Helper Functions ---
 def generate_extraction_prompt(doc_type_provided_by_user):
     prompt = f"""You are an advanced OCR and information extraction AI.
 The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
 Your task is to meticulously analyze this image and extract all relevant information.
     return prompt
 def process_single_image_with_openrouter(image_path, doc_type):
     if not OPENROUTER_API_KEY:
         return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
     try:
         with open(image_path, "rb") as f:
             encoded_image_bytes = f.read()
             encoded_image_string = base64.b64encode(encoded_image_bytes).decode("utf-8")
         mime_type, _ = mimetypes.guess_type(image_path)
         if not mime_type:
             ext = os.path.splitext(image_path)[1].lower()
             if ext == ".png": mime_type = "image/png"
+            elif ext in [".jpg", ".jpeg"]: mime_type = "image/jpeg"
             elif ext == ".webp": mime_type = "image/webp"
+            else: mime_type = "image/jpeg"
         data_url = f"data:{mime_type};base64,{encoded_image_string}"
         prompt_text = generate_extraction_prompt(doc_type)
         payload = {
             "model": IMAGE_MODEL,
             "messages": [
                     ]
                 }
             ],
+            "max_tokens": 3000,
+            "temperature": 0.1,
         }
         headers = {
             "Authorization": f"Bearer {OPENROUTER_API_KEY}",
             "Content-Type": "application/json",
+            "HTTP-Referer": "https://huggingface.co/spaces/YOUR_SPACE_NAME",
+            "X-Title": "Gradio Document Extractor"
         }
         print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
+        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120)
+        response.raise_for_status()
         result = response.json()
         print(f"Received response from OpenRouter. Status: {response.status_code}")
         if "choices" in result and result["choices"]:
             content_text = result["choices"][0]["message"]["content"]
             clean_content = content_text.strip()
             if clean_content.startswith("```json"):
                 clean_content = clean_content[7:]
                 if clean_content.endswith("```"):
                     clean_content = clean_content[:-3]
+            elif clean_content.startswith("`") and clean_content.endswith("`"):
                  clean_content = clean_content[1:-1]
             try:
                 parsed_json = json.loads(clean_content)
                 if "document_type_provided" not in parsed_json:
                     parsed_json["document_type_provided"] = doc_type
                 return parsed_json
         else:
             print(f"No 'choices' in API response: {result}")
             return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}
     except requests.exceptions.Timeout:
         print(f"API Request Timeout for {os.path.basename(image_path)}")
         return {"error": "API request timed out.", "document_type_provided": doc_type}
         print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
         return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
 def add_document_to_batch_ui(image_filepath, doc_type_selection):
     """Queue an uploaded image, with its document type, in the batch.

     Args:
         image_filepath: Path of the uploaded image (falsy when missing).
         doc_type_selection: Selected document type (falsy when missing).

     Returns:
         A 2-tuple ``(rows, status)`` where ``rows`` lists
         ``[filename, type]`` for every queued document and ``status``
         reports whether the new document was added.
     """
     global current_batch
     inputs_ok = bool(image_filepath and doc_type_selection)
     if inputs_ok:
         display_name = os.path.basename(image_filepath)
         current_batch.append(
             {"path": image_filepath, "type": doc_type_selection, "filename": display_name}
         )
         status = f"Added '{display_name}' as '{doc_type_selection}'."
     else:
         # Nothing is queued when either input is missing.
         status = "Failed to add: Image or document type missing."
     rows = [[queued["filename"], queued["type"]] for queued in current_batch]
     return rows, status
 def process_batch_ui():
     """Run extraction on each queued document and group the results.

     Every batch entry goes through ``process_single_image_with_openrouter``.
     A result whose ``extracted_fields`` dict yields an identifier is grouped
     under that identifier; every other result (errors included) is listed
     as unidentified.

     Returns:
         A 2-tuple ``(output, status)``: ``output`` is the structured result
         dict (or a short error/message dict when the API key is missing or
         the batch is empty) and ``status`` is a status string.
     """
     global current_batch
     if not OPENROUTER_API_KEY:
         return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
     if not current_batch:
         return {"message": "Batch is empty. Add documents first."}, "Batch is empty."

     def first_present(fields, *keys):
         # Mirrors an `a or b or c` chain: the first truthy value wins,
         # otherwise the value of the last key tried is returned.
         value = None
         for key in keys:
             value = fields.get(key)
             if value:
                 break
         return value

     def squash(value):
         # Stringify and drop spaces so differently formatted values match.
         return str(value).replace(' ', '')

     all_results = []
     status_updates = []
     for position, entry in enumerate(current_batch, start=1):
         print(f"Processing document {position}/{len(current_batch)}: {entry['filename']} ({entry['type']})...")
         extracted = process_single_image_with_openrouter(entry["path"], entry["type"])
         all_results.append(extracted)
         if "error" in extracted:
             status_updates.append(f"Error processing {entry['filename']}: {extracted['error']}")
         else:
             status_updates.append(f"Successfully processed {entry['filename']}.")

     grouped_by_person = {}
     unidentified = []
     for result in all_results:
         person_key = None
         fields = result.get("extracted_fields") if isinstance(result, dict) else None
         if isinstance(fields, dict):
             passport_no = first_present(fields, "Document Number", "Passport Number", "passport_number")
             name = first_present(fields, "Given Names", "Given Name", "Name")
             surname = first_present(fields, "Surname", "Family Name")
             dob = first_present(fields, "Date of Birth", "DOB")
             if passport_no:
                 # A document number takes priority over name-based keys.
                 person_key = f"passport_{squash(passport_no).lower()}"
             elif name and surname:
                 person_key = f"{squash(name).lower()}_{squash(surname).lower()}"
                 if dob:
                     person_key += f"_{squash(dob)}"
         if person_key:
             bucket = grouped_by_person.setdefault(
                 person_key, {"person_identifier": person_key, "documents": []}
             )
             bucket["documents"].append(result)
         else:
             unidentified.append(result)

     final_structured_output = {
         "summary": f"Processed {len(current_batch)} documents.",
         "grouped_by_person": list(grouped_by_person.values()),
         "unidentified_documents_or_errors": unidentified,
     }
     final_status = "Batch processing complete. " + " | ".join(status_updates)
     print(final_status)
     return final_structured_output, final_status
 def clear_batch_ui():
     """Empty the batch by rebinding ``current_batch`` to a new list.

     Returns:
         A 2-tuple: an empty list for the batch table and a status message.
     """
     global current_batch
     current_batch = list()
     cleared = ([], "Batch cleared successfully.")
     return cleared
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
     gr.Markdown(
         "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
         "6. View the extracted information in JSON format below."
     )
     if not OPENROUTER_API_KEY:
         gr.Markdown(
             "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
             "API calls will fail. Please set it and restart this application.</h3>"
         )
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### Step 1: Add Document")
             image_input = gr.Image(
                 label="Upload Document Image",
+                type="filepath",
                 sources=["upload"],
                 height=300
             )
                 filterable=True
             )
             add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
         with gr.Column(scale=2):
             gr.Markdown("### Step 2: Review Current Batch")
             batch_dataframe = gr.Dataframe(
                 headers=["Filename", "Document Type"],
                 datatype=["str", "str"],
+                row_count=1,  # Changed: Start with 1 row, should grow dynamically
+                col_count=2,  # Changed: Simpler integer for fixed columns
                 wrap=True
             )
             clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
     gr.Markdown("### Step 3: Process Batch")
     process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
     status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
     gr.Markdown("### Step 4: View Results")
     output_json_display = gr.JSON(label="Extracted Information (JSON Format)")
     add_button.click(
         fn=add_document_to_batch_ui,
         inputs=[image_input, doc_type_input],
         outputs=[batch_dataframe, status_message_textbox]
+    ).then(lambda: None, outputs=image_input)
     clear_batch_button.click(
         fn=clear_batch_ui,
         inputs=[],
         outputs=[batch_dataframe, status_message_textbox]
     )
     process_button.click(
         fn=process_batch_ui,
         inputs=[],
         print("Please set it before running the application, e.g.:")
         print("  export OPENROUTER_API_KEY='your_openrouter_key_here'")
         print("The application will launch, but API calls will fail.")
+    demo.launch(share=True) # Added share=True

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-gradio==3.50.2
-requests
-python-dotenv
-Pillow

+gradio~=3.50.2
+requests>=2.25.0,<3.0.0
+# pillow might be needed explicitly if not pulled by gradio for image handling
+Pillow>=9.0.0