Spaces:

SauravCh11
/

PassportExtrator

Runtime error

App Files Community

Sandy2636 committed on Apr 27

Commit

6a6e280

1 Parent(s): 4b54b36

Update space

Browse files

Files changed (1) hide show

app.py +14 -25

app.py CHANGED Viewed

@@ -4,12 +4,12 @@ import base64
 import os
 import json
 import mimetypes
-# Re Commit
 # --- Configuration ---
 # IMPORTANT: Set your OPENROUTER_API_KEY as an environment variable
 # For example, in your terminal: export OPENROUTER_API_KEY='your_key_here'
-OPENROUTER_API_KEY = "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"
-IMAGE_MODEL = "opengvlab/internvl3-14b:free"
 OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
 # --- Application State ---
@@ -114,7 +114,7 @@ def process_single_image_with_openrouter(image_path, doc_type):
         if "choices" in result and result["choices"]:
             content_text = result["choices"][0]["message"]["content"]
             # Try to clean up and parse JSON (models sometimes wrap in markdown)
             clean_content = content_text.strip()
             if clean_content.startswith("```json"):
@@ -123,7 +123,7 @@ def process_single_image_with_openrouter(image_path, doc_type):
                     clean_content = clean_content[:-3]
             elif clean_content.startswith("`") and clean_content.endswith("`"): # Single backtick
                  clean_content = clean_content[1:-1]
             try:
                 parsed_json = json.loads(clean_content)
                 # Ensure document_type_provided is in the root, even if LLM missed it
@@ -165,11 +165,11 @@ def add_document_to_batch_ui(image_filepath, doc_type_selection):
         # It should be used relatively quickly. For long-lived state,
         # you might copy the file or read its content.
         current_batch.append({"path": image_filepath, "type": doc_type_selection, "filename": filename})
         # Prepare display for Dataframe: list of lists
         batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
         return batch_display_data, f"Added '{filename}' as '{doc_type_selection}'."
     # Return current state if inputs are invalid
     batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
     return batch_display_data, "Failed to add: Image or document type missing."
@@ -191,13 +191,6 @@ def process_batch_ui():
         status_msg = f"Processing document {i+1}/{len(current_batch)}: {item_to_process['filename']} ({item_to_process['type']})..."
         print(status_msg)
         # yield None, status_msg # This would require process_batch_ui to be a generator for live updates
-        # Ensure the file path is valid; Gradio's temp files should be okay here
-        # if not os.path.exists(item_to_process["path"]):
-        #     error_res = {"error": f"File not found: {item_to_process['filename']}. It might have been a temporary file that was removed.", "document_type_provided": item_to_process['type']}
-        #     all_results.append(error_res)
-        #     status_updates.append(f"Error: File {item_to_process['filename']} not found.")
-        #     continue
         extracted_data = process_single_image_with_openrouter(item_to_process["path"], item_to_process["type"])
         all_results.append(extracted_data)
@@ -235,16 +228,12 @@ def process_batch_ui():
             grouped_by_person[doc_id]["documents"].append(result_item)
         else:
             unidentified_docs.append(result_item)
     final_structured_output = {
         "summary": f"Processed {len(current_batch)} documents.",
         "grouped_by_person": list(grouped_by_person.values()) if grouped_by_person else [], # Convert dict to list for easier iteration in JSON
         "unidentified_documents_or_errors": unidentified_docs
     }
-    # Do not clear batch here, let user do it.
-    # current_batch = [] # Clears batch after processing
-    # batch_display_data = []
     final_status = "Batch processing complete. " + " | ".join(status_updates)
     print(final_status)
@@ -270,7 +259,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
         "6. View the extracted information in JSON format below."
     )
     if not OPENROUTER_API_KEY:
         gr.Markdown(
             "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
@@ -304,16 +293,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             batch_dataframe = gr.Dataframe(
                 headers=["Filename", "Document Type"],
                 datatype=["str", "str"],
-                row_count=(0, "dynamic"), # Start with 0 rows, dynamically adjusts
                 col_count=(2, "fixed"),
-                wrap=True,
-                height=380,
             )
             clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
     gr.Markdown("### Step 3: Process Batch")
     process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
     status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
     gr.Markdown("### Step 4: View Results")
@@ -344,5 +333,5 @@ if __name__ == "__main__":
         print("Please set it before running the application, e.g.:")
         print("  export OPENROUTER_API_KEY='your_openrouter_key_here'")
         print("The application will launch, but API calls will fail.")
     demo.launch()

 import os
 import json
 import mimetypes
 # --- Configuration ---
 # IMPORTANT: Set your OPENROUTER_API_KEY as an environment variable
 # For example, in your terminal: export OPENROUTER_API_KEY='your_key_here'
+OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
+IMAGE_MODEL = "opengvlab/internvl3-14b"
 OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
 # --- Application State ---
         if "choices" in result and result["choices"]:
             content_text = result["choices"][0]["message"]["content"]
             # Try to clean up and parse JSON (models sometimes wrap in markdown)
             clean_content = content_text.strip()
             if clean_content.startswith("```json"):
                     clean_content = clean_content[:-3]
             elif clean_content.startswith("`") and clean_content.endswith("`"): # Single backtick
                  clean_content = clean_content[1:-1]
             try:
                 parsed_json = json.loads(clean_content)
                 # Ensure document_type_provided is in the root, even if LLM missed it
         # It should be used relatively quickly. For long-lived state,
         # you might copy the file or read its content.
         current_batch.append({"path": image_filepath, "type": doc_type_selection, "filename": filename})
         # Prepare display for Dataframe: list of lists
         batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
         return batch_display_data, f"Added '{filename}' as '{doc_type_selection}'."
     # Return current state if inputs are invalid
     batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
     return batch_display_data, "Failed to add: Image or document type missing."
         status_msg = f"Processing document {i+1}/{len(current_batch)}: {item_to_process['filename']} ({item_to_process['type']})..."
         print(status_msg)
         # yield None, status_msg # This would require process_batch_ui to be a generator for live updates
         extracted_data = process_single_image_with_openrouter(item_to_process["path"], item_to_process["type"])
         all_results.append(extracted_data)
             grouped_by_person[doc_id]["documents"].append(result_item)
         else:
             unidentified_docs.append(result_item)
     final_structured_output = {
         "summary": f"Processed {len(current_batch)} documents.",
         "grouped_by_person": list(grouped_by_person.values()) if grouped_by_person else [], # Convert dict to list for easier iteration in JSON
         "unidentified_documents_or_errors": unidentified_docs
     }
     final_status = "Batch processing complete. " + " | ".join(status_updates)
     print(final_status)
         "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
         "6. View the extracted information in JSON format below."
     )
     if not OPENROUTER_API_KEY:
         gr.Markdown(
             "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
             batch_dataframe = gr.Dataframe(
                 headers=["Filename", "Document Type"],
                 datatype=["str", "str"],
+                row_count=(0, "dynamic"),
                 col_count=(2, "fixed"),
+                wrap=True
+                # Removed: height=380 from here
             )
             clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
     gr.Markdown("### Step 3: Process Batch")
     process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
     status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
     gr.Markdown("### Step 4: View Results")
         print("Please set it before running the application, e.g.:")
         print("  export OPENROUTER_API_KEY='your_openrouter_key_here'")
         print("The application will launch, but API calls will fail.")
     demo.launch()