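# NOTE(review): the original first lines read "Spaces: Runtime error / Runtime error".
# They appear to be page-status text captured with the file, not part of the program.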
import gradio as gr | |
import requests | |
import base64 | |
import os | |
import json | |
import mimetypes | |
# --- Configuration ---
# The OpenRouter key is a secret, so it is read from the environment rather
# than committed to source. An unset variable yields "", which the rest of
# this file already treats as "key missing" via `if not OPENROUTER_API_KEY`.
# NOTE(review): a literal key was previously committed here; it should be
# revoked and replaced.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "").strip()
IMAGE_MODEL = "opengvlab/internvl3-14b:free"
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"

# --- Application State ---
# Documents queued for processing. Each entry is a dict with the keys
# "path", "type" and "filename" (see add_document_to_batch_ui).
# NOTE(review): this is module-level state, so it is presumably shared by
# every concurrent user of the app - confirm whether per-session state is
# required for the documents being handled.
current_batch = []
# --- Helper Functions ---
def generate_extraction_prompt(doc_type_provided_by_user):
    """Build the extraction instructions sent to the vision model.

    Args:
        doc_type_provided_by_user: Document type label chosen by the user
            (for example ``"passport_front"``). It is interpolated verbatim
            into the prompt text.

    Returns:
        The prompt string. It asks the model for a single JSON object with
        the keys ``document_type_provided``, ``document_type_detected``,
        ``extracted_fields``, ``mrz_data``, ``multilingual_info`` and
        ``full_text_ocr``.
    """
    # Doubled braces ({{ }}) are literal braces inside the f-string.
    # Nested list items are indented so the model can tell sub-items apart
    # from the top-level keys.
    prompt = f"""You are an advanced OCR and information extraction AI.
The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
Your task is to meticulously analyze this image and extract all relevant information.
Output Format Instructions:
Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
The JSON object should have the following top-level keys:
- "document_type_provided": (string) The type provided by the user: "{doc_type_provided_by_user}".
- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
  - For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
  - For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
  - For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
  - "raw_mrz_lines": (array of strings) Each line of the MRZ.
  - "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
  If no MRZ, this field should be null.
- "multilingual_info": (array of objects or null) For any text segments not in English:
  - Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
  If no non-English text, this field can be null or an empty array.
- "full_text_ocr": (string) Concatenation of all text found on the document.
Extraction Guidelines:
1. Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
2. Extract all visible text, including small print, stamps, and handwritten annotations if legible.
3. For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
4. If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
5. If the document is multi-page and only one page is provided, note this if apparent.
Ensure the entire output strictly adheres to the JSON format.
"""
    return prompt
def _guess_image_mime_type(image_path):
    """Return the MIME type for *image_path*, defaulting to ``image/jpeg``."""
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type:
        return mime_type
    # mimetypes could not decide; fall back to the file extension.
    ext = os.path.splitext(image_path)[1].lower()
    if ext == ".png":
        return "image/png"
    if ext == ".webp":
        return "image/webp"
    # Covers .jpg/.jpeg and any unknown extension.
    return "image/jpeg"


def _strip_code_fences(raw_text):
    """Return *raw_text* without a surrounding Markdown fence or backticks."""
    cleaned = raw_text.strip()
    if cleaned.startswith("```"):
        # Handles both "```json ... ```" and a bare "``` ... ```" fence.
        cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    elif cleaned.startswith("`") and cleaned.endswith("`"):
        cleaned = cleaned[1:-1]
    return cleaned.strip()


def _parse_llm_json(content_text):
    """Parse the model's reply as JSON, tolerating fences and stray prose.

    Raises:
        json.JSONDecodeError: If no parseable JSON can be recovered.
    """
    candidate = _strip_code_fences(content_text)
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        # The model may have wrapped the object in explanatory text; retry on
        # the outermost {...} span before giving up.
        start = candidate.find("{")
        end = candidate.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(candidate[start:end + 1])


def process_single_image_with_openrouter(image_path, doc_type):
    """Send one image to OpenRouter and return the extracted data.

    Args:
        image_path: Path of the image file to read and upload as a base64
            data URL.
        doc_type: Document type label chosen by the user; forwarded to the
            prompt and echoed back in every result.

    Returns:
        A dict. On success it is the JSON object produced by the model, with
        ``"document_type_provided"`` added if the model omitted it. On any
        failure it is a dict with an ``"error"`` key and
        ``"document_type_provided"``; this function does not raise.
    """
    if not OPENROUTER_API_KEY:
        return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
    try:
        with open(image_path, "rb") as f:
            encoded_image_bytes = f.read()
        encoded_image_string = base64.b64encode(encoded_image_bytes).decode("utf-8")
        mime_type = _guess_image_mime_type(image_path)
        data_url = f"data:{mime_type};base64,{encoded_image_string}"
        prompt_text = generate_extraction_prompt(doc_type)
        payload = {
            "model": IMAGE_MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt_text},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                }
            ],
            "max_tokens": 3000,
            "temperature": 0.1,
        }
        headers = {
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://huggingface.co/spaces/YOUR_SPACE_NAME",
            "X-Title": "Gradio Document Extractor",
        }
        print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()
        print(f"Received response from OpenRouter. Status: {response.status_code}")

        if not ("choices" in result and result["choices"]):
            print(f"No 'choices' in API response: {result}")
            return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}

        content_text = result["choices"][0]["message"]["content"]
        if not isinstance(content_text, str):
            # A null or non-text content would otherwise fail on .strip()
            # and be reported as an opaque unexpected error.
            print(f"No text content in API response: {result}")
            return {
                "error": "No text content in API response.",
                "details": result,
                "document_type_provided": doc_type,
            }

        try:
            parsed_json = _parse_llm_json(content_text)
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError: {e}. Raw content was:\n{content_text}")
            return {
                "error": "Failed to parse LLM output as JSON.",
                "raw_content_from_llm": content_text,
                "document_type_provided": doc_type,
            }

        if not isinstance(parsed_json, dict):
            # The prompt asks for a single JSON object; arrays or scalars
            # cannot carry the expected keys.
            return {
                "error": "LLM output was valid JSON but not a JSON object.",
                "raw_content_from_llm": content_text,
                "document_type_provided": doc_type,
            }
        if "document_type_provided" not in parsed_json:
            parsed_json["document_type_provided"] = doc_type
        return parsed_json
    except requests.exceptions.Timeout:
        print(f"API Request Timeout for {os.path.basename(image_path)}")
        return {"error": "API request timed out.", "document_type_provided": doc_type}
    except requests.exceptions.RequestException as e:
        error_message = f"API Request Error: {str(e)}"
        if e.response is not None:
            error_message += f" Status: {e.response.status_code}, Response: {e.response.text}"
        print(error_message)
        return {"error": error_message, "document_type_provided": doc_type}
    except Exception as e:
        # Boundary handler: callers expect an error dict for every failure
        # (unreadable file, unexpected response shape, ...), never a raise.
        print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
        return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
def add_document_to_batch_ui(image_filepath, doc_type_selection):
    """Queue an uploaded image in the current batch.

    Args:
        image_filepath: Path of the uploaded image, or a falsy value when
            nothing was uploaded.
        doc_type_selection: Document type picked by the user.

    Returns:
        A ``(rows, status)`` tuple: ``rows`` is a list of
        ``[filename, type]`` pairs for every queued document and ``status``
        is a message describing whether the document was added.
    """
    global current_batch
    if image_filepath and doc_type_selection:
        base_name = os.path.basename(image_filepath)
        entry = {"path": image_filepath, "type": doc_type_selection, "filename": base_name}
        current_batch.append(entry)
        status = f"Added '{base_name}' as '{doc_type_selection}'."
    else:
        status = "Failed to add: Image or document type missing."

    # The table is rebuilt from the whole batch on both paths.
    rows = []
    for queued in current_batch:
        rows.append([queued["filename"], queued["type"]])
    return rows, status
def _first_present(fields, *keys):
    """Return the first non-blank value among *keys*, with spaces removed.

    Returns an empty string when none of the keys holds a usable value.
    """
    for key in keys:
        value = fields.get(key)
        if not value:
            continue
        compact = str(value).replace(" ", "")
        # A whitespace-only value must not produce an identifier fragment.
        if compact:
            return compact
    return ""


def _person_identifier(result_item):
    """Derive a grouping identifier from one extraction result, or None.

    A document number takes precedence; otherwise given name plus surname
    is used, with the date of birth appended when available.
    """
    if not isinstance(result_item, dict):
        return None
    fields = result_item.get("extracted_fields")
    if not isinstance(fields, dict):
        return None

    passport_no = _first_present(fields, "Document Number", "Passport Number", "passport_number")
    if passport_no:
        return f"passport_{passport_no.lower()}"

    name = _first_present(fields, "Given Names", "Given Name", "Name")
    surname = _first_present(fields, "Surname", "Family Name")
    if not (name and surname):
        return None
    identifier = f"{name.lower()}_{surname.lower()}"
    dob = _first_present(fields, "Date of Birth", "DOB")
    return f"{identifier}_{dob}" if dob else identifier


def process_batch_ui():
    """Run extraction on every queued document and group results by person.

    Returns:
        A ``(output, status)`` tuple. ``output`` is a dict with the keys
        ``"summary"``, ``"grouped_by_person"`` (a list of
        ``{"person_identifier", "documents"}`` dicts) and
        ``"unidentified_documents_or_errors"``. When the API key is missing
        or the batch is empty, ``output`` is a small error/message dict
        instead. ``status`` is a human-readable progress string.
    """
    global current_batch
    if not OPENROUTER_API_KEY:
        return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
    if not current_batch:
        return {"message": "Batch is empty. Add documents first."}, "Batch is empty."

    all_results = []
    status_updates = []
    total = len(current_batch)
    for position, item_to_process in enumerate(current_batch, start=1):
        status_msg = f"Processing document {position}/{total}: {item_to_process['filename']} ({item_to_process['type']})..."
        print(status_msg)
        extracted_data = process_single_image_with_openrouter(item_to_process["path"], item_to_process["type"])
        all_results.append(extracted_data)
        if isinstance(extracted_data, dict) and "error" in extracted_data:
            status_updates.append(f"Error processing {item_to_process['filename']}: {extracted_data['error']}")
        else:
            status_updates.append(f"Successfully processed {item_to_process['filename']}.")

    grouped_by_person = {}
    unidentified_docs = []
    for result_item in all_results:
        doc_id = _person_identifier(result_item)
        if doc_id is None:
            # Errors and documents without identifying fields end up here.
            unidentified_docs.append(result_item)
            continue
        group = grouped_by_person.setdefault(doc_id, {"person_identifier": doc_id, "documents": []})
        group["documents"].append(result_item)

    final_structured_output = {
        "summary": f"Processed {total} documents.",
        "grouped_by_person": list(grouped_by_person.values()),
        "unidentified_documents_or_errors": unidentified_docs,
    }
    final_status = "Batch processing complete. " + " | ".join(status_updates)
    print(final_status)
    return final_structured_output, final_status
def clear_batch_ui():
    """Empty the current batch.

    Returns:
        A ``(rows, status)`` tuple: an empty list for the batch table and a
        confirmation message.
    """
    global current_batch
    # Rebind to a fresh list rather than mutating the existing one.
    current_batch = list()
    empty_rows, confirmation = [], "Batch cleared successfully."
    return empty_rows, confirmation
# --- User Interface ---
# Builds the Gradio page: an upload column, a batch review column, a process
# button and a JSON result view, then wires the buttons to the handlers.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
    gr.Markdown(
        "**Instructions:**\n"
        "1. Upload a document image (e.g., passport front/back, photo, hotel reservation).\n"
        "2. Select the correct document type.\n"
        "3. Click 'Add Document to Current Batch'. Repeat for all documents of a person or a related set.\n"
        "4. Review the batch. Click 'Clear Entire Batch' to start over.\n"
        "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
        "6. View the extracted information in JSON format below."
    )
    # Shown only when no key is configured, so the failure is visible up front.
    if not OPENROUTER_API_KEY:
        gr.Markdown(
            "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
            "API calls will fail. Please set it and restart this application.</h3>"
        )
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Add Document")
            # type="filepath" hands the handlers a path string, which
            # add_document_to_batch_ui stores and later reads from disk.
            image_input = gr.Image(
                label="Upload Document Image",
                type="filepath",
                sources=["upload"],
                height=300
            )
            doc_type_choices = [
                'passport_front', 'passport_back', 'national_id_front', 'national_id_back',
                'drivers_license_front', 'drivers_license_back', 'visa_sticker',
                'photo', 'hotel_reservation', 'boarding_pass', 'utility_bill', 'other_document'
            ]
            doc_type_input = gr.Dropdown(
                label="Select Document Type",
                choices=doc_type_choices,
                value='passport_front',
                filterable=True
            )
            add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
        with gr.Column(scale=2):
            gr.Markdown("### Step 2: Review Current Batch")
            batch_dataframe = gr.Dataframe(
                headers=["Filename", "Document Type"],
                datatype=["str", "str"],
                row_count=1,  # Start with one row; rows are added as documents are queued.
                col_count=2,  # Two columns: filename and document type.
                wrap=True
            )
            clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
    gr.Markdown("### Step 3: Process Batch")
    process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
    status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
    gr.Markdown("### Step 4: View Results")
    output_json_display = gr.JSON(label="Extracted Information (JSON Format)")

    # After a document is added, the follow-up step resets the image input
    # so the next upload starts from an empty widget.
    add_button.click(
        fn=add_document_to_batch_ui,
        inputs=[image_input, doc_type_input],
        outputs=[batch_dataframe, status_message_textbox]
    ).then(lambda: None, outputs=image_input)
    clear_batch_button.click(
        fn=clear_batch_ui,
        inputs=[],
        outputs=[batch_dataframe, status_message_textbox]
    )
    process_button.click(
        fn=process_batch_ui,
        inputs=[],
        outputs=[output_json_display, status_message_textbox]
    )
# Script entry point: warn on the console when no API key is configured,
# then start the Gradio server.
if __name__ == "__main__":
    if not OPENROUTER_API_KEY:
        missing_key_notice = (
            "ERROR: The OPENROUTER_API_KEY environment variable is not set.",
            "Please set it before running the application, e.g.:",
            " export OPENROUTER_API_KEY='your_openrouter_key_here'",
            "The application will launch, but API calls will fail.",
        )
        for notice_line in missing_key_notice:
            print(notice_line)
    # share=True asks Gradio for a public share link in addition to the local URL.
    demo.launch(share=True)