Spaces:
Runtime error
Runtime error
File size: 14,369 Bytes
df5c908 0a8e31d 3d827ec 6a6e280 3d827ec e08f157 b3819fe 3d827ec e08f157 3d827ec e08f157 3d827ec 0a8e31d 3d827ec 0a8e31d e08f157 0a8e31d 3d827ec e08f157 0a8e31d 3d827ec e08f157 3d827ec e08f157 3d827ec e08f157 3d827ec e08f157 3d827ec e08f157 3d827ec 0a8e31d 3d827ec e08f157 3d827ec e08f157 6a6e280 3d827ec e08f157 3d827ec df5c908 3d827ec e08f157 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 |
import gradio as gr
import requests
import base64
import os
import json
import mimetypes
# --- Configuration ---
# The key is read from the environment so the secret never lives in source
# control; the UI banner and the startup messages in this file already tell the
# operator to set the OPENROUTER_API_KEY environment variable.
# An unset variable yields "" so the existing `if not OPENROUTER_API_KEY` checks work.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "").strip()
# Vision model identifier sent in the "model" field of each request.
IMAGE_MODEL = "opengvlab/internvl3-14b:free"
# Chat-completions endpoint that receives the prompt and the image.
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
# --- Application State ---
# Module-level list of queued documents; each entry is a dict with the keys
# "path", "type" and "filename" (see add_document_to_batch_ui).
current_batch = []
# --- Helper Functions ---
def generate_extraction_prompt(doc_type_provided_by_user):
    """Return the instruction prompt that accompanies a document image.

    The caller-supplied document type is interpolated into the text twice:
    once in the opening description and once as the expected value of the
    "document_type_provided" key. The prompt asks the model to answer with a
    single JSON object containing the keys "document_type_provided",
    "document_type_detected", "extracted_fields", "mrz_data",
    "multilingual_info" and "full_text_ocr".
    """
    # Doubled braces below are literal braces in the rendered prompt.
    return f"""You are an advanced OCR and information extraction AI.
The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
Your task is to meticulously analyze this image and extract all relevant information.
Output Format Instructions:
Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
The JSON object should have the following top-level keys:
- "document_type_provided": (string) The type provided by the user: "{doc_type_provided_by_user}".
- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
- For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
- For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
- For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
- "raw_mrz_lines": (array of strings) Each line of the MRZ.
- "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
If no MRZ, this field should be null.
- "multilingual_info": (array of objects or null) For any text segments not in English:
- Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
If no non-English text, this field can be null or an empty array.
- "full_text_ocr": (string) Concatenation of all text found on the document.
Extraction Guidelines:
1. Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
2. Extract all visible text, including small print, stamps, and handwritten annotations if legible.
3. For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
4. If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
5. If the document is multi-page and only one page is provided, note this if apparent.
Ensure the entire output strictly adheres to the JSON format.
"""
def _guess_image_mime_type(image_path):
    """Return a MIME type for *image_path*, falling back to JPEG when unknown."""
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type:
        return mime_type
    # mimetypes had no answer: map the common image extensions by hand.
    ext = os.path.splitext(image_path)[1].lower()
    known_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
    }
    return known_types.get(ext, "image/jpeg")


def _strip_code_fences(text):
    """Remove a surrounding Markdown code fence (``` or ```json) or single backticks."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # An optional language tag directly follows the opening fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    elif len(cleaned) >= 2 and cleaned.startswith("`") and cleaned.endswith("`"):
        cleaned = cleaned[1:-1]
    return cleaned.strip()


def _parse_llm_json(content_text):
    """Decode the model reply as JSON; raises json.JSONDecodeError if impossible."""
    cleaned = _strip_code_fences(content_text)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # The model sometimes adds prose around the object despite the prompt;
        # retry on the outermost {...} span before giving up.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(cleaned[start:end + 1])


def process_single_image_with_openrouter(image_path, doc_type):
    """Send one image to the OpenRouter chat-completions API and return the parsed reply.

    Args:
        image_path: Path of the image file to read and embed as a base64 data URL.
        doc_type: Document type chosen by the user; interpolated into the prompt
            and echoed back under "document_type_provided".

    Returns:
        The JSON object produced by the model (with "document_type_provided"
        added when the model omitted it). On any failure a dict with an
        "error" key and "document_type_provided" is returned instead; this
        function does not raise for API, file or parsing problems.
    """
    if not OPENROUTER_API_KEY:
        return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
    try:
        with open(image_path, "rb") as f:
            encoded_image_string = base64.b64encode(f.read()).decode("utf-8")
        mime_type = _guess_image_mime_type(image_path)
        data_url = f"data:{mime_type};base64,{encoded_image_string}"
        prompt_text = generate_extraction_prompt(doc_type)
        payload = {
            "model": IMAGE_MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt_text},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                }
            ],
            "max_tokens": 3000,
            "temperature": 0.1,
        }
        headers = {
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://huggingface.co/spaces/YOUR_SPACE_NAME",
            "X-Title": "Gradio Document Extractor",
        }
        print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()
        print(f"Received response from OpenRouter. Status: {response.status_code}")

        if not ("choices" in result and result["choices"]):
            print(f"No 'choices' in API response: {result}")
            return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}

        content_text = result["choices"][0]["message"]["content"]
        if not isinstance(content_text, str):
            # Guard against a null or non-text content field, which cannot be parsed.
            print(f"No text content in API response: {result}")
            return {
                "error": "API response contained no text content.",
                "details": result,
                "document_type_provided": doc_type,
            }

        try:
            parsed_json = _parse_llm_json(content_text)
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError: {e}. Raw content was:\n{content_text}")
            return {
                "error": "Failed to parse LLM output as JSON.",
                "raw_content_from_llm": content_text,
                "document_type_provided": doc_type,
            }

        if not isinstance(parsed_json, dict):
            # Valid JSON but an array/scalar: callers expect an object with keys.
            return {
                "error": "LLM output was valid JSON but not a JSON object.",
                "raw_content_from_llm": content_text,
                "document_type_provided": doc_type,
            }
        parsed_json.setdefault("document_type_provided", doc_type)
        return parsed_json
    except requests.exceptions.Timeout:
        print(f"API Request Timeout for {os.path.basename(image_path)}")
        return {"error": "API request timed out.", "document_type_provided": doc_type}
    except requests.exceptions.RequestException as e:
        error_message = f"API Request Error: {str(e)}"
        if e.response is not None:
            error_message += f" Status: {e.response.status_code}, Response: {e.response.text}"
        print(error_message)
        return {"error": error_message, "document_type_provided": doc_type}
    except Exception as e:
        # Boundary handler: the batch loop relies on getting an error dict back
        # rather than an exception, so anything unexpected is reported this way.
        print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
        return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
def add_document_to_batch_ui(image_filepath, doc_type_selection):
    """Queue an uploaded image in the module-level batch.

    Returns a pair: the batch as rows of [filename, document type] and a
    status message. When either argument is empty the batch is left unchanged
    and the message reports the failure.
    """
    global current_batch
    if image_filepath and doc_type_selection:
        base_name = os.path.basename(image_filepath)
        entry = {"path": image_filepath, "type": doc_type_selection, "filename": base_name}
        current_batch.append(entry)
        status = f"Added '{base_name}' as '{doc_type_selection}'."
    else:
        status = "Failed to add: Image or document type missing."
    # Rows are rebuilt from the full batch on both outcomes.
    rows = [[queued["filename"], queued["type"]] for queued in current_batch]
    return rows, status
def process_batch_ui():
    """Run extraction on every queued document and group the results by person.

    Returns a pair: a dict with "summary", "grouped_by_person" and
    "unidentified_documents_or_errors", and a status string joining the
    per-document outcomes. If the API key is missing or the batch is empty,
    a short explanatory dict and message are returned instead.
    """
    global current_batch
    if not OPENROUTER_API_KEY:
        return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
    if not current_batch:
        return {"message": "Batch is empty. Add documents first."}, "Batch is empty."

    def first_present(mapping, *keys):
        # First truthy value among the candidate field names, else None.
        for key in keys:
            value = mapping.get(key)
            if value:
                return value
        return None

    def compact(value):
        # String form with all spaces removed, used to build identifiers.
        return str(value).replace(' ', '')

    # Phase 1: call the extractor once per queued document.
    results = []
    outcomes = []
    for index, queued in enumerate(current_batch):
        print(f"Processing document {index+1}/{len(current_batch)}: {queued['filename']} ({queued['type']})...")
        extracted = process_single_image_with_openrouter(queued["path"], queued["type"])
        results.append(extracted)
        if "error" in extracted:
            outcomes.append(f"Error processing {queued['filename']}: {extracted['error']}")
        else:
            outcomes.append(f"Successfully processed {queued['filename']}.")

    # Phase 2: derive an identifier per result and bucket by it.
    people = {}
    leftovers = []
    for record in results:
        identifier = None
        has_fields = (
            isinstance(record, dict)
            and "extracted_fields" in record
            and isinstance(record["extracted_fields"], dict)
        )
        if has_fields:
            fields = record["extracted_fields"]
            number = first_present(fields, "Document Number", "Passport Number", "passport_number")
            given = first_present(fields, "Given Names", "Given Name", "Name")
            family = first_present(fields, "Surname", "Family Name")
            born = first_present(fields, "Date of Birth", "DOB")
            # Document number wins; otherwise fall back to name (+ birth date).
            if number:
                identifier = f"passport_{compact(number).lower()}"
            elif given and family and born:
                identifier = f"{compact(given).lower()}_{compact(family).lower()}_{compact(born)}"
            elif given and family:
                identifier = f"{compact(given).lower()}_{compact(family).lower()}"
        if not identifier:
            # Errors and documents without usable identity fields end up here.
            leftovers.append(record)
            continue
        bucket = people.setdefault(identifier, {"person_identifier": identifier, "documents": []})
        bucket["documents"].append(record)

    final_structured_output = {
        "summary": f"Processed {len(current_batch)} documents.",
        "grouped_by_person": list(people.values()),
        "unidentified_documents_or_errors": leftovers,
    }
    final_status = "Batch processing complete. " + " | ".join(outcomes)
    print(final_status)
    return final_structured_output, final_status
def clear_batch_ui():
    """Reset the module-level batch and return an empty table plus a confirmation."""
    global current_batch
    # Rebind to a fresh list rather than mutating the existing one.
    current_batch = list()
    empty_table = []
    confirmation = "Batch cleared successfully."
    return empty_table, confirmation
# --- Gradio UI ---
# Builds the page: instructions, an upload column, a batch-review column, a
# process button with a status box, and a JSON viewer for the results.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
    gr.Markdown(
        "**Instructions:**\n"
        "1. Upload a document image (e.g., passport front/back, photo, hotel reservation).\n"
        "2. Select the correct document type.\n"
        "3. Click 'Add Document to Current Batch'. Repeat for all documents of a person or a related set.\n"
        "4. Review the batch. Click 'Clear Entire Batch' to start over.\n"
        "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
        "6. View the extracted information in JSON format below."
    )
    # Shown only when no key is configured, so the failure is visible up front.
    if not OPENROUTER_API_KEY:
        gr.Markdown(
            "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
            "API calls will fail. Please set it and restart this application.</h3>"
        )
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Add Document")
            # type="filepath" hands the handler a path string, which
            # add_document_to_batch_ui stores and later opens from disk.
            image_input = gr.Image(
                label="Upload Document Image",
                type="filepath",
                sources=["upload"],
                height=300,
            )
            doc_type_choices = [
                'passport_front', 'passport_back', 'national_id_front', 'national_id_back',
                'drivers_license_front', 'drivers_license_back', 'visa_sticker',
                'photo', 'hotel_reservation', 'boarding_pass', 'utility_bill', 'other_document',
            ]
            doc_type_input = gr.Dropdown(
                label="Select Document Type",
                choices=doc_type_choices,
                value='passport_front',
                filterable=True,
            )
            add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
        with gr.Column(scale=2):
            gr.Markdown("### Step 2: Review Current Batch")
            # Two fixed columns matching the [filename, type] rows returned by
            # the batch handlers; starts with one row.
            batch_dataframe = gr.Dataframe(
                headers=["Filename", "Document Type"],
                datatype=["str", "str"],
                row_count=1,
                col_count=2,
                wrap=True,
            )
            clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
    gr.Markdown("### Step 3: Process Batch")
    process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
    status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
    gr.Markdown("### Step 4: View Results")
    output_json_display = gr.JSON(label="Extracted Information (JSON Format)")

    # --- Event wiring ---
    # After a document is added, the follow-up step returns None to the image
    # component so the upload widget is emptied for the next file.
    add_button.click(
        fn=add_document_to_batch_ui,
        inputs=[image_input, doc_type_input],
        outputs=[batch_dataframe, status_message_textbox],
    ).then(lambda: None, outputs=image_input)
    clear_batch_button.click(
        fn=clear_batch_ui,
        inputs=[],
        outputs=[batch_dataframe, status_message_textbox],
    )
    process_button.click(
        fn=process_batch_ui,
        inputs=[],
        outputs=[output_json_display, status_message_textbox],
    )
# Script entry point: warn on a missing key, then start the Gradio server.
if __name__ == "__main__":
    if not OPENROUTER_API_KEY:
        startup_warning = (
            "ERROR: The OPENROUTER_API_KEY environment variable is not set.",
            "Please set it before running the application, e.g.:",
            " export OPENROUTER_API_KEY='your_openrouter_key_here'",
            "The application will launch, but API calls will fail.",
        )
        for warning_line in startup_warning:
            print(warning_line)
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)