Sandy2636 committed on
Commit
e08f157
·
1 Parent(s): b3819fe

Update space

Browse files
Files changed (2) hide show
  1. app.py +18 -88
  2. requirements.txt +4 -4
app.py CHANGED
@@ -6,24 +6,16 @@ import json
6
  import mimetypes
7
 
8
  # --- Configuration ---
9
- # IMPORTANT: Set your OPENROUTER_API_KEY as an environment variable
10
- # For example, in your terminal: export OPENROUTER_API_KEY='your_key_here'
11
- OPENROUTER_API_KEY = "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"
12
  IMAGE_MODEL = "opengvlab/internvl3-14b:free"
13
  OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
14
 
15
  # --- Application State ---
16
- # Global list to store documents in the current batch
17
- # Each item: {"path": "image_file_path", "type": "document_type_string", "filename": "display_filename"}
18
  current_batch = []
19
 
20
  # --- Helper Functions ---
21
 
22
  def generate_extraction_prompt(doc_type_provided_by_user):
23
- """
24
- Generates a detailed prompt for the LLM to extract information
25
- and structure it as a JSON object.
26
- """
27
  prompt = f"""You are an advanced OCR and information extraction AI.
28
  The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
29
  Your task is to meticulously analyze this image and extract all relevant information.
@@ -58,30 +50,21 @@ Ensure the entire output strictly adheres to the JSON format.
58
  return prompt
59
 
60
  def process_single_image_with_openrouter(image_path, doc_type):
61
- """
62
- Encodes an image, sends it to OpenRouter with a generated prompt,
63
- and attempts to parse the JSON response from the LLM.
64
- """
65
  if not OPENROUTER_API_KEY:
66
  return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
67
-
68
  try:
69
  with open(image_path, "rb") as f:
70
  encoded_image_bytes = f.read()
71
  encoded_image_string = base64.b64encode(encoded_image_bytes).decode("utf-8")
72
-
73
  mime_type, _ = mimetypes.guess_type(image_path)
74
  if not mime_type:
75
- # Fallback, try to infer from extension or default to common types
76
  ext = os.path.splitext(image_path)[1].lower()
77
  if ext == ".png": mime_type = "image/png"
78
- elif ext == ".jpg" or ext == ".jpeg": mime_type = "image/jpeg"
79
  elif ext == ".webp": mime_type = "image/webp"
80
- else: mime_type = "image/jpeg" # A common default
81
-
82
  data_url = f"data:{mime_type};base64,{encoded_image_string}"
83
  prompt_text = generate_extraction_prompt(doc_type)
84
-
85
  payload = {
86
  "model": IMAGE_MODEL,
87
  "messages": [
@@ -93,40 +76,31 @@ def process_single_image_with_openrouter(image_path, doc_type):
93
  ]
94
  }
95
  ],
96
- "max_tokens": 3000, # Increased for potentially large JSONs
97
- "temperature": 0.1, # Lower temperature for more deterministic output
98
- # "response_format": {"type": "json_object"}, # Uncomment if OpenRouter & model fully support this
99
- # for guaranteed JSON. Prompt is primary method now.
100
  }
101
-
102
  headers = {
103
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
104
  "Content-Type": "application/json",
105
- "HTTP-Referer": "https://huggingface.co/spaces/YOUR_SPACE_NAME", # Optional: Replace with your app's URL
106
- "X-Title": "Gradio Document Extractor" # Optional: Replace with your app's name
107
  }
108
-
109
  print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
110
- response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120) # 120s timeout
111
- response.raise_for_status() # Raises HTTPError for bad responses (4XX or 5XX)
112
  result = response.json()
113
  print(f"Received response from OpenRouter. Status: {response.status_code}")
114
-
115
  if "choices" in result and result["choices"]:
116
  content_text = result["choices"][0]["message"]["content"]
117
-
118
- # Try to clean up and parse JSON (models sometimes wrap in markdown)
119
  clean_content = content_text.strip()
120
  if clean_content.startswith("```json"):
121
  clean_content = clean_content[7:]
122
  if clean_content.endswith("```"):
123
  clean_content = clean_content[:-3]
124
- elif clean_content.startswith("`") and clean_content.endswith("`"): # Single backtick
125
  clean_content = clean_content[1:-1]
126
-
127
  try:
128
  parsed_json = json.loads(clean_content)
129
- # Ensure document_type_provided is in the root, even if LLM missed it
130
  if "document_type_provided" not in parsed_json:
131
  parsed_json["document_type_provided"] = doc_type
132
  return parsed_json
@@ -140,7 +114,6 @@ def process_single_image_with_openrouter(image_path, doc_type):
140
  else:
141
  print(f"No 'choices' in API response: {result}")
142
  return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}
143
-
144
  except requests.exceptions.Timeout:
145
  print(f"API Request Timeout for {os.path.basename(image_path)}")
146
  return {"error": "API request timed out.", "document_type_provided": doc_type}
@@ -154,100 +127,69 @@ def process_single_image_with_openrouter(image_path, doc_type):
154
  print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
155
  return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
156
 
157
- # --- Gradio Interface Callbacks ---
158
-
159
  def add_document_to_batch_ui(image_filepath, doc_type_selection):
160
- """Adds an uploaded image and its type to the current batch state."""
161
  global current_batch
162
  if image_filepath and doc_type_selection:
163
  filename = os.path.basename(image_filepath)
164
- # Note: image_filepath is a temporary path from Gradio.
165
- # It should be used relatively quickly. For long-lived state,
166
- # you might copy the file or read its content.
167
  current_batch.append({"path": image_filepath, "type": doc_type_selection, "filename": filename})
168
-
169
- # Prepare display for Dataframe: list of lists
170
  batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
171
  return batch_display_data, f"Added '{filename}' as '{doc_type_selection}'."
172
-
173
- # Return current state if inputs are invalid
174
  batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
175
  return batch_display_data, "Failed to add: Image or document type missing."
176
 
177
-
178
  def process_batch_ui():
179
- """Processes all documents in the current batch and returns combined JSON results."""
180
  global current_batch
181
  if not OPENROUTER_API_KEY:
182
  return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
183
-
184
  if not current_batch:
185
  return {"message": "Batch is empty. Add documents first."}, "Batch is empty."
186
-
187
  all_results = []
188
  status_updates = []
189
-
190
  for i, item_to_process in enumerate(current_batch):
191
  status_msg = f"Processing document {i+1}/{len(current_batch)}: {item_to_process['filename']} ({item_to_process['type']})..."
192
  print(status_msg)
193
- # yield None, status_msg # This would require process_batch_ui to be a generator for live updates
194
-
195
  extracted_data = process_single_image_with_openrouter(item_to_process["path"], item_to_process["type"])
196
  all_results.append(extracted_data)
197
  if "error" in extracted_data:
198
  status_updates.append(f"Error processing {item_to_process['filename']}: {extracted_data['error']}")
199
  else:
200
  status_updates.append(f"Successfully processed {item_to_process['filename']}.")
201
-
202
- # Attempt to group results by person (heuristic)
203
- # This is a basic grouping; more sophisticated logic could be added.
204
  grouped_by_person = {}
205
  unidentified_docs = []
206
-
207
  for result_item in all_results:
208
  doc_id = None
209
  if isinstance(result_item, dict) and "extracted_fields" in result_item and isinstance(result_item["extracted_fields"], dict):
210
  fields = result_item["extracted_fields"]
211
- # Try common identifiers
212
  passport_no = fields.get("Document Number") or fields.get("Passport Number") or fields.get("passport_number")
213
  name = fields.get("Given Names") or fields.get("Given Name") or fields.get("Name")
214
  surname = fields.get("Surname") or fields.get("Family Name")
215
  dob = fields.get("Date of Birth") or fields.get("DOB")
216
-
217
  if passport_no:
218
  doc_id = f"passport_{str(passport_no).replace(' ', '').lower()}"
219
  elif name and surname and dob:
220
  doc_id = f"{str(name).replace(' ', '').lower()}_{str(surname).replace(' ', '').lower()}_{str(dob).replace(' ', '')}"
221
  elif name and surname:
222
  doc_id = f"{str(name).replace(' ', '').lower()}_{str(surname).replace(' ', '').lower()}"
223
-
224
-
225
  if doc_id:
226
  if doc_id not in grouped_by_person:
227
  grouped_by_person[doc_id] = {"person_identifier": doc_id, "documents": []}
228
  grouped_by_person[doc_id]["documents"].append(result_item)
229
  else:
230
  unidentified_docs.append(result_item)
231
-
232
  final_structured_output = {
233
  "summary": f"Processed {len(current_batch)} documents.",
234
- "grouped_by_person": list(grouped_by_person.values()) if grouped_by_person else [], # Convert dict to list for easier iteration in JSON
235
  "unidentified_documents_or_errors": unidentified_docs
236
  }
237
-
238
  final_status = "Batch processing complete. " + " | ".join(status_updates)
239
  print(final_status)
240
- return final_structured_output, final_status # Output JSON and status message
241
-
242
 
243
  def clear_batch_ui():
244
- """Clears the current batch and updates the UI."""
245
  global current_batch
246
  current_batch = []
247
- return [], "Batch cleared successfully." # Cleared dataframe and status message
248
 
249
-
250
- # --- Gradio UI Layout ---
251
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
252
  gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
253
  gr.Markdown(
@@ -259,19 +201,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
259
  "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
260
  "6. View the extracted information in JSON format below."
261
  )
262
-
263
  if not OPENROUTER_API_KEY:
264
  gr.Markdown(
265
  "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
266
  "API calls will fail. Please set it and restart this application.</h3>"
267
  )
268
-
269
  with gr.Row():
270
  with gr.Column(scale=1):
271
  gr.Markdown("### Step 1: Add Document")
272
  image_input = gr.Image(
273
  label="Upload Document Image",
274
- type="filepath", # 'filepath' gives a temporary path to the uploaded file
275
  sources=["upload"],
276
  height=300
277
  )
@@ -287,40 +227,31 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
287
  filterable=True
288
  )
289
  add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
290
-
291
  with gr.Column(scale=2):
292
  gr.Markdown("### Step 2: Review Current Batch")
293
  batch_dataframe = gr.Dataframe(
294
  headers=["Filename", "Document Type"],
295
  datatype=["str", "str"],
296
- row_count=(0, "dynamic"),
297
- col_count=(2, "fixed"),
298
  wrap=True
299
- # Removed: height=380 from here
300
  )
301
  clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
302
-
303
  gr.Markdown("### Step 3: Process Batch")
304
  process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
305
-
306
  status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
307
-
308
  gr.Markdown("### Step 4: View Results")
309
  output_json_display = gr.JSON(label="Extracted Information (JSON Format)")
310
-
311
- # --- Connect UI elements to functions ---
312
  add_button.click(
313
  fn=add_document_to_batch_ui,
314
  inputs=[image_input, doc_type_input],
315
  outputs=[batch_dataframe, status_message_textbox]
316
- ).then(lambda: None, outputs=image_input) # Clear image input after adding
317
-
318
  clear_batch_button.click(
319
  fn=clear_batch_ui,
320
  inputs=[],
321
  outputs=[batch_dataframe, status_message_textbox]
322
  )
323
-
324
  process_button.click(
325
  fn=process_batch_ui,
326
  inputs=[],
@@ -333,5 +264,4 @@ if __name__ == "__main__":
333
  print("Please set it before running the application, e.g.:")
334
  print(" export OPENROUTER_API_KEY='your_openrouter_key_here'")
335
  print("The application will launch, but API calls will fail.")
336
-
337
- demo.launch()
 
6
  import mimetypes
7
 
8
  # --- Configuration ---
9
+ OPENROUTER_API_KEY = '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'
 
 
10
  IMAGE_MODEL = "opengvlab/internvl3-14b:free"
11
  OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
12
 
13
  # --- Application State ---
 
 
14
  current_batch = []
15
 
16
  # --- Helper Functions ---
17
 
18
  def generate_extraction_prompt(doc_type_provided_by_user):
 
 
 
 
19
  prompt = f"""You are an advanced OCR and information extraction AI.
20
  The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
21
  Your task is to meticulously analyze this image and extract all relevant information.
 
50
  return prompt
51
 
52
  def process_single_image_with_openrouter(image_path, doc_type):
 
 
 
 
53
  if not OPENROUTER_API_KEY:
54
  return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
 
55
  try:
56
  with open(image_path, "rb") as f:
57
  encoded_image_bytes = f.read()
58
  encoded_image_string = base64.b64encode(encoded_image_bytes).decode("utf-8")
 
59
  mime_type, _ = mimetypes.guess_type(image_path)
60
  if not mime_type:
 
61
  ext = os.path.splitext(image_path)[1].lower()
62
  if ext == ".png": mime_type = "image/png"
63
+ elif ext in [".jpg", ".jpeg"]: mime_type = "image/jpeg"
64
  elif ext == ".webp": mime_type = "image/webp"
65
+ else: mime_type = "image/jpeg"
 
66
  data_url = f"data:{mime_type};base64,{encoded_image_string}"
67
  prompt_text = generate_extraction_prompt(doc_type)
 
68
  payload = {
69
  "model": IMAGE_MODEL,
70
  "messages": [
 
76
  ]
77
  }
78
  ],
79
+ "max_tokens": 3000,
80
+ "temperature": 0.1,
 
 
81
  }
 
82
  headers = {
83
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
84
  "Content-Type": "application/json",
85
+ "HTTP-Referer": "https://huggingface.co/spaces/YOUR_SPACE_NAME",
86
+ "X-Title": "Gradio Document Extractor"
87
  }
 
88
  print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
89
+ response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120)
90
+ response.raise_for_status()
91
  result = response.json()
92
  print(f"Received response from OpenRouter. Status: {response.status_code}")
 
93
  if "choices" in result and result["choices"]:
94
  content_text = result["choices"][0]["message"]["content"]
 
 
95
  clean_content = content_text.strip()
96
  if clean_content.startswith("```json"):
97
  clean_content = clean_content[7:]
98
  if clean_content.endswith("```"):
99
  clean_content = clean_content[:-3]
100
+ elif clean_content.startswith("`") and clean_content.endswith("`"):
101
  clean_content = clean_content[1:-1]
 
102
  try:
103
  parsed_json = json.loads(clean_content)
 
104
  if "document_type_provided" not in parsed_json:
105
  parsed_json["document_type_provided"] = doc_type
106
  return parsed_json
 
114
  else:
115
  print(f"No 'choices' in API response: {result}")
116
  return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}
 
117
  except requests.exceptions.Timeout:
118
  print(f"API Request Timeout for {os.path.basename(image_path)}")
119
  return {"error": "API request timed out.", "document_type_provided": doc_type}
 
127
  print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
128
  return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
129
 
 
 
130
  def add_document_to_batch_ui(image_filepath, doc_type_selection):
 
131
  global current_batch
132
  if image_filepath and doc_type_selection:
133
  filename = os.path.basename(image_filepath)
 
 
 
134
  current_batch.append({"path": image_filepath, "type": doc_type_selection, "filename": filename})
 
 
135
  batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
136
  return batch_display_data, f"Added '{filename}' as '{doc_type_selection}'."
 
 
137
  batch_display_data = [[item["filename"], item["type"]] for item in current_batch]
138
  return batch_display_data, "Failed to add: Image or document type missing."
139
 
 
140
  def process_batch_ui():
 
141
  global current_batch
142
  if not OPENROUTER_API_KEY:
143
  return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
 
144
  if not current_batch:
145
  return {"message": "Batch is empty. Add documents first."}, "Batch is empty."
 
146
  all_results = []
147
  status_updates = []
 
148
  for i, item_to_process in enumerate(current_batch):
149
  status_msg = f"Processing document {i+1}/{len(current_batch)}: {item_to_process['filename']} ({item_to_process['type']})..."
150
  print(status_msg)
 
 
151
  extracted_data = process_single_image_with_openrouter(item_to_process["path"], item_to_process["type"])
152
  all_results.append(extracted_data)
153
  if "error" in extracted_data:
154
  status_updates.append(f"Error processing {item_to_process['filename']}: {extracted_data['error']}")
155
  else:
156
  status_updates.append(f"Successfully processed {item_to_process['filename']}.")
 
 
 
157
  grouped_by_person = {}
158
  unidentified_docs = []
 
159
  for result_item in all_results:
160
  doc_id = None
161
  if isinstance(result_item, dict) and "extracted_fields" in result_item and isinstance(result_item["extracted_fields"], dict):
162
  fields = result_item["extracted_fields"]
 
163
  passport_no = fields.get("Document Number") or fields.get("Passport Number") or fields.get("passport_number")
164
  name = fields.get("Given Names") or fields.get("Given Name") or fields.get("Name")
165
  surname = fields.get("Surname") or fields.get("Family Name")
166
  dob = fields.get("Date of Birth") or fields.get("DOB")
 
167
  if passport_no:
168
  doc_id = f"passport_{str(passport_no).replace(' ', '').lower()}"
169
  elif name and surname and dob:
170
  doc_id = f"{str(name).replace(' ', '').lower()}_{str(surname).replace(' ', '').lower()}_{str(dob).replace(' ', '')}"
171
  elif name and surname:
172
  doc_id = f"{str(name).replace(' ', '').lower()}_{str(surname).replace(' ', '').lower()}"
 
 
173
  if doc_id:
174
  if doc_id not in grouped_by_person:
175
  grouped_by_person[doc_id] = {"person_identifier": doc_id, "documents": []}
176
  grouped_by_person[doc_id]["documents"].append(result_item)
177
  else:
178
  unidentified_docs.append(result_item)
 
179
  final_structured_output = {
180
  "summary": f"Processed {len(current_batch)} documents.",
181
+ "grouped_by_person": list(grouped_by_person.values()) if grouped_by_person else [],
182
  "unidentified_documents_or_errors": unidentified_docs
183
  }
 
184
  final_status = "Batch processing complete. " + " | ".join(status_updates)
185
  print(final_status)
186
+ return final_structured_output, final_status
 
187
 
188
  def clear_batch_ui():
 
189
  global current_batch
190
  current_batch = []
191
+ return [], "Batch cleared successfully."
192
 
 
 
193
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
194
  gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
195
  gr.Markdown(
 
201
  "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
202
  "6. View the extracted information in JSON format below."
203
  )
 
204
  if not OPENROUTER_API_KEY:
205
  gr.Markdown(
206
  "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
207
  "API calls will fail. Please set it and restart this application.</h3>"
208
  )
 
209
  with gr.Row():
210
  with gr.Column(scale=1):
211
  gr.Markdown("### Step 1: Add Document")
212
  image_input = gr.Image(
213
  label="Upload Document Image",
214
+ type="filepath",
215
  sources=["upload"],
216
  height=300
217
  )
 
227
  filterable=True
228
  )
229
  add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")
 
230
  with gr.Column(scale=2):
231
  gr.Markdown("### Step 2: Review Current Batch")
232
  batch_dataframe = gr.Dataframe(
233
  headers=["Filename", "Document Type"],
234
  datatype=["str", "str"],
235
+ row_count=1, # Changed: Start with 1 row, should grow dynamically
236
+ col_count=2, # Changed: Simpler integer for fixed columns
237
  wrap=True
 
238
  )
239
  clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")
 
240
  gr.Markdown("### Step 3: Process Batch")
241
  process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
 
242
  status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)
 
243
  gr.Markdown("### Step 4: View Results")
244
  output_json_display = gr.JSON(label="Extracted Information (JSON Format)")
 
 
245
  add_button.click(
246
  fn=add_document_to_batch_ui,
247
  inputs=[image_input, doc_type_input],
248
  outputs=[batch_dataframe, status_message_textbox]
249
+ ).then(lambda: None, outputs=image_input)
 
250
  clear_batch_button.click(
251
  fn=clear_batch_ui,
252
  inputs=[],
253
  outputs=[batch_dataframe, status_message_textbox]
254
  )
 
255
  process_button.click(
256
  fn=process_batch_ui,
257
  inputs=[],
 
264
  print("Please set it before running the application, e.g.:")
265
  print(" export OPENROUTER_API_KEY='your_openrouter_key_here'")
266
  print("The application will launch, but API calls will fail.")
267
+ demo.launch(share=True) # Added share=True
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio==3.50.2
2
- requests
3
- python-dotenv
4
- Pillow
 
1
+ gradio~=3.50.2
2
+ requests>=2.25.0,<3.0.0
3
+ # pillow might be needed explicitly if not pulled by gradio for image handling
4
+ Pillow>=9.0.0