adil9858 committed on
Commit
d4664d1
·
verified ·
1 Parent(s): 34b887b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -374
app.py CHANGED
@@ -6,82 +6,43 @@ import io
6
  import fitz # PyMuPDF
7
  import tempfile
8
  import os
9
- import shutil # Added for cleaning up temp dirs
10
 
11
  # --- OPENAI CLIENT SETUP ---
12
- # Use environment variable or textbox for API key for better security in deployed apps
13
- # client = OpenAI(
14
- # base_url="https://openrouter.ai/api/v1",
15
- # api_key=os.getenv("OPENROUTER_API_KEY") # Recommended approach
16
- # )
17
- # For this example, we'll get the key from the input field
18
 
19
- def get_openai_client(api_key):
20
- """Initializes and returns the OpenAI client."""
21
- if not api_key:
22
- # Handle case where API key is missing (though Gradio will likely prevent this)
23
- raise ValueError("API key is required.")
24
-
25
- return OpenAI(
26
- base_url="https://openrouter.ai/api/v1",
27
- api_key=api_key
28
- )
29
-
30
- def convert_pdf_to_images(pdf_path):
31
- """Convert PDF file path to list of PIL Images and return the images,
32
- and a list of temporary image file paths."""
33
  images = []
34
- temp_image_paths = []
35
- temp_dir = None
36
  try:
37
- pdf_document = fitz.open(pdf_path)
38
- num_pages = len(pdf_document)
39
-
40
- # Create a temporary directory for images
41
- temp_dir = tempfile.mkdtemp()
42
-
43
- for page_num in range(num_pages):
44
  page = pdf_document.load_page(page_num)
45
- # Render at a higher DPI for better clarity for VLM
46
- pix = page.get_pixmap(dpi=300)
47
  img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
48
  images.append(img)
49
-
50
- # Save image to temp directory for Gradio preview/processing later
51
- temp_img_path = os.path.join(temp_dir, f"page_{page_num+1}.png")
52
- img.save(temp_img_path, format="PNG")
53
- temp_image_paths.append(temp_img_path)
54
 
55
  pdf_document.close()
56
-
57
- return images, temp_image_paths, num_pages, temp_dir
58
-
59
  except Exception as e:
60
- print(f"Error converting PDF: {e}")
61
- # Clean up temp dir if it was created
62
- if temp_dir and os.path.exists(temp_dir):
63
- shutil.rmtree(temp_dir)
64
- return [], [], 0, None
65
 
66
  def image_to_base64(image):
67
  """Convert PIL Image to base64 string"""
68
- # Ensure image is RGB (some images might be RGBA, etc.)
69
- if image.mode != 'RGB':
70
- image = image.convert('RGB')
71
-
72
  with io.BytesIO() as buffer:
73
- # Using PNG as it's lossless and well-supported
74
  image.save(buffer, format="PNG")
75
  return base64.b64encode(buffer.getvalue()).decode("utf-8")
76
 
77
- def generate_summary(extracted_texts, api_key):
78
  """Generate a comprehensive summary of all extracted texts"""
79
- if not extracted_texts:
80
- return "No content extracted to summarize."
81
-
82
  try:
83
- client = get_openai_client(api_key)
84
-
85
  summary_prompt = f"""
86
  You are an expert document analyst. Below are the extracted contents from multiple pages of a document.
87
  Please provide a comprehensive, detailed summary that:
@@ -91,357 +52,145 @@ def generate_summary(extracted_texts, api_key):
91
  4. Presents the information in a clear, structured format
92
 
93
  Extracted contents from pages:
94
- ---
95
  {extracted_texts}
96
- ---
97
 
98
  Comprehensive Summary:
99
  """
100
-
101
  response = client.chat.completions.create(
102
- model="opengvlab/internvl3-14b:free", # Ensure this model is available via OpenRouter
103
  messages=[
104
  {"role": "system", "content": "You are Dalton, an expert in analyzing and summarizing document contents."},
105
  {"role": "user", "content": summary_prompt}
106
  ],
107
- max_tokens=2048 # Adjust as needed
108
  )
109
-
110
  return response.choices[0].message.content
111
-
112
  except Exception as e:
113
- print(f"Error generating summary: {e}")
114
  return f"Error generating summary: {e}"
115
 
116
- # --- Gradio App Functions ---
 
 
117
 
118
- def process_upload(file_obj):
119
- """Handle file upload - converts PDF, prepares image previews, and updates state."""
120
- if file_obj is None:
121
- # Clear outputs
122
- return None, None, [], [], "Please upload a document.", None, None, None
123
 
124
- file_path = file_obj.name # Gradio's File component provides a path
125
- file_type = file_obj.orig_name.split('.')[-1].lower() # Get extension from original name
126
 
127
- if file_type == "pdf":
128
- images, temp_paths, num_pages, temp_dir = convert_pdf_to_images(file_path)
129
- if not images:
130
- return None, None, [], [], "Failed to convert PDF to images.", None, None, None
131
-
132
- page_options = [f"Page {i}" for i in range(1, num_pages + 1)]
133
- # By default select all pages
134
- default_selection = page_options
135
-
136
- # Store original PIL images and temp dir in state
137
- # State will hold (list of PIL images, list of temp file paths, temp directory path)
138
- images_state = (images, temp_paths, temp_dir)
139
-
140
- status = f"PDF uploaded. {num_pages} pages detected. Select pages to analyze."
141
- # Return selected pages (as names), image previews (as paths), page options, status
142
- return images_state, default_selection, temp_paths, page_options, status, None, None, None # Also return None for results and summary
143
 
144
- elif file_type in ["jpg", "jpeg", "png"]:
145
  try:
146
- img = Image.open(file_path)
147
- # Ensure it's RGB
148
- if img.mode != 'RGB':
149
- img = img.convert('RGB')
150
-
151
- # Save to a temp file for Gradio preview
152
- temp_dir = tempfile.mkdtemp()
153
- temp_img_path = os.path.join(temp_dir, "uploaded_image.png")
154
- img.save(temp_img_path, format="PNG")
155
-
156
- # Store original PIL image and temp dir in state
157
- # State will hold (list of PIL images, list of temp file paths, temp directory path)
158
- images_state = ([img], [temp_img_path], temp_dir)
159
-
160
- status = "Image uploaded."
161
- # Return empty selection/options for image, but provide the single image path for preview
162
- return images_state, [], [temp_img_path], [], status, None, None, None # Also return None for results and summary
 
 
 
 
163
 
164
  except Exception as e:
165
- print(f"Error loading image: {e}")
166
- # Clean up temp dir if created
167
- if temp_dir and os.path.exists(temp_dir):
168
- shutil.rmtree(temp_dir)
169
- return None, None, [], [], f"Failed to load image: {e}", None, None, None
 
 
 
 
 
 
 
170
  else:
171
- return None, None, [], [], "Unsupported file type. Please upload JPG, PNG, or PDF.", None, None, None
172
-
173
- def analyze_document(api_key, user_prompt, images_state, selected_page_names):
174
- """Analyze selected images using the VLM and generate summary."""
175
- if not api_key:
176
- return None, None, "Please enter your Open Router API Key."
177
-
178
- if not images_state or not images_state[0]: # Check if images_state exists and contains images
179
- return None, None, "No document uploaded or converted."
180
-
181
- all_pil_images = images_state[0]
182
- temp_dir = images_state[2] # Get the temp directory path
183
-
184
- images_to_analyze = []
185
- extracted_texts = []
186
- all_results = []
187
-
188
- # Determine which images to process based on selection (or process all if image file)
189
- if selected_page_names: # This indicates PDF and pages were selected
190
- selected_indices = [int(name.split(" ")[1]) - 1 for name in selected_page_names]
191
- images_to_analyze = [(idx + 1, all_pil_images[idx]) for idx in selected_indices if idx < len(all_pil_images)]
192
- elif all_pil_images: # This indicates a single image file
193
- images_to_analyze = [(1, all_pil_images[0])]
194
-
195
- if not images_to_analyze:
196
- # Clean up temp dir as analysis failed or no pages selected
197
- if temp_dir and os.path.exists(temp_dir):
198
- shutil.rmtree(temp_dir)
199
- return None, None, "No pages selected for analysis."
200
-
201
-
202
- try:
203
- client = get_openai_client(api_key)
204
-
205
- for page_num, image in images_to_analyze:
206
- status_message = f"Analyzing page {page_num}..."
207
- yield None, None, status_message # Update status message during processing
208
-
209
- try:
210
- image_base64_data = image_to_base64(image)
211
-
212
- response = client.chat.completions.create(
213
- model="opengvlab/internvl3-14b:free", # Ensure this model is available via OpenRouter
214
- messages=[
215
- {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
216
- {"role": "user", "content": [
217
- {"type": "text", "text": user_prompt},
218
- {"type": "image_url", "image_url": {
219
- "url": f"data:image/png;base64,{image_base64_data}"
220
- }}
221
- ]}
222
- ],
223
- max_tokens=1024 # Adjust as needed
224
- )
225
-
226
- result = response.choices[0].message.content
227
- extracted_texts.append(f"=== Page {page_num} ===\n{result}\n")
228
-
229
- if len(images_to_analyze) > 1:
230
- all_results.append(f"### 📄 Page {page_num} Result:")
231
- else:
232
- all_results.append("### ✅ Analysis Result:")
233
- all_results.append(result)
234
- all_results.append("---")
235
-
236
- except Exception as e:
237
- error_msg = f"An error occurred analyzing page {page_num}: {e}"
238
- print(error_msg)
239
- all_results.append(f"### ❌ Error on Page {page_num}:")
240
- all_results.append(error_msg)
241
- all_results.append("---")
242
- # Don't stop, try other pages
243
-
244
- # Combine individual results
245
- individual_results_markdown = "\n".join(all_results) if all_results else "No results generated."
246
-
247
- # Generate and display comprehensive summary if multiple pages were processed
248
- summary_text = ""
249
- if len(images_to_analyze) > 1 and extracted_texts:
250
- yield individual_results_markdown, None, "Generating comprehensive summary..."
251
- full_extracted_text = "\n".join(extracted_texts)
252
- summary_text = generate_summary(full_extracted_text, api_key)
253
- status_message = "Analysis complete. Summary generated."
254
- elif extracted_texts: # Single page case
255
- summary_text = "Summary not generated for single page analysis. See analysis result above."
256
- status_message = "Analysis complete."
257
- else:
258
- summary_text = "No content extracted for summary."
259
- status_message = "Analysis complete, but no text extracted."
260
-
261
- # Clean up the temporary directory used for images
262
- if temp_dir and os.path.exists(temp_dir):
263
- shutil.rmtree(temp_dir)
264
-
265
- return individual_results_markdown, summary_text, status_message
266
-
267
- except Exception as e:
268
- # Clean up the temporary directory in case of error
269
- if temp_dir and os.path.exists(temp_dir):
270
- shutil.rmtree(temp_dir)
271
-
272
- error_msg = f"An unhandled error occurred during analysis: {e}"
273
- print(error_msg)
274
- return None, None, error_msg
275
-
276
-
277
- # Function to clean up temp dir when session ends or is closed
278
- def clean_temp_dir(temp_dir):
279
- if temp_dir and os.path.exists(temp_dir):
280
- print(f"Cleaning up temporary directory: {temp_dir}")
281
- shutil.rmtree(temp_dir)
282
-
283
 
284
- # --- Gradio Interface Layout ---
 
 
285
 
286
- # Custom CSS (simplified from Streamlit CSS)
287
- css = """
288
- body {
289
- font-family: 'Inter', sans-serif;
290
- }
291
- .gradio-container {
292
- max-width: 800px !important;
293
- margin: auto;
294
- padding: 20px;
295
- background-color: #f9fafb; /* Light gray background */
296
- }
297
- h1, h2, h3, h4 {
298
- color: #111827; /* Darker text for headers */
299
- }
300
- .subtitle {
301
- font-size: 1rem;
302
- color: #6b7280; /* Gray text for subtitle */
303
- margin-bottom: 2rem;
304
- }
305
- .summary-box {
306
- background-color: #e0f2fe; /* Light blue background */
307
- padding: 1.5rem;
308
- border-radius: 8px;
309
- margin-top: 1rem; /* Reduced margin-top */
310
- border: 1px solid #bfdbfe; /* Light blue border */
311
- }
312
- .summary-box p {
313
- margin: 0; /* Remove paragraph margin */
314
- }
315
- .file-upload-label .wrap {
316
- text-align: center !important;
317
- }
318
- .gr-button {
319
- margin-top: 1rem !important;
320
- }
321
- /* Style for the status message */
322
- #status_message_id {
323
- margin-top: 1rem;
324
- font-weight: bold;
325
- color: #1f2937;
326
- }
327
- """
328
 
329
- with gr.Blocks(css=css, title="DocSum - Document Summarizer", theme=gr.themes.Soft()) as demo:
330
-
331
- # State to hold images and temp paths after PDF conversion
332
- # Structure: (list of PIL images, list of temp file paths for preview/analysis, temp directory path)
333
- images_state = gr.State(None)
334
- # State to hold the temp dir path for cleanup
335
- current_temp_dir = gr.State(None)
 
 
 
 
 
 
336
 
337
- gr.HTML("""
338
- <div style="text-align: center;">
339
- <img src='https://raw.githubusercontent.com/KoshurAI/DocSum/main/blob.png' width='100'>
340
- <h1>DocSum</h1>
341
- <p class="subtitle">Document Summarizer Powered by VLM • Developed by <a href="https://koshurai.com" target="_blank">Koshur AI</a></p>
342
- </div>
343
  """)
344
 
345
  with gr.Row():
346
- user_prompt_input = gr.Textbox(
347
- label="📝 Enter Your Prompt",
348
- value="Extract all content structurally",
349
- lines=2,
350
- interactive=True,
351
- container=True,
352
- scale=2
353
- )
354
- api_key_input = gr.Textbox(
355
- label="🔒 OpenRouter API Key",
356
- type="password",
357
- interactive=True,
358
- container=True,
359
- scale=1,
360
- info="Your key is not stored."
361
- # Consider adding value=os.getenv("OPENROUTER_API_KEY", "") for easier local testing
362
- )
363
-
364
- file_upload = gr.File(
365
- label="Upload a document (JPG/PNG/PDF)",
366
- file_types=[".jpg", ".jpeg", ".png", ".pdf"],
367
- interactive=True
368
- )
369
-
370
- # Components for PDF page selection and preview (initially hidden)
371
- page_selector = gr.Checkboxgroup(
372
- label="Select PDF Pages to Analyze",
373
- choices=[],
374
- value=[],
375
- visible=False,
376
- interactive=True
377
- )
378
- preview_gallery = gr.Gallery(
379
- label="Selected Page Previews",
380
- visible=False,
381
- container=True,
382
- preview=True, # Show previews
383
- columns=3,
384
- rows=1,
385
- object_fit="contain",
386
- height="auto"
387
- )
388
-
389
- status_message = gr.Markdown(elem_id="status_message_id") # Use a Markdown element for status updates
390
-
391
- analyze_button = gr.Button("🔍 Analyze Document")
392
-
393
- # Outputs
394
- individual_results_output = gr.Markdown(label="Page-by-Page Analysis Results")
395
- summary_output = gr.Markdown(label="Comprehensive Document Summary", elem_classes="summary-box") # Apply CSS class
396
 
397
- # --- Event Handling ---
398
 
399
- # When a file is uploaded, process it (convert PDF, show previews, update state)
400
- file_upload.change(
401
- fn=process_upload,
402
- inputs=[file_upload],
403
- outputs=[images_state, page_selector, preview_gallery, page_selector.choices, status_message, individual_results_output, summary_output, current_temp_dir],
404
- show_progress=True # Show Gradio's built-in progress indicator
405
- )
406
-
407
- # When page selection changes (for PDF), update the preview gallery
408
- # Note: This requires saving the temp image paths in the state from process_upload
409
- page_selector.change(
410
- fn=lambda selected_pages, images_state: [images_state[1][int(name.split(" ")[1]) - 1] for name in selected_pages] if images_state and images_state[1] else [],
411
- inputs=[page_selector, images_state],
412
- outputs=[preview_gallery],
413
- show_progress=False # No need for progress bar here
414
- ).then( # Chain another event to update status message
415
- fn=lambda num_selected: f"{num_selected} pages selected." if num_selected > 0 else "No pages selected.",
416
- inputs=[page_selector],
417
- outputs=[status_message],
418
- show_progress=False
419
- )
420
 
 
 
 
421
 
422
- # When the Analyze button is clicked, run the analysis function
423
- analyze_button.click(
424
- fn=analyze_document,
425
- inputs=[api_key_input, user_prompt_input, images_state, page_selector],
426
- outputs=[individual_results_output, summary_output, status_message],
427
- show_progress=False # We handle progress manually with status_message yield
428
- )
429
-
430
- # --- Footer ---
431
- gr.HTML("<footer style='text-align: center; margin-top: 3rem; color: #9ca3af; font-size: 0.875rem;'>© 2025 Koshur AI. All rights reserved.</footer>")
432
 
433
- # Clean up temp directory when the Gradio app finishes or encounters a critical error
434
- # Note: This might not catch all termination scenarios, especially if the server crashes unexpectedly.
435
- # A more robust solution for production might involve monitoring temp dirs periodically.
436
- # Using demo.load() to clean up at startup and demo.close() to clean up at exit.
437
- demo.load(fn=lambda: clean_temp_dir(current_temp_dir.value), inputs=[], outputs=[], every=10, show_progress=False) # Check & cleanup periodically (adjust interval)
438
- # The close event handler is tricky for cleanup; rely more on periodic check or OS cleanup.
439
 
440
- # --- Launch App ---
441
- if __name__ == "__main__":
442
- # The share=True option creates a public URL (useful for testing)
443
- # The debug=True option provides more detailed error messages
444
- demo.launch(share=False, debug=True)
445
 
446
- # You might want to add cleanup here if running locally and not sharing
447
- # clean_temp_dir(current_temp_dir.value) # This won't run if the app is killed externally
 
6
  import fitz # PyMuPDF
7
  import tempfile
8
  import os
 
9
 
10
  # --- OPENAI CLIENT SETUP ---
11
# The OpenRouter key is a secret and must never be committed to source control.
# It is read from the environment (e.g. a Hugging Face Space secret) instead.
# Any key that was ever committed in plain text must be revoked and rotated.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    # Fail fast with an actionable message rather than on the first request.
    raise RuntimeError(
        "OPENROUTER_API_KEY environment variable is not set; "
        "configure it as a secret before starting the app."
    )

# Shared OpenAI-compatible client pointed at the OpenRouter endpoint.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)
 
 
15
 
16
def convert_pdf_to_images(pdf_file):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            binary file-like object exposing ``read()``.

    Returns:
        A list of RGB ``PIL.Image`` objects, one per page, in page order.
        On any failure the function does not raise; it returns a string of
        the form ``"Error converting PDF: <reason>"`` which callers detect
        with ``isinstance(result, str)``.
    """
    images = []
    tmp_file_path = None
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            # Already on disk: open it directly, no temporary copy needed.
            pdf_path = os.fspath(pdf_file)
        else:
            # Read first so a bad upload object cannot leave an orphaned
            # temp file behind.
            data = pdf_file.read()
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                tmp_file_path = tmp_file.name
                tmp_file.write(data)
            pdf_path = tmp_file_path

        pdf_document = fitz.open(pdf_path)
        try:
            for page_num in range(len(pdf_document)):
                page = pdf_document.load_page(page_num)
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                images.append(img)
        finally:
            # Close the document even if rendering a page fails.
            pdf_document.close()
    except Exception as e:
        return f"Error converting PDF: {e}"
    finally:
        # Best-effort cleanup of the temporary copy on success and failure.
        if tmp_file_path is not None:
            try:
                os.unlink(tmp_file_path)
            except OSError:
                pass
    return images
 
 
 
36
 
37
def image_to_base64(image):
    """Serialize a PIL image as PNG and return it as a base64 text string."""
    png_stream = io.BytesIO()
    try:
        # Encode into memory; nothing is written to disk.
        image.save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()
    finally:
        png_stream.close()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")
42
 
43
+ def generate_summary(extracted_texts):
44
  """Generate a comprehensive summary of all extracted texts"""
 
 
 
45
  try:
 
 
46
  summary_prompt = f"""
47
  You are an expert document analyst. Below are the extracted contents from multiple pages of a document.
48
  Please provide a comprehensive, detailed summary that:
 
52
  4. Presents the information in a clear, structured format
53
 
54
  Extracted contents from pages:
 
55
  {extracted_texts}
 
56
 
57
  Comprehensive Summary:
58
  """
59
+
60
  response = client.chat.completions.create(
61
+ model="opengvlab/internvl3-14b:free",
62
  messages=[
63
  {"role": "system", "content": "You are Dalton, an expert in analyzing and summarizing document contents."},
64
  {"role": "user", "content": summary_prompt}
65
  ],
66
+ max_tokens=2048
67
  )
68
+
69
  return response.choices[0].message.content
 
70
  except Exception as e:
 
71
  return f"Error generating summary: {e}"
72
 
73
def analyze_images(images, user_prompt, selected_pages=None):
    """Send the selected page images to the vision model and collect results.

    Args:
        images: List of PIL images (one per page), or an error-message string
            produced by an upstream conversion step.
        user_prompt: Instruction text sent to the model with each image.
        selected_pages: Optional list of 1-based page numbers to analyze.
            ``None`` means every page. Numbers outside ``1..len(images)``
            are ignored.

    Returns:
        A ``(markdown_result, summary)`` tuple on every path. ``summary`` is
        the combined summary text when more than one page was analyzed
        successfully, otherwise ``None``.
    """
    # Every exit returns a 2-tuple: callers unpack ``result, summary``.
    if not images:
        return "No images provided for analysis.", None

    if isinstance(images, str):  # upstream error message, pass it through
        return images, None

    if selected_pages is None:
        selected_pages = list(range(1, len(images) + 1))

    # Skip page numbers that do not exist in this document (e.g. a stale
    # selection) instead of raising IndexError or wrapping to the last page.
    page_count = len(images)
    pages_to_analyze = [
        (page, images[page - 1])
        for page in selected_pages
        if 1 <= page <= page_count
    ]

    all_results = []
    extracted_texts = []

    for page, image in pages_to_analyze:
        try:
            image_base64 = image_to_base64(image)

            response = client.chat.completions.create(
                model="opengvlab/internvl3-14b:free",
                messages=[
                    {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
                    {"role": "user", "content": [
                        {"type": "text", "text": user_prompt},
                        {"type": "image_url", "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        }}
                    ]}
                ],
                max_tokens=1024
            )

            result = response.choices[0].message.content
            extracted_texts.append(f"=== Page {page} ===\n{result}\n")
            all_results.append(f"### 📄 Page {page} Result:")
            all_results.append(result)
            all_results.append("---")

        except Exception as e:
            # Record the failure and keep going with the remaining pages.
            all_results.append(f"An error occurred analyzing page {page}: {e}")

    full_result = "\n".join(all_results)

    if len(extracted_texts) > 1:
        full_extracted_text = "\n".join(extracted_texts)
        summary = generate_summary(full_extracted_text)
        full_result += "\n\n## 📝 Comprehensive Document Summary\n"
        full_result += summary
        return full_result, summary

    if len(extracted_texts) == 1:
        return full_result, None

    # Nothing succeeded: still show the per-page errors so the user can see why.
    if all_results:
        return "No valid results generated.\n\n" + full_result, None
    return "No valid results generated.", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
def process_input(file, user_prompt, page_numbers):
    """Dispatch an uploaded document to the analysis pipeline.

    Args:
        file: The upload as delivered by the UI. May be a filesystem path
            (``str`` / ``os.PathLike``), an object with a ``name`` path
            attribute, or a file-like object carrying a ``type`` MIME string.
        user_prompt: Instruction text forwarded to the model.
        page_numbers: 1-based page numbers to analyze for PDFs; empty or
            ``None`` means all pages. Ignored for single images.

    Returns:
        A ``(markdown_result, summary)`` tuple; ``summary`` may be ``None``.
    """
    import mimetypes  # local import: only this function needs it

    if file is None:
        return "Please upload a file.", None

    # Resolve a path from whichever shape the upload arrived in.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = getattr(file, "name", None)
    if not isinstance(path, str):
        path = None

    # Prefer an explicit MIME type if the object has one; otherwise infer it
    # from the file name, since plain paths carry no ``type`` attribute.
    mime_type = getattr(file, "type", None)
    if not mime_type and path:
        mime_type, _ = mimetypes.guess_type(path)
    mime_type = mime_type or ""

    on_disk = path is not None and os.path.isfile(path)

    if mime_type == "application/pdf":
        if on_disk:
            # Hand the converter a readable binary stream of the file.
            with open(path, "rb") as pdf_stream:
                images = convert_pdf_to_images(pdf_stream)
        else:
            images = convert_pdf_to_images(file)
        if isinstance(images, str):  # error message
            return images, None
        if not page_numbers:
            page_numbers = list(range(1, len(images) + 1))
        return analyze_images(images, user_prompt, page_numbers)

    if mime_type.startswith("image/"):
        images = [Image.open(path if on_disk else file)]
        return analyze_images(images, user_prompt)

    return "Unsupported file type. Please upload a JPG/PNG/PDF.", None
147
 
148
+ # --- GRADIO INTERFACE ---
149
with gr.Blocks(title="DocSum - Document Summarizer") as demo:
    gr.Markdown("""
    <h1 style="text-align:center;">🧾 DocSum</h1>
    <p style="text-align:center;">Document Summarizer Powered by VLM • Developed by <a href='https://koshurai.com' target='_blank'>Koshur AI</a></p>
    """)

    with gr.Row():
        with gr.Column():
            file_upload = gr.File(label="Upload a document (JPG/PNG/PDF)", file_types=[".jpg", ".jpeg", ".png", ".pdf"])
            prompt = gr.Textbox(label="📝 Enter Your Prompt", value="Extract all content structurally")
            page_selector = gr.CheckboxGroup(label="Select Pages (for PDFs only)", choices=[], visible=False)

            def update_page_selector(file):
                """Show one checkbox per page when a PDF is uploaded, else hide the selector."""
                # The upload arrives as a path string or as an object whose
                # ``name`` is the path; it has no MIME ``type`` attribute.
                path = file if isinstance(file, str) else getattr(file, "name", None)
                if not isinstance(path, str) or not path.lower().endswith(".pdf"):
                    return gr.update(choices=[], value=[], visible=False)
                try:
                    doc = fitz.open(path)
                    try:
                        num_pages = len(doc)
                    finally:
                        doc.close()
                except Exception:
                    # Unreadable PDF: hide the selector; the error is reported
                    # when the user runs the analysis.
                    return gr.update(choices=[], value=[], visible=False)
                # Reset the value so selections from a previous document do not linger.
                return gr.update(choices=list(range(1, num_pages + 1)), value=[], visible=True)

            file_upload.change(fn=update_page_selector, inputs=file_upload, outputs=page_selector)

            submit_btn = gr.Button("🔍 Analyze Document")

        with gr.Column():
            output_box = gr.Markdown(label="Analysis Output")
            summary_download = gr.File(label="Download Summary", visible=False)

    def handle_submit(file, user_prompt, pages):
        """Run the analysis and expose the summary as a downloadable text file."""
        outcome = process_input(file, user_prompt, pages)
        # Tolerate a bare error string as well as a (result, summary) tuple.
        if isinstance(outcome, str):
            result, summary = outcome, None
        else:
            result, summary = outcome

        if not summary:
            return result, gr.update(value=None, visible=False)

        # Explicit UTF-8 so non-ASCII model output is written on any platform.
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt", encoding="utf-8") as tmpfile:
            tmpfile.write(summary)
            summary_file = tmpfile.name
        # The component starts hidden, so reveal it when there is a file to offer.
        return result, gr.update(value=summary_file, visible=True)

    submit_btn.click(fn=handle_submit, inputs=[file_upload, prompt, page_selector], outputs=[output_box, summary_download])

    gr.Markdown("<footer>© 2025 Koshur AI. All rights reserved.</footer>")

# Launch Gradio App
demo.launch()