Daemontatox committed on
Commit
78af081
·
verified ·
1 Parent(s): 7af3ec0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +337 -481
app.py CHANGED
@@ -1,38 +1,40 @@
1
- from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer , AutoModel,Qwen2VLForConditionalGeneration, AutoModelForImageTextToText , Qwen2_5_VLForConditionalGeneration
2
- from qwen_vl_utils import process_vision_info
3
- from PIL import Image
4
- import requests
5
- import torch
6
- from threading import Thread
7
- import gradio as gr
8
- from gradio import FileData
9
- import time
10
- import spaces
11
- import fitz # PyMuPDF
12
  import io
13
- import numpy as np
14
  import logging
 
 
 
15
 
16
  # Set up logging
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
- # Load model and processor
21
- ckpt ="Qwen/Qwen2.5-VL-7B-Instruct"
22
- model = Qwen2_5_VLForConditionalGeneration.from_pretrained(ckpt, torch_dtype=torch.bfloat16,trust_remote_code=True).to("cuda")
23
- processor = AutoProcessor.from_pretrained(ckpt,trust_remote_code=True)
 
 
 
 
 
24
 
 
 
 
25
  class DocumentState:
26
  def __init__(self):
27
  self.current_doc_images = []
28
  self.current_doc_text = ""
29
  self.doc_type = None
30
-
31
  def clear(self):
32
  self.current_doc_images = []
33
  self.current_doc_text = ""
34
  self.doc_type = None
35
-
36
  doc_state = DocumentState()
37
 
38
  def process_pdf_file(file_path):
@@ -41,7 +43,6 @@ def process_pdf_file(file_path):
41
  doc = fitz.open(file_path)
42
  images = []
43
  text = ""
44
-
45
  for page_num in range(doc.page_count):
46
  try:
47
  page = doc[page_num]
@@ -49,6 +50,7 @@ def process_pdf_file(file_path):
49
  if page_text.strip():
50
  text += f"Page {page_num + 1}:\n{page_text}\n\n"
51
 
 
52
  zoom = 3
53
  mat = fitz.Matrix(zoom, zoom)
54
  pix = page.get_pixmap(matrix=mat, alpha=False)
@@ -56,6 +58,7 @@ def process_pdf_file(file_path):
56
  img = Image.open(io.BytesIO(img_data))
57
  img = img.convert("RGB")
58
 
 
59
  max_size = 1600
60
  if max(img.size) > max_size:
61
  ratio = max_size / max(img.size)
@@ -63,18 +66,13 @@ def process_pdf_file(file_path):
63
  img = img.resize(new_size, Image.Resampling.LANCZOS)
64
 
65
  images.append(img)
66
-
67
  except Exception as e:
68
  logger.error(f"Error processing page {page_num}: {str(e)}")
69
  continue
70
-
71
  doc.close()
72
-
73
  if not images:
74
  raise ValueError("No valid images could be extracted from the PDF")
75
-
76
  return images, text
77
-
78
  except Exception as e:
79
  logger.error(f"Error processing PDF file: {str(e)}")
80
  raise
@@ -83,7 +81,6 @@ def process_uploaded_file(file):
83
  """Process uploaded file and update document state."""
84
  try:
85
  doc_state.clear()
86
-
87
  if file is None:
88
  return "No file uploaded. Please upload a file."
89
 
@@ -92,11 +89,7 @@ def process_uploaded_file(file):
92
  file_path = file["name"]
93
  else:
94
  file_path = file.name
95
-
96
- # Get file extension
97
  file_ext = file_path.lower().split('.')[-1]
98
-
99
- # Define allowed extensions
100
  image_extensions = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'}
101
 
102
  if file_ext == 'pdf':
@@ -122,11 +115,18 @@ def process_uploaded_file(file):
122
  else:
123
  return f"Unsupported file type: {file_ext}. Please upload a PDF or image file (PNG, JPG, JPEG, GIF, BMP, WEBP)."
124
  except Exception as e:
125
- logger.error(f"Error in process_file: {str(e)}")
126
  return "An error occurred while processing the file. Please try again."
127
 
128
- @spaces.GPU()
 
 
129
  def bot_streaming(prompt_option, max_new_tokens=4096):
 
 
 
 
 
130
  try:
131
  # Define predetermined prompts
132
  prompts = {
@@ -173,10 +173,9 @@ Noc representative's date approval_date
173
 
174
  Noc representative status as approval_status
175
 
 
176
 
177
-
178
-
179
- The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted."""
180
  ),
181
  "NOC Basic": (
182
  "Based on the provided timesheet details, extract the following information:\n"
@@ -187,474 +186,332 @@ The output should be formatted as a JSON instance that conforms to the JSON sche
187
  " - NOC ID\n"
188
  " - Month and year (in MM/YYYY format)"
189
  ),
190
- "NOC Structured test": (
191
- "You are an advanced data extraction assistant. Your task is to parse structured input text and extract key data points into clearly defined categories. Focus only on the requested details, ensuring accuracy and proper grouping. Below is the format for extracting the data:\n\n"
192
- "---\n"
193
- "Project Information\n\n"
194
- "Project Name:\n\n"
195
- "Project and Package:\n\n"
196
- "RPO Number:\n\n"
197
- "PMC Name:\n\n"
198
- "Project Location:\n\n"
199
- "Year:\n\n"
200
- "Month:\n\n"
201
- "Timesheet Details\n\n"
202
- "Week X (Date)\n\n"
203
- "Holidays:\n\n"
204
- "Regular Hours:\n\n"
205
- "Overtime Hours:\n\n"
206
- "Total Hours:\n\n"
207
- "Comments:\n\n"
208
- "Additional Data\n\n"
209
- "Reviewed By:\n\n"
210
- "Date of Review:\n\n"
211
- "Position:\n\n"
212
- "Supervisor Business:\n\n"
213
- "Date of Approval:\n\n"
214
- "---\n\n"
215
- "Ensure the extracted data strictly follows the format above and is organized by category. Ignore unrelated text. Respond only with the formatted output."
216
- ),
217
- "Aramco Full structured": (
218
  """You are a document parsing assistant designed to extract structured data from various document types, including invoices, timesheets, purchase orders, and travel bookings. Your goal is to return highly accurate, properly formatted JSON for each document type.
219
- General Rules:
220
- 1. Always return ONLY valid JSON—no explanations, comments, or additional text.
221
- 2. Use null for any fields that are not present or cannot be extracted.
222
- 3. Ensure all JSON keys are enclosed in double quotes and properly formatted.
223
- 4. Validate financial, time tracking, and contract details carefully before output.
224
-
225
- Extraction Instructions:
226
-
227
- 1. Invoice:
228
- - Parse and extract financial and invoice-specific details.
229
- - JSON structure:
230
- ```json
231
- {
232
- "invoice": {
233
- "date": null,
234
- "dueDate": null,
235
- "accountNumber": null,
236
- "invoiceNumber": null,
237
- "customerContact": null,
238
- "kintecContact": null,
239
- "accountsContact": null,
240
- "periodEnd": null,
241
- "contractNo": null,
242
- "specialistsName": null,
243
- "rpoNumber": null,
244
- "assignmentProject": null,
245
- "workLocation": null,
246
- "expenses": null,
247
- "regularHours": null,
248
- "overtime": null,
249
- "mobilisationAllowance": null,
250
- "dailyHousing": null,
251
- "opPipTechnical": null,
252
- "code": null,
253
- "vatBasis": null,
254
- "vatRate": null,
255
- "vatAmount": null,
256
- "totalExclVat": null,
257
- "totalInclVat": null
258
- }
259
- }
260
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
- 2. Timesheet:
263
- - Extract time tracking, work details, and approvals.
264
- - JSON structure:
265
- ```json
266
- {
267
- "timesheet": {
268
- "Year": null,
269
- "RPO_Number": null,
270
- "PMC_Name": null,
271
- "Project_Location": null,
272
- "Project_and_Package": null,
273
- "Month": null,
274
- "Timesheet_Details": [
275
- {
276
- "Week": null,
277
- "Regular_Hours": null,
278
- "Overtime_Hours": null,
279
- "Total_Hours": null,
280
- "Comments": null
281
- },
282
- {
283
- "Week": null,
284
- "Regular_Hours": null,
285
- "Overtime_Hours": null,
286
- "Total_Hours": null,
287
- "Comments": null
288
- }
289
- ],
290
- "Monthly_Totals": {
291
- "Regular_Hours": null,
292
- "Overtime_Hours": null,
293
- "Total_Hours": null
294
- },
295
- "reviewedBy": {
296
- "name": null,
297
- "position": null,
298
- "date": null
299
- },
300
- "approvedBy": {
301
- "name": null,
302
- "position": null,
303
- "date": null
304
- }
305
- }
306
- }
307
- ```
308
 
309
- 3. Purchase Order:
310
- - Extract contract and pricing details with precision.
311
- - JSON structure:
312
- ```json
313
- {
314
- "purchaseOrder": {
315
- "contractNo": null,
316
- "relPoNo": null,
317
- "version": null,
318
- "title": null,
319
- "startDate": null,
320
- "endDate": null,
321
- "costCenter": null,
322
- "purchasingGroup": null,
323
- "contractor": null,
324
- "location": null,
325
- "workDescription": null,
326
- "pricing": {
327
- "regularRate": null,
328
- "overtimeRate": null,
329
- "totalBudget": null
330
- }
331
- }
332
- }
333
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
- 4. Travel Booking:
336
- - Parse travel-specific and employee information.
337
- - JSON structure:
338
- ```json
339
- {
340
- "travelBooking": {
341
- "requestId": null,
342
- "approvalStatus": null,
343
- "employee": {
344
- "name": null,
345
- "id": null,
346
- "email": null,
347
- "firstName": null,
348
- "lastName": null,
349
- "gradeCodeGroup": null
350
- },
351
- "defaultManager": {
352
- "name": null,
353
- "email": null
354
- },
355
- "sender": {
356
- "name": null,
357
- "email": null
358
- },
359
- "travel": {
360
- "startDate": null,
361
- "endDate": null,
362
- "requestPolicy": null,
363
- "requestType": null,
364
- "employeeType": null,
365
- "travelActivity": null,
366
- "tripType": null
367
- },
368
- "cost": {
369
- "companyCode": null,
370
- "costObject": null,
371
- "costObjectId": null
372
- },
373
- "transport": {
374
- "type": null,
375
- "comments": null
376
- },
377
- "changeRequired": null,
378
- "comments": null
379
- }
380
- }
381
- ```
382
-
383
- Use these structures for parsing documents and ensure compliance with the rules and instructions provided for each type.
384
- """
385
  ),
386
- "Aramco Timesheet only": (
387
- """ Extract time tracking, work details, and approvals.
388
- - JSON structure:
389
- ```json
390
- {
391
- "timesheet": {
392
- "Year": null,
393
- "RPO_Number": null,
394
- "PMC_Name": null,
395
- "Project_Location": null,
396
- "Project_and_Package": null,
397
- "Month": null,
398
- "Timesheet_Details": [
399
- {
400
- "Week": null,
401
- "Regular_Hours": null,
402
- "Overtime_Hours": null,
403
- "Total_Hours": null,
404
- "Comments": null
405
- },
406
- {
407
- "Week": null,
408
- "Regular_Hours": null,
409
- "Overtime_Hours": null,
410
- "Total_Hours": null,
411
- "Comments": null
412
- }
413
- ],
414
- "Monthly_Totals": {
415
- "Regular_Hours": null,
416
- "Overtime_Hours": null,
417
- "Total_Hours": null
418
- },
419
- "reviewedBy": {
420
- "name": null,
421
- "position": null,
422
- "date": null
423
- },
424
- "approvedBy": {
425
- "name": null,
426
- "position": null,
427
- "date": null
428
- }
429
- }
430
- }
431
- ```"""
432
  ),
433
- "Aramco test": (
434
- """You are a high-performance document parsing assistant, optimized for speed and accuracy. Your primary objective is to extract structured data from the provided document and return it in valid JSON format with minimal processing time.
435
-
436
- Guidelines for Speed Optimization:
437
- 1. Process the document with minimal computation and only extract the required fields.
438
- 2. Use null for any fields that are missing or not clearly identifiable.
439
- 3. Avoid redundant checks or deep parsing; rely on the most straightforward extraction methods.
440
- 4. Always return ONLY valid JSON—no additional text, explanations, or formatting errors.
441
- 5. Focus on precision for key-value pairs; skip over ambiguous or irrelevant information.
442
-
443
- Document-Specific JSON Structures:
444
-
445
- 1. **Invoice**:
446
- - Extract financial and customer details efficiently.
447
- - JSON format:
448
- ```json
449
- {
450
- "invoice": {
451
- "date": null,
452
- "dueDate": null,
453
- "accountNumber": null,
454
- "invoiceNumber": null,
455
- "customerContact": null,
456
- "kintecContact": null,
457
- "accountsContact": null,
458
- "periodEnd": null,
459
- "contractNo": null,
460
- "specialistsName": null,
461
- "rpoNumber": null,
462
- "assignmentProject": null,
463
- "workLocation": null,
464
- "expenses": null,
465
- "regularHours": null,
466
- "overtime": null,
467
- "mobilisationAllowance": null,
468
- "dailyHousing": null,
469
- "opPipTechnical": null,
470
- "code": null,
471
- "vatBasis": null,
472
- "vatRate": null,
473
- "vatAmount": null,
474
- "totalExclVat": null,
475
- "totalInclVat": null
476
- }
477
- }
478
- ```
479
-
480
- 2. **Timesheet**:
481
- - Extract time tracking and approval data swiftly.
482
- - JSON format:
483
- ```json
484
- {
485
- "timesheet": {
486
- "Year": null,
487
- "RPO_Number": null,
488
- "PMC_Name": null,
489
- "Project_Location": null,
490
- "Project_and_Package": null,
491
- "Month": null,
492
- "Timesheet_Details": [
493
- {
494
- "Week": null,
495
- "Regular_Hours": null,
496
- "Overtime_Hours": null,
497
- "Total_Hours": null,
498
- "Comments": null
499
- },
500
- {
501
- "Week": null,
502
- "Regular_Hours": null,
503
- "Overtime_Hours": null,
504
- "Total_Hours": null,
505
- "Comments": null
506
- }
507
- ],
508
- "Monthly_Totals": {
509
- "Regular_Hours": null,
510
- "Overtime_Hours": null,
511
- "Total_Hours": null
512
- },
513
- "reviewedBy": {
514
- "name": null,
515
- "position": null,
516
- "date": null
517
- },
518
- "approvedBy": {
519
- "name": null,
520
- "position": null,
521
- "date": null
522
- }
523
- }
524
- }
525
- ```
526
-
527
- 3. **Purchase Order**:
528
- - Extract contract and pricing details with minimal overhead.
529
- - JSON format:
530
- ```json
531
- {
532
- "purchaseOrder": {
533
- "contractNo": null,
534
- "relPoNo": null,
535
- "version": null,
536
- "title": null,
537
- "startDate": null,
538
- "endDate": null,
539
- "costCenter": null,
540
- "purchasingGroup": null,
541
- "contractor": null,
542
- "location": null,
543
- "workDescription": null,
544
- "pricing": {
545
- "regularRate": null,
546
- "overtimeRate": null,
547
- "totalBudget": null
548
- }
549
- }
550
- }
551
- ```
552
-
553
- 4. **Travel Booking**:
554
- - Extract essential travel and employee data efficiently.
555
- - JSON format:
556
- ```json
557
- {
558
- "travelBooking": {
559
- "requestId": null,
560
- "approvalStatus": null,
561
- "employee": {
562
- "name": null,
563
- "id": null,
564
- "email": null,
565
- "firstName": null,
566
- "lastName": null,
567
- "gradeCodeGroup": null
568
- },
569
- "defaultManager": {
570
- "name": null,
571
- "email": null
572
- },
573
- "sender": {
574
- "name": null,
575
- "email": null
576
- },
577
- "travel": {
578
- "startDate": null,
579
- "endDate": null,
580
- "requestPolicy": null,
581
- "requestType": null,
582
- "employeeType": null,
583
- "travelActivity": null,
584
- "tripType": null
585
- },
586
- "cost": {
587
- "companyCode": null,
588
- "costObject": null,
589
- "costObjectId": null
590
- },
591
- "transport": {
592
- "type": null,
593
- "comments": null
594
- },
595
- "changeRequired": null,
596
- "comments": null
597
- }
598
- }
599
- ```
600
-
601
- Ensure your parsing method balances accuracy and speed, prioritizing quick turnaround without compromising JSON validity or structural integrity.
602
- """
603
  )
604
  }
605
 
606
- # Get the selected prompt
607
  selected_prompt = prompts.get(prompt_option, "Invalid prompt selected.")
 
 
 
 
 
608
 
609
- messages = []
 
610
 
611
- # Include document context
612
  if doc_state.current_doc_images:
613
- context = f"\nDocument context:\n{doc_state.current_doc_text}" if doc_state.current_doc_text else ""
614
- current_msg = f"{selected_prompt}{context}"
615
- messages.append({"role": "user", "content": [{"type": "text", "text": current_msg}, {"type": "image"}]})
616
- else:
617
- messages.append({"role": "user", "content": [{"type": "text", "text": selected_prompt}]})
618
-
619
- # Process inputs
620
- texts = processor.apply_chat_template(messages, add_generation_prompt=True)
621
 
622
- try:
623
- if doc_state.current_doc_images:
624
- inputs = processor(
625
- text=texts,
626
- images=doc_state.current_doc_images[0:1],
627
- return_tensors="pt"
628
- ).to("cuda")
629
- else:
630
- inputs = processor(text=texts, return_tensors="pt").to("cuda")
631
-
632
- streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
633
- generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
634
-
635
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
636
- thread.start()
637
-
638
- buffer = ""
639
- for new_text in streamer:
640
- buffer += new_text
641
  time.sleep(0.01)
642
  yield buffer
643
 
644
- except Exception as e:
645
- logger.error(f"Error in model processing: {str(e)}")
646
- yield "An error occurred while processing your request. Please try again."
647
-
648
  except Exception as e:
649
  logger.error(f"Error in bot_streaming: {str(e)}")
650
- yield "An error occurred. Please try again."
651
 
652
  def clear_context():
653
  """Clear the current document context."""
654
  doc_state.clear()
655
  return "Document context cleared. You can upload a new document."
656
 
657
- # Create the Gradio interface
 
 
658
  with gr.Blocks() as demo:
659
  gr.Markdown("# Document Analyzer with Predetermined Prompts")
660
  gr.Markdown("Upload a PDF or image (PNG, JPG, JPEG, GIF, BMP, WEBP) and select a prompt to analyze its contents.")
@@ -675,12 +532,11 @@ with gr.Blocks() as demo:
675
  choices=[
676
  "NOC Timesheet",
677
  "NOC Basic",
678
- "NOC Structured test",
679
  "Aramco Full structured",
680
  "Aramco Timesheet only",
681
- "Aramco test"
682
  ],
683
- value="Options"
684
  )
685
  generate_btn = gr.Button("Generate")
686
 
@@ -709,4 +565,4 @@ with gr.Blocks() as demo:
709
  )
710
 
711
  # Launch the interface
712
- demo.launch(debug=True)
 
1
+ import openai
2
+ import base64
 
 
 
 
 
 
 
 
 
3
  import io
4
+ import time
5
  import logging
6
+ import fitz # PyMuPDF
7
+ from PIL import Image
8
+ import gradio as gr
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
13
 
14
+ import os
15
+ OPENROUTER_API_KEY = os.getenv("OPENAI_TOKEN")
16
+ if not OPENROUTER_API_KEY:
17
+ raise ValueError("OPENROUTER_API_KEY environment variable not set")
18
+ openai.api_key = OPENROUTER_API_KEY
19
+
20
+ # Configure the OpenAI API to use OpenRouter
21
+ openai.api_base = "https://openrouter.ai/api/v1"
22
+ openai.api_key = OPENROUTER_API_KEY
23
 
24
+ # -------------------------------
25
+ # Document State and File Processing
26
+ # -------------------------------
27
class DocumentState:
    """Tracks the images, extracted text, and type of the currently loaded document."""

    def __init__(self):
        # Start from a clean slate; clear() defines the empty state exactly once.
        self.clear()

    def clear(self):
        """Reset all document fields to their empty defaults."""
        self.current_doc_images = []
        self.current_doc_text = ""
        self.doc_type = None


# Module-level singleton shared by the upload handler and the generator.
doc_state = DocumentState()
39
 
40
  def process_pdf_file(file_path):
 
43
  doc = fitz.open(file_path)
44
  images = []
45
  text = ""
 
46
  for page_num in range(doc.page_count):
47
  try:
48
  page = doc[page_num]
 
50
  if page_text.strip():
51
  text += f"Page {page_num + 1}:\n{page_text}\n\n"
52
 
53
+ # Render page to an image
54
  zoom = 3
55
  mat = fitz.Matrix(zoom, zoom)
56
  pix = page.get_pixmap(matrix=mat, alpha=False)
 
58
  img = Image.open(io.BytesIO(img_data))
59
  img = img.convert("RGB")
60
 
61
+ # Resize if image is too large
62
  max_size = 1600
63
  if max(img.size) > max_size:
64
  ratio = max_size / max(img.size)
 
66
  img = img.resize(new_size, Image.Resampling.LANCZOS)
67
 
68
  images.append(img)
 
69
  except Exception as e:
70
  logger.error(f"Error processing page {page_num}: {str(e)}")
71
  continue
 
72
  doc.close()
 
73
  if not images:
74
  raise ValueError("No valid images could be extracted from the PDF")
 
75
  return images, text
 
76
  except Exception as e:
77
  logger.error(f"Error processing PDF file: {str(e)}")
78
  raise
 
81
  """Process uploaded file and update document state."""
82
  try:
83
  doc_state.clear()
 
84
  if file is None:
85
  return "No file uploaded. Please upload a file."
86
 
 
89
  file_path = file["name"]
90
  else:
91
  file_path = file.name
 
 
92
  file_ext = file_path.lower().split('.')[-1]
 
 
93
  image_extensions = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'}
94
 
95
  if file_ext == 'pdf':
 
115
  else:
116
  return f"Unsupported file type: {file_ext}. Please upload a PDF or image file (PNG, JPG, JPEG, GIF, BMP, WEBP)."
117
  except Exception as e:
118
+ logger.error(f"Error in process_uploaded_file: {str(e)}")
119
  return "An error occurred while processing the file. Please try again."
120
 
121
+ # -------------------------------
122
+ # Bot Streaming Function Using OpenAI API
123
+ # -------------------------------
124
  def bot_streaming(prompt_option, max_new_tokens=4096):
125
+ """
126
+ Generate a response using the OpenAI API.
127
+
128
+ If an image is available, it is encoded in base64 and appended to the prompt.
129
+ """
130
  try:
131
  # Define predetermined prompts
132
  prompts = {
 
173
 
174
  Noc representative status as approval_status
175
 
176
+ The output should be formatted as a JSON instance that conforms to the JSON schema below.
177
 
178
+ As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]} the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted."""
 
 
179
  ),
180
  "NOC Basic": (
181
  "Based on the provided timesheet details, extract the following information:\n"
 
186
  " - NOC ID\n"
187
  " - Month and year (in MM/YYYY format)"
188
  ),
189
+ "Aramco Full structured": (
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  """You are a document parsing assistant designed to extract structured data from various document types, including invoices, timesheets, purchase orders, and travel bookings. Your goal is to return highly accurate, properly formatted JSON for each document type.
191
+ General Rules:
192
+ 1. Always return ONLY valid JSON—no explanations, comments, or additional text.
193
+ 2. Use null for any fields that are not present or cannot be extracted.
194
+ 3. Ensure all JSON keys are enclosed in double quotes and properly formatted.
195
+ 4. Validate financial, time tracking, and contract details carefully before output.
196
+
197
+ Extraction Instructions:
198
+
199
+ 1. Invoice:
200
+ - Parse and extract financial and invoice-specific details.
201
+ - JSON structure:
202
+ ```json
203
+ {
204
+ "invoice": {
205
+ "date": null,
206
+ "dueDate": null,
207
+ "accountNumber": null,
208
+ "invoiceNumber": null,
209
+ "customerContact": null,
210
+ "kintecContact": null,
211
+ "accountsContact": null,
212
+ "periodEnd": null,
213
+ "contractNo": null,
214
+ "specialistsName": null,
215
+ "rpoNumber": null,
216
+ "assignmentProject": null,
217
+ "workLocation": null,
218
+ "expenses": null,
219
+ "regularHours": null,
220
+ "overtime": null,
221
+ "mobilisationAllowance": null,
222
+ "dailyHousing": null,
223
+ "opPipTechnical": null,
224
+ "code": null,
225
+ "vatBasis": null,
226
+ "vatRate": null,
227
+ "vatAmount": null,
228
+ "totalExclVat": null,
229
+ "totalInclVat": null
230
+ }
231
+ }
232
+ ```
233
+
234
+ 2. Timesheet:
235
+ - Extract time tracking, work details, and approvals.
236
+ - JSON structure:
237
+ ```json
238
+ {
239
+ "timesheet": {
240
+ "Year": null,
241
+ "RPO_Number": null,
242
+ "PMC_Name": null,
243
+ "Project_Location": null,
244
+ "Project_and_Package": null,
245
+ "Month": null,
246
+ "Timesheet_Details": [
247
+ {
248
+ "Week": null,
249
+ "Regular_Hours": null,
250
+ "Overtime_Hours": null,
251
+ "Total_Hours": null,
252
+ "Comments": null
253
+ },
254
+ {
255
+ "Week": null,
256
+ "Regular_Hours": null,
257
+ "Overtime_Hours": null,
258
+ "Total_Hours": null,
259
+ "Comments": null
260
+ }
261
+ ],
262
+ "Monthly_Totals": {
263
+ "Regular_Hours": null,
264
+ "Overtime_Hours": null,
265
+ "Total_Hours": null
266
+ },
267
+ "reviewedBy": {
268
+ "name": null,
269
+ "position": null,
270
+ "date": null
271
+ },
272
+ "approvedBy": {
273
+ "name": null,
274
+ "position": null,
275
+ "date": null
276
+ }
277
+ }
278
+ }
279
+ ```
280
 
281
+ 3. Purchase Order:
282
+ - Extract contract and pricing details with precision.
283
+ - JSON structure:
284
+ ```json
285
+ {
286
+ "purchaseOrder": {
287
+ "contractNo": null,
288
+ "relPoNo": null,
289
+ "version": null,
290
+ "title": null,
291
+ "startDate": null,
292
+ "endDate": null,
293
+ "costCenter": null,
294
+ "purchasingGroup": null,
295
+ "contractor": null,
296
+ "location": null,
297
+ "workDescription": null,
298
+ "pricing": {
299
+ "regularRate": null,
300
+ "overtimeRate": null,
301
+ "totalBudget": null
302
+ }
303
+ }
304
+ }
305
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
+ 4. Travel Booking:
308
+ - Parse travel-specific and employee information.
309
+ - JSON structure:
310
+ ```json
311
+ {
312
+ "travelBooking": {
313
+ "requestId": null,
314
+ "approvalStatus": null,
315
+ "employee": {
316
+ "name": null,
317
+ "id": null,
318
+ "email": null,
319
+ "firstName": null,
320
+ "lastName": null,
321
+ "gradeCodeGroup": null
322
+ },
323
+ "defaultManager": {
324
+ "name": null,
325
+ "email": null
326
+ },
327
+ "sender": {
328
+ "name": null,
329
+ "email": null
330
+ },
331
+ "travel": {
332
+ "startDate": null,
333
+ "endDate": null,
334
+ "requestPolicy": null,
335
+ "requestType": null,
336
+ "employeeType": null,
337
+ "travelActivity": null,
338
+ "tripType": null
339
+ },
340
+ "cost": {
341
+ "companyCode": null,
342
+ "costObject": null,
343
+ "costObjectId": null
344
+ },
345
+ "transport": {
346
+ "type": null,
347
+ "comments": null
348
+ },
349
+ "changeRequired": null,
350
+ "comments": null
351
+ }
352
+ }
353
+ ```
354
 
355
+ Use these structures for parsing documents and ensure compliance with the rules and instructions provided for each type."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  ),
357
+ "Aramco Timesheet only": (
358
+ """Extract time tracking, work details, and approvals.
359
+ - JSON structure:
360
+ ```json
361
+ {
362
+ "timesheet": {
363
+ "Year": null,
364
+ "RPO_Number": null,
365
+ "PMC_Name": null,
366
+ "Project_Location": null,
367
+ "Project_and_Package": null,
368
+ "Month": null,
369
+ "Timesheet_Details": [
370
+ {
371
+ "Week": null,
372
+ "Regular_Hours": null,
373
+ "Overtime_Hours": null,
374
+ "Total_Hours": null,
375
+ "Comments": null
376
+ },
377
+ {
378
+ "Week": null,
379
+ "Regular_Hours": null,
380
+ "Overtime_Hours": null,
381
+ "Total_Hours": null,
382
+ "Comments": null
383
+ }
384
+ ],
385
+ "Monthly_Totals": {
386
+ "Regular_Hours": null,
387
+ "Overtime_Hours": null,
388
+ "Total_Hours": null
389
+ },
390
+ "reviewedBy": {
391
+ "name": null,
392
+ "position": null,
393
+ "date": null
394
+ },
395
+ "approvedBy": {
396
+ "name": null,
397
+ "position": null,
398
+ "date": null
399
+ }
400
+ }
401
+ }
402
+ ```"""
403
  ),
404
+ "NOC Invoice": (
405
+ """You are a highly accurate data extraction system. Your task is to analyze the provided image of an invoice and extract all data, paying close attention to the structure and formatting of the document. Organize the extracted data in a clear, structured format, such as JSON. Do not invent any information. If a field cannot be read with high confidence, indicate that with "UNCLEAR" or a similar designation. Be as specific as possible, and do not summarize or combine fields unless explicitly indicated.
406
+
407
+ Here's the expected output format, in JSON, with all required fields:
408
+
409
+ ```json
410
+ {
411
+ "invoiceDetails": {
412
+ "pleaseQuote": "string",
413
+ "invoiceNumber": "string",
414
+ "workPeriod": "string",
415
+ "invoiceDate": "string",
416
+ "assignmentReference": "string"
417
+ },
418
+ "from": {
419
+ "companyName": "string",
420
+ "addressLine1": "string",
421
+ "addressLine2": "string",
422
+ "city": "string",
423
+ "postalCode": "string",
424
+ "country": "string"
425
+ },
426
+ "to": {
427
+ "companyName": "string",
428
+ "office": "string",
429
+ "floor": "string",
430
+ "building": "string",
431
+ "addressLine1": "string",
432
+ "poBox": "string",
433
+ "city": "string"
434
+ },
435
+ "services": [
436
+ {
437
+ "serviceDetails": "string",
438
+ "fromDate": "string",
439
+ "toDate": "string",
440
+ "currency": "string",
441
+ "fx": "string",
442
+ "noOfDays": "number or string (if range)",
443
+ "rate": "number",
444
+ "total": "number"
445
+ }
446
+ ],
447
+ "totals": {
448
+ "subTotal": "number",
449
+ "tax": "number",
450
+ "totalDue": "number"
451
+ },
452
+ "bankDetails": {
453
+ "bankName": "string",
454
+ "descriptionReferenceField": "string",
455
+ "bankAddress": "string",
456
+ "swiftBicCode": "string",
457
+ "ibanNumber": "string",
458
+ "accountNumber": "string",
459
+ "beneficiaryName": "string",
460
+ "accountCurrency": "string",
461
+ "expectedAmount": "string"
462
+ }
463
+ }
464
+ ```"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  )
466
  }
467
 
468
+ # Retrieve the selected prompt
469
  selected_prompt = prompts.get(prompt_option, "Invalid prompt selected.")
470
+ context = ""
471
+ if doc_state.current_doc_images:
472
+ if doc_state.current_doc_text:
473
+ context = f"\nDocument context:\n{doc_state.current_doc_text}"
474
+ full_prompt = selected_prompt + context
475
 
476
+ # Create the messages list for the API call
477
+ messages = [{"role": "user", "content": full_prompt}]
478
 
479
+ # If an image is available, encode it in base64 and append to the prompt
480
  if doc_state.current_doc_images:
481
+ buffered = io.BytesIO()
482
+ doc_state.current_doc_images[0].save(buffered, format="PNG")
483
+ img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
484
+ messages[0]["content"] += f"\n[Image Data: {img_str}]"
 
 
 
 
485
 
486
+ # Call the OpenAI API with streaming enabled.
487
+ response = openai.ChatCompletion.create(
488
+ model="qwen/qwen2.5-vl-72b-instruct:free",
489
+ messages=messages,
490
+ max_tokens=max_new_tokens,
491
+ stream=True,
492
+ )
493
+
494
+ buffer = ""
495
+ for chunk in response:
496
+ if 'choices' in chunk:
497
+ delta = chunk['choices'][0].get('delta', {})
498
+ content = delta.get('content', '')
499
+ buffer += content
 
 
 
 
 
500
  time.sleep(0.01)
501
  yield buffer
502
 
 
 
 
 
503
  except Exception as e:
504
  logger.error(f"Error in bot_streaming: {str(e)}")
505
+ yield "An error occurred while processing your request. Please try again."
506
 
507
def clear_context():
    """Wipe the active document state and tell the user a new upload is possible.

    Returns:
        A status string suitable for display in the Gradio status textbox.
    """
    doc_state.clear()
    status = "Document context cleared. You can upload a new document."
    return status
511
 
512
+ # -------------------------------
513
+ # Create the Gradio Interface
514
+ # -------------------------------
515
  with gr.Blocks() as demo:
516
  gr.Markdown("# Document Analyzer with Predetermined Prompts")
517
  gr.Markdown("Upload a PDF or image (PNG, JPG, JPEG, GIF, BMP, WEBP) and select a prompt to analyze its contents.")
 
532
  choices=[
533
  "NOC Timesheet",
534
  "NOC Basic",
 
535
  "Aramco Full structured",
536
  "Aramco Timesheet only",
537
+ "NOC Invoice"
538
  ],
539
+ value="NOC Timesheet"
540
  )
541
  generate_btn = gr.Button("Generate")
542
 
 
565
  )
566
 
567
  # Launch the interface
568
+ demo.launch(debug=True)