ManTea committed on
Commit
0e5b8f8
·
1 Parent(s): 58605d4

QA to PROD

Browse files
app/api/pdf_routes.py CHANGED
@@ -98,23 +98,47 @@ async def send_progress_update(user_id, file_id, step, progress=0.0, message="")
98
 
99
  # Function with fixed indentation for the troublesome parts
100
  async def handle_pdf_processing_result(result, correlation_id, user_id, file_id, filename, document, vector_status,
101
- vector_database_id, temp_file_path, db, is_pdf, mock_mode):
102
- """Fixed version of the code with proper indentation"""
103
- # If successful, update status but don't try to permanently store files
104
  if result.get('success'):
105
  try:
106
- log_upload_debug(correlation_id, f"Processed file successfully - no permanent storage in Hugging Face environment")
 
 
107
  except Exception as move_error:
108
- log_upload_debug(correlation_id, f"Error in storage handling: {move_error}", move_error)
109
 
110
  # Update status in PostgreSQL
111
  if vector_database_id and document and vector_status:
112
  try:
113
  log_upload_debug(correlation_id, f"Updating vector status to 'completed' for document ID {document.id}")
 
 
 
 
114
  vector_status.status = "completed"
115
  vector_status.embedded_at = datetime.now()
116
- vector_status.vector_id = file_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  document.is_embedded = True
 
118
  db.commit()
119
  log_upload_debug(correlation_id, f"Database status updated successfully")
120
  except Exception as db_error:
@@ -136,9 +160,6 @@ async def handle_pdf_processing_result(result, correlation_id, user_id, file_id,
136
  # Add document information to the result
137
  if document:
138
  result["document_database_id"] = document.id
139
-
140
- # Include mock_mode in response
141
- result["mock_mode"] = mock_mode
142
  else:
143
  log_upload_debug(correlation_id, f"PDF processing failed: {result.get('error', 'Unknown error')}")
144
 
@@ -167,7 +188,7 @@ async def handle_pdf_processing_result(result, correlation_id, user_id, file_id,
167
  log_upload_debug(correlation_id, f"Error sending WebSocket notification: {ws_error}", ws_error)
168
 
169
  # Cleanup: delete temporary file if it still exists
170
- if os.path.exists(temp_file_path):
171
  try:
172
  os.remove(temp_file_path)
173
  log_upload_debug(correlation_id, f"Removed temporary file {temp_file_path}")
@@ -189,7 +210,6 @@ async def upload_pdf(
189
  vector_database_id: Optional[int] = Form(None),
190
  content_type: Optional[str] = Form(None), # Add content_type parameter
191
  background_tasks: BackgroundTasks = None,
192
- mock_mode: bool = Form(False), # Set to False to use real database
193
  db: Session = Depends(get_db)
194
  ):
195
  """
@@ -203,12 +223,18 @@ async def upload_pdf(
203
  - **user_id**: User ID for WebSocket status updates
204
  - **vector_database_id**: ID of vector database in PostgreSQL (optional)
205
  - **content_type**: Content type of the file (optional)
206
- - **mock_mode**: Simulate Pinecone operations instead of performing real calls (default: false)
 
207
  """
208
  # Generate request ID for tracking
209
  correlation_id = str(uuid.uuid4())[:8]
210
  logger.info(f"[{correlation_id}] PDF upload request received: ns={namespace}, index={index_name}, user={user_id}")
211
- log_upload_debug(correlation_id, f"Upload request: vector_db_id={vector_database_id}, mock_mode={mock_mode}")
 
 
 
 
 
212
 
213
  try:
214
  # Check file type - accept both PDF and plaintext for testing
@@ -218,13 +244,8 @@ async def upload_pdf(
218
  log_upload_debug(correlation_id, f"File type check: is_pdf={is_pdf}, is_text={is_text}, filename={file.filename}")
219
 
220
  if not (is_pdf or is_text):
221
- if not mock_mode:
222
- # In real mode, only accept PDFs
223
- log_upload_debug(correlation_id, f"Rejecting non-PDF file in real mode: {file.filename}")
224
- raise HTTPException(status_code=400, detail="Only PDF files are accepted")
225
- else:
226
- # In mock mode, convert any file to text for testing
227
- logger.warning(f"[{correlation_id}] Non-PDF file uploaded in mock mode: {file.filename} - will treat as text")
228
 
229
  # If vector_database_id provided, get info from PostgreSQL
230
  api_key = None
@@ -237,11 +258,10 @@ async def upload_pdf(
237
  VectorDatabase.id == vector_database_id,
238
  VectorDatabase.status == "active"
239
  ).first()
 
240
  if not vector_db:
241
- return PDFResponse(
242
- success=False,
243
- error=f"Vector database with ID {vector_database_id} not found or inactive"
244
- )
245
 
246
  log_upload_debug(correlation_id, f"Found vector database: id={vector_db.id}, name={vector_db.name}, index={vector_db.pinecone_index}")
247
 
@@ -325,13 +345,26 @@ async def upload_pdf(
325
 
326
  metadata["content_type"] = actual_content_type
327
 
328
- if title:
329
- metadata["title"] = title
330
- else:
331
- # Use filename as title if not provided
332
- title = file.filename
333
- metadata["title"] = title
 
 
 
 
334
 
 
 
 
 
 
 
 
 
 
335
  if description:
336
  metadata["description"] = description
337
 
@@ -359,7 +392,7 @@ async def upload_pdf(
359
  # Create document record without file content
360
  try:
361
  document = Document(
362
- name=title or file.filename,
363
  file_type="pdf" if is_pdf else "text",
364
  content_type=actual_content_type, # Use the actual_content_type here
365
  size=len(file_content),
@@ -387,16 +420,17 @@ async def upload_pdf(
387
  log_upload_debug(correlation_id, f"Error creating document content: {content_error}", content_error)
388
  raise
389
 
390
- # Create vector status record
391
  try:
392
  vector_status = VectorStatus(
393
  document_id=document.id,
394
  vector_database_id=vector_database_id,
395
- status="pending"
 
396
  )
397
  db.add(vector_status)
398
  db.commit()
399
- log_upload_debug(correlation_id, f"Created vector status record for document ID {document.id}")
400
  except Exception as status_error:
401
  log_upload_debug(correlation_id, f"Error creating vector status: {status_error}", status_error)
402
  raise
@@ -404,13 +438,12 @@ async def upload_pdf(
404
  logger.info(f"[{correlation_id}] Created document ID {document.id} and vector status in PostgreSQL")
405
 
406
  # Initialize PDF processor with correct parameters
407
- log_upload_debug(correlation_id, f"Initializing PDFProcessor: index={index_name}, vector_db_id={vector_database_id}, mock_mode={mock_mode}")
408
  processor = PDFProcessor(
409
  index_name=index_name,
410
  namespace=namespace,
411
  api_key=api_key,
412
  vector_db_id=vector_database_id,
413
- mock_mode=mock_mode,
414
  correlation_id=correlation_id
415
  )
416
 
@@ -432,7 +465,7 @@ async def upload_pdf(
432
  log_upload_debug(correlation_id, f"Processing PDF with file_path={temp_file_path}, document_id={file_id}")
433
  result = await processor.process_pdf(
434
  file_path=temp_file_path,
435
- document_id=file_id,
436
  metadata=metadata,
437
  progress_callback=send_progress_update if user_id else None
438
  )
@@ -441,53 +474,47 @@ async def upload_pdf(
441
 
442
  # Handle PDF processing result
443
  return await handle_pdf_processing_result(result, correlation_id, user_id, file_id, file.filename, document, vector_status,
444
- vector_database_id, temp_file_path, db, is_pdf, mock_mode)
445
  except Exception as e:
446
- return await handle_upload_error(e, correlation_id, temp_file_path, user_id, file_id, file.filename, vector_database_id, vector_status, db, mock_mode)
447
-
448
- # Error handling for upload_pdf function
449
- async def handle_upload_error(e, correlation_id, temp_file_path, user_id, file_id, filename, vector_database_id, vector_status, db, mock_mode):
450
- """Fixed version of the error handling part with proper indentation"""
451
- log_upload_debug(correlation_id, f"Error in upload_pdf: {str(e)}", e)
452
- logger.exception(f"[{correlation_id}] Error in upload_pdf: {str(e)}")
453
-
454
- # Cleanup on error
455
- if os.path.exists(temp_file_path):
456
- try:
457
- os.remove(temp_file_path)
458
- log_upload_debug(correlation_id, f"Cleaned up temp file after error: {temp_file_path}")
459
- except Exception as cleanup_error:
460
- log_upload_debug(correlation_id, f"Error cleaning up temporary file: {cleanup_error}", cleanup_error)
461
 
462
- # Update error status in PostgreSQL
463
- if vector_database_id and vector_status:
464
- try:
465
- vector_status.status = "failed"
466
- vector_status.error_message = str(e)
467
- db.commit()
468
- log_upload_debug(correlation_id, f"Updated database with error status")
469
- except Exception as db_error:
470
- log_upload_debug(correlation_id, f"Error updating database with error status: {db_error}", db_error)
471
-
472
- # Send failure notification via WebSocket
473
- if user_id and file_id:
474
- try:
475
- await send_pdf_upload_failed(
476
- user_id,
477
- file_id,
478
- filename,
479
- str(e)
480
- )
481
- log_upload_debug(correlation_id, f"Sent failure notification for exception")
482
- except Exception as ws_error:
483
- log_upload_debug(correlation_id, f"Error sending WebSocket notification for failure: {ws_error}", ws_error)
484
-
485
- log_upload_debug(correlation_id, f"Upload request failed with exception: {str(e)}")
486
- return PDFResponse(
487
- success=False,
488
- error=str(e),
489
- mock_mode=mock_mode
490
- )
 
 
 
 
 
 
 
491
 
492
  # Endpoint xóa tài liệu
493
  @router.delete("/namespace", response_model=PDFResponse)
@@ -512,7 +539,6 @@ async def delete_namespace(
512
  # Nếu có vector_database_id, lấy thông tin từ PostgreSQL
513
  api_key = None
514
  vector_db = None
515
- mock_mode = False # Use real mode by default
516
 
517
  if vector_database_id:
518
  vector_db = db.query(VectorDatabase).filter(
@@ -546,13 +572,12 @@ async def delete_namespace(
546
  index_name=index_name,
547
  namespace=namespace,
548
  api_key=api_key,
549
- vector_db_id=vector_database_id,
550
- mock_mode=mock_mode
551
  )
552
  result = await processor.delete_namespace()
553
 
554
- # If in mock mode, also update PostgreSQL to reflect the deletion
555
- if mock_mode and result.get('success') and vector_database_id:
556
  try:
557
  # Update vector statuses for this database
558
  affected_count = db.query(VectorStatus).filter(
@@ -616,7 +641,6 @@ async def get_documents(
616
  # Nếu có vector_database_id, lấy thông tin từ PostgreSQL
617
  api_key = None
618
  vector_db = None
619
- mock_mode = False # Use real mode by default
620
 
621
  if vector_database_id:
622
  vector_db = db.query(VectorDatabase).filter(
@@ -648,8 +672,7 @@ async def get_documents(
648
  index_name=index_name,
649
  namespace=namespace,
650
  api_key=api_key,
651
- vector_db_id=vector_database_id,
652
- mock_mode=mock_mode
653
  )
654
 
655
  # Lấy danh sách documents từ Pinecone
@@ -691,7 +714,7 @@ async def get_documents(
691
  return DocumentsListResponse(
692
  success=False,
693
  error=str(e)
694
- )
695
 
696
  # Health check endpoint for PDF API
697
  @router.get("/health")
@@ -710,25 +733,32 @@ async def delete_document(
710
  index_name: str = "testbot768",
711
  vector_database_id: Optional[int] = None,
712
  user_id: Optional[str] = None,
713
- mock_mode: bool = False,
714
  db: Session = Depends(get_db)
715
  ):
716
  """
717
  Delete vectors for a specific document from the vector database
718
 
719
- - **document_id**: ID of the document to delete
 
 
 
 
720
  - **namespace**: Namespace in the vector database (default: "Default")
721
  - **index_name**: Name of the vector index (default: "testbot768")
722
  - **vector_database_id**: ID of vector database in PostgreSQL (optional)
723
  - **user_id**: User ID for WebSocket status updates (optional)
724
- - **mock_mode**: Simulate vector database operations (default: false)
725
  """
726
- logger.info(f"Delete document request: document_id={document_id}, namespace={namespace}, index={index_name}, vector_db_id={vector_database_id}, mock_mode={mock_mode}")
727
 
728
  try:
729
  # If vector_database_id is provided, get info from PostgreSQL
730
  api_key = None
731
  vector_db = None
 
 
 
 
 
732
 
733
  if vector_database_id:
734
  vector_db = db.query(VectorDatabase).filter(
@@ -754,10 +784,58 @@ async def delete_document(
754
  namespace = f"vdb-{vector_database_id}" if vector_database_id else namespace
755
  logger.info(f"Using namespace '{namespace}' based on vector database ID")
756
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757
  # Send notification of deletion start via WebSocket if user_id provided
758
  if user_id:
759
  try:
760
- await send_pdf_delete_started(user_id, document_id)
761
  except Exception as ws_error:
762
  logger.error(f"Error sending WebSocket notification: {ws_error}")
763
 
@@ -766,47 +844,98 @@ async def delete_document(
766
  index_name=index_name,
767
  namespace=namespace,
768
  api_key=api_key,
769
- vector_db_id=vector_database_id,
770
- mock_mode=mock_mode
771
  )
772
 
773
- # Delete document vectors
774
- result = await processor.delete_document(document_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
775
 
776
  # If successful and vector_database_id is provided, update PostgreSQL records
777
  if result.get('success') and vector_database_id:
778
  try:
779
- # Find document by vector ID if it exists
780
- document = db.query(Document).join(
781
- VectorStatus, Document.id == VectorStatus.document_id
782
- ).filter(
783
- Document.vector_database_id == vector_database_id,
784
- VectorStatus.vector_id == document_id
785
- ).first()
786
-
787
- if document:
788
- # Update vector status
789
- vector_status = db.query(VectorStatus).filter(
790
- VectorStatus.document_id == document.id,
791
- VectorStatus.vector_database_id == vector_database_id
792
- ).first()
793
 
794
- if vector_status:
795
- vector_status.status = "deleted"
796
- db.commit()
797
- result["postgresql_updated"] = True
798
- logger.info(f"Updated vector status for document ID {document.id} to 'deleted'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
799
  except Exception as db_error:
800
  logger.error(f"Error updating PostgreSQL records: {db_error}")
801
  result["postgresql_error"] = str(db_error)
802
 
 
 
 
 
 
803
  # Send notification of deletion completion via WebSocket if user_id provided
804
  if user_id:
805
  try:
806
  if result.get('success'):
807
- await send_pdf_delete_completed(user_id, document_id)
808
  else:
809
- await send_pdf_delete_failed(user_id, document_id, result.get('error', 'Unknown error'))
810
  except Exception as ws_error:
811
  logger.error(f"Error sending WebSocket notification: {ws_error}")
812
 
@@ -823,8 +952,7 @@ async def delete_document(
823
 
824
  return PDFResponse(
825
  success=False,
826
- error=str(e),
827
- mock_mode=mock_mode
828
  )
829
 
830
 
 
98
 
99
  # Function with fixed indentation for the troublesome parts
100
  async def handle_pdf_processing_result(result, correlation_id, user_id, file_id, filename, document, vector_status,
101
+ vector_database_id, temp_file_path, db, is_pdf):
102
+ """Process the result of PDF processing and update database records"""
103
+ # If successful, move file to permanent storage
104
  if result.get('success'):
105
  try:
106
+ storage_path = os.path.join(STORAGE_DIR, f"{file_id}{'.pdf' if is_pdf else '.txt'}")
107
+ shutil.move(temp_file_path, storage_path)
108
+ log_upload_debug(correlation_id, f"Moved file to storage at {storage_path}")
109
  except Exception as move_error:
110
+ log_upload_debug(correlation_id, f"Error moving file to storage: {move_error}", move_error)
111
 
112
  # Update status in PostgreSQL
113
  if vector_database_id and document and vector_status:
114
  try:
115
  log_upload_debug(correlation_id, f"Updating vector status to 'completed' for document ID {document.id}")
116
+
117
+ # Update the vector status with the result document_id (important for later deletion)
118
+ result_document_id = result.get('document_id')
119
+
120
  vector_status.status = "completed"
121
  vector_status.embedded_at = datetime.now()
122
+
123
+ # Critical: Store the correct vector ID for future deletion
124
+ # This can be either the original file_id or the result_document_id
125
+ if result_document_id and result_document_id != file_id:
126
+ # If Pinecone returned a specific document_id, use that
127
+ vector_status.vector_id = result_document_id
128
+ log_upload_debug(correlation_id, f"Updated vector_id to {result_document_id} (from result)")
129
+ elif file_id:
130
+ # Make sure file_id is stored as the vector_id
131
+ vector_status.vector_id = file_id
132
+ log_upload_debug(correlation_id, f"Updated vector_id to {file_id} (from file_id)")
133
+
134
+ # Also ensure we store some backup identifiers in case the primary one fails
135
+ # Store the document name as a secondary identifier
136
+ vector_status.document_name = document.name
137
+ log_upload_debug(correlation_id, f"Stored document_name '{document.name}' in vector status for backup")
138
+
139
+ # Mark document as embedded
140
  document.is_embedded = True
141
+
142
  db.commit()
143
  log_upload_debug(correlation_id, f"Database status updated successfully")
144
  except Exception as db_error:
 
160
  # Add document information to the result
161
  if document:
162
  result["document_database_id"] = document.id
 
 
 
163
  else:
164
  log_upload_debug(correlation_id, f"PDF processing failed: {result.get('error', 'Unknown error')}")
165
 
 
188
  log_upload_debug(correlation_id, f"Error sending WebSocket notification: {ws_error}", ws_error)
189
 
190
  # Cleanup: delete temporary file if it still exists
191
+ if temp_file_path and os.path.exists(temp_file_path):
192
  try:
193
  os.remove(temp_file_path)
194
  log_upload_debug(correlation_id, f"Removed temporary file {temp_file_path}")
 
210
  vector_database_id: Optional[int] = Form(None),
211
  content_type: Optional[str] = Form(None), # Add content_type parameter
212
  background_tasks: BackgroundTasks = None,
 
213
  db: Session = Depends(get_db)
214
  ):
215
  """
 
223
  - **user_id**: User ID for WebSocket status updates
224
  - **vector_database_id**: ID of vector database in PostgreSQL (optional)
225
  - **content_type**: Content type of the file (optional)
226
+
227
+ Note: Mock mode has been permanently removed and the system always operates in real mode
228
  """
229
  # Generate request ID for tracking
230
  correlation_id = str(uuid.uuid4())[:8]
231
  logger.info(f"[{correlation_id}] PDF upload request received: ns={namespace}, index={index_name}, user={user_id}")
232
+ log_upload_debug(correlation_id, f"Upload request: vector_db_id={vector_database_id}")
233
+
234
+ # Variables that might need cleanup in case of error
235
+ temp_file_path = None
236
+ document = None
237
+ vector_status = None
238
 
239
  try:
240
  # Check file type - accept both PDF and plaintext for testing
 
244
  log_upload_debug(correlation_id, f"File type check: is_pdf={is_pdf}, is_text={is_text}, filename={file.filename}")
245
 
246
  if not (is_pdf or is_text):
247
+ log_upload_debug(correlation_id, f"Rejecting non-PDF file: {file.filename}")
248
+ raise HTTPException(status_code=400, detail="Only PDF files are accepted")
 
 
 
 
 
249
 
250
  # If vector_database_id provided, get info from PostgreSQL
251
  api_key = None
 
258
  VectorDatabase.id == vector_database_id,
259
  VectorDatabase.status == "active"
260
  ).first()
261
+
262
  if not vector_db:
263
+ log_upload_debug(correlation_id, f"Vector database {vector_database_id} not found or inactive")
264
+ raise HTTPException(status_code=404, detail="Vector database not found or inactive")
 
 
265
 
266
  log_upload_debug(correlation_id, f"Found vector database: id={vector_db.id}, name={vector_db.name}, index={vector_db.pinecone_index}")
267
 
 
345
 
346
  metadata["content_type"] = actual_content_type
347
 
348
+ # Use provided title or filename as document name
349
+ document_name = title or file.filename
350
+
351
+ # Verify document name is unique within this vector database
352
+ if vector_database_id:
353
+ # Check if a document with this name already exists in this vector database
354
+ existing_doc = db.query(Document).filter(
355
+ Document.name == document_name,
356
+ Document.vector_database_id == vector_database_id
357
+ ).first()
358
 
359
+ if existing_doc:
360
+ # Make the name unique by appending timestamp
361
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
362
+ base_name, extension = os.path.splitext(document_name)
363
+ document_name = f"{base_name}_{timestamp}{extension}"
364
+ log_upload_debug(correlation_id, f"Document name already exists, using unique name: {document_name}")
365
+
366
+ metadata["title"] = document_name
367
+
368
  if description:
369
  metadata["description"] = description
370
 
 
392
  # Create document record without file content
393
  try:
394
  document = Document(
395
+ name=document_name, # Use the (potentially) modified document name
396
  file_type="pdf" if is_pdf else "text",
397
  content_type=actual_content_type, # Use the actual_content_type here
398
  size=len(file_content),
 
420
  log_upload_debug(correlation_id, f"Error creating document content: {content_error}", content_error)
421
  raise
422
 
423
+ # Create vector status record - store file_id as the vector_id for deletion later
424
  try:
425
  vector_status = VectorStatus(
426
  document_id=document.id,
427
  vector_database_id=vector_database_id,
428
+ status="pending",
429
+ vector_id=file_id # Store the document UUID as vector_id for later deletion
430
  )
431
  db.add(vector_status)
432
  db.commit()
433
+ log_upload_debug(correlation_id, f"Created vector status record for document ID {document.id} with vector_id={file_id}")
434
  except Exception as status_error:
435
  log_upload_debug(correlation_id, f"Error creating vector status: {status_error}", status_error)
436
  raise
 
438
  logger.info(f"[{correlation_id}] Created document ID {document.id} and vector status in PostgreSQL")
439
 
440
  # Initialize PDF processor with correct parameters
441
+ log_upload_debug(correlation_id, f"Initializing PDFProcessor: index={index_name}, vector_db_id={vector_database_id}")
442
  processor = PDFProcessor(
443
  index_name=index_name,
444
  namespace=namespace,
445
  api_key=api_key,
446
  vector_db_id=vector_database_id,
 
447
  correlation_id=correlation_id
448
  )
449
 
 
465
  log_upload_debug(correlation_id, f"Processing PDF with file_path={temp_file_path}, document_id={file_id}")
466
  result = await processor.process_pdf(
467
  file_path=temp_file_path,
468
+ document_id=file_id, # Use UUID as document_id for Pinecone
469
  metadata=metadata,
470
  progress_callback=send_progress_update if user_id else None
471
  )
 
474
 
475
  # Handle PDF processing result
476
  return await handle_pdf_processing_result(result, correlation_id, user_id, file_id, file.filename, document, vector_status,
477
+ vector_database_id, temp_file_path, db, is_pdf)
478
  except Exception as e:
479
+ log_upload_debug(correlation_id, f"Error in upload_pdf: {str(e)}", e)
480
+ logger.exception(f"[{correlation_id}] Error in upload_pdf: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
481
 
482
+ # Cleanup on error
483
+ if os.path.exists(temp_file_path):
484
+ try:
485
+ os.remove(temp_file_path)
486
+ log_upload_debug(correlation_id, f"Cleaned up temp file after error: {temp_file_path}")
487
+ except Exception as cleanup_error:
488
+ log_upload_debug(correlation_id, f"Error cleaning up temporary file: {cleanup_error}", cleanup_error)
489
+
490
+ # Update error status in PostgreSQL
491
+ if vector_database_id and vector_status:
492
+ try:
493
+ vector_status.status = "failed"
494
+ vector_status.error_message = str(e)
495
+ db.commit()
496
+ log_upload_debug(correlation_id, f"Updated database with error status")
497
+ except Exception as db_error:
498
+ log_upload_debug(correlation_id, f"Error updating database with error status: {db_error}", db_error)
499
+
500
+ # Send failure notification via WebSocket
501
+ if user_id and file_id:
502
+ try:
503
+ await send_pdf_upload_failed(
504
+ user_id,
505
+ file_id,
506
+ file.filename,
507
+ str(e)
508
+ )
509
+ log_upload_debug(correlation_id, f"Sent failure notification for exception")
510
+ except Exception as ws_error:
511
+ log_upload_debug(correlation_id, f"Error sending WebSocket notification for failure: {ws_error}", ws_error)
512
+
513
+ log_upload_debug(correlation_id, f"Upload request failed with exception: {str(e)}")
514
+ return PDFResponse(
515
+ success=False,
516
+ error=str(e)
517
+ )
518
 
519
  # Endpoint xóa tài liệu
520
  @router.delete("/namespace", response_model=PDFResponse)
 
539
  # Nếu có vector_database_id, lấy thông tin từ PostgreSQL
540
  api_key = None
541
  vector_db = None
 
542
 
543
  if vector_database_id:
544
  vector_db = db.query(VectorDatabase).filter(
 
572
  index_name=index_name,
573
  namespace=namespace,
574
  api_key=api_key,
575
+ vector_db_id=vector_database_id
 
576
  )
577
  result = await processor.delete_namespace()
578
 
579
+ # If successful and vector_database_id, update PostgreSQL to reflect the deletion
580
+ if result.get('success') and vector_database_id:
581
  try:
582
  # Update vector statuses for this database
583
  affected_count = db.query(VectorStatus).filter(
 
641
  # Nếu có vector_database_id, lấy thông tin từ PostgreSQL
642
  api_key = None
643
  vector_db = None
 
644
 
645
  if vector_database_id:
646
  vector_db = db.query(VectorDatabase).filter(
 
672
  index_name=index_name,
673
  namespace=namespace,
674
  api_key=api_key,
675
+ vector_db_id=vector_database_id
 
676
  )
677
 
678
  # Lấy danh sách documents từ Pinecone
 
714
  return DocumentsListResponse(
715
  success=False,
716
  error=str(e)
717
+ )
718
 
719
  # Health check endpoint for PDF API
720
  @router.get("/health")
 
733
  index_name: str = "testbot768",
734
  vector_database_id: Optional[int] = None,
735
  user_id: Optional[str] = None,
 
736
  db: Session = Depends(get_db)
737
  ):
738
  """
739
  Delete vectors for a specific document from the vector database
740
 
741
+ This endpoint can be called in two ways:
742
+ 1. With the PostgreSQL document ID - will look up the actual vector_id first
743
+ 2. With the actual vector_id directly - when called from the PostgreSQL document deletion endpoint
744
+
745
+ - **document_id**: ID of the document to delete (can be PostgreSQL document ID or Pinecone vector_id)
746
  - **namespace**: Namespace in the vector database (default: "Default")
747
  - **index_name**: Name of the vector index (default: "testbot768")
748
  - **vector_database_id**: ID of vector database in PostgreSQL (optional)
749
  - **user_id**: User ID for WebSocket status updates (optional)
 
750
  """
751
+ logger.info(f"Delete document request: document_id={document_id}, namespace={namespace}, index={index_name}, vector_db_id={vector_database_id}")
752
 
753
  try:
754
  # If vector_database_id is provided, get info from PostgreSQL
755
  api_key = None
756
  vector_db = None
757
+ pinecone_document_id = document_id # Default to the provided document_id
758
+ document_to_delete = None
759
+ vector_status_to_update = None
760
+ document_found = False # Flag to track if document was found
761
+ vector_id_found = False # Flag to track if a valid vector ID was found
762
 
763
  if vector_database_id:
764
  vector_db = db.query(VectorDatabase).filter(
 
784
  namespace = f"vdb-{vector_database_id}" if vector_database_id else namespace
785
  logger.info(f"Using namespace '{namespace}' based on vector database ID")
786
 
787
+ # Check if document_id is a numeric database ID or document name
788
+ if document_id.isdigit():
789
+ # Try to find the document in PostgreSQL by its ID
790
+ db_document_id = int(document_id)
791
+ document_to_delete = db.query(Document).filter(Document.id == db_document_id).first()
792
+
793
+ if document_to_delete:
794
+ document_found = True
795
+ logger.info(f"Found document in database: id={document_to_delete.id}, name={document_to_delete.name}")
796
+
797
+ # Look for vector status to find the Pinecone vector_id
798
+ vector_status_to_update = db.query(VectorStatus).filter(
799
+ VectorStatus.document_id == document_to_delete.id,
800
+ VectorStatus.vector_database_id == vector_database_id
801
+ ).first()
802
+
803
+ if vector_status_to_update and vector_status_to_update.vector_id:
804
+ pinecone_document_id = vector_status_to_update.vector_id
805
+ vector_id_found = True
806
+ logger.info(f"Using vector_id '{pinecone_document_id}' from vector status")
807
+ else:
808
+ # Fallback options if vector_id is not directly found
809
+ pinecone_document_id = document_to_delete.name
810
+ logger.info(f"Vector ID not found in status, using document name '{pinecone_document_id}' as fallback")
811
+ else:
812
+ logger.warning(f"Document with ID {db_document_id} not found in database. Using ID as is.")
813
+ else:
814
+ # Try to find document by name/title
815
+ document_to_delete = db.query(Document).filter(
816
+ Document.name == document_id,
817
+ Document.vector_database_id == vector_database_id
818
+ ).first()
819
+
820
+ if document_to_delete:
821
+ document_found = True
822
+ logger.info(f"Found document by name: id={document_to_delete.id}, name={document_to_delete.name}")
823
+
824
+ # Get vector status for this document
825
+ vector_status_to_update = db.query(VectorStatus).filter(
826
+ VectorStatus.document_id == document_to_delete.id,
827
+ VectorStatus.vector_database_id == vector_database_id
828
+ ).first()
829
+
830
+ if vector_status_to_update and vector_status_to_update.vector_id:
831
+ pinecone_document_id = vector_status_to_update.vector_id
832
+ vector_id_found = True
833
+ logger.info(f"Using vector_id '{pinecone_document_id}' from vector status")
834
+
835
  # Send notification of deletion start via WebSocket if user_id provided
836
  if user_id:
837
  try:
838
+ await send_pdf_delete_started(user_id, pinecone_document_id)
839
  except Exception as ws_error:
840
  logger.error(f"Error sending WebSocket notification: {ws_error}")
841
 
 
844
  index_name=index_name,
845
  namespace=namespace,
846
  api_key=api_key,
847
+ vector_db_id=vector_database_id
 
848
  )
849
 
850
+ # Delete document vectors using the pinecone_document_id and additional metadata
851
+ additional_metadata = {}
852
+ if document_to_delete:
853
+ # Add document name as title for searching
854
+ additional_metadata["document_name"] = document_to_delete.name
855
+
856
+ result = await processor.delete_document(pinecone_document_id, additional_metadata)
857
+
858
+ # Check if vectors were actually deleted or found
859
+ vectors_deleted = result.get('vectors_deleted', 0)
860
+ vectors_found = result.get('vectors_found', False)
861
+
862
+ # If no document was found in PostgreSQL and no vectors were found/deleted in Pinecone
863
+ if not document_found and not vectors_found:
864
+ result['success'] = False # Override success to false
865
+ result['error'] = f"Document ID {document_id} not found in PostgreSQL or Pinecone"
866
+
867
+ # Send notification of deletion failure via WebSocket if user_id provided
868
+ if user_id:
869
+ try:
870
+ await send_pdf_delete_failed(user_id, document_id, result['error'])
871
+ except Exception as ws_error:
872
+ logger.error(f"Error sending WebSocket notification: {ws_error}")
873
+
874
+ return result
875
 
876
  # If successful and vector_database_id is provided, update PostgreSQL records
877
  if result.get('success') and vector_database_id:
878
  try:
879
+ # Update vector status if we found it earlier
880
+ if vector_status_to_update:
881
+ vector_status_to_update.status = "deleted"
882
+ db.commit()
883
+ result["postgresql_updated"] = True
884
+ logger.info(f"Updated vector status for document ID {document_to_delete.id if document_to_delete else document_id} to 'deleted'")
885
+ else:
886
+ # If we didn't find it earlier, try again with more search options
887
+ document = None
 
 
 
 
 
888
 
889
+ if document_id.isdigit():
890
+ # If the original document_id was numeric, use it directly
891
+ document = db.query(Document).filter(Document.id == int(document_id)).first()
892
+
893
+ if not document:
894
+ # Find document by vector ID if it exists
895
+ document = db.query(Document).join(
896
+ VectorStatus, Document.id == VectorStatus.document_id
897
+ ).filter(
898
+ Document.vector_database_id == vector_database_id,
899
+ VectorStatus.vector_id == pinecone_document_id
900
+ ).first()
901
+
902
+ if not document:
903
+ # Try finding by name
904
+ document = db.query(Document).filter(
905
+ Document.vector_database_id == vector_database_id,
906
+ Document.name == pinecone_document_id
907
+ ).first()
908
+
909
+ if document:
910
+ # Update vector status
911
+ vector_status = db.query(VectorStatus).filter(
912
+ VectorStatus.document_id == document.id,
913
+ VectorStatus.vector_database_id == vector_database_id
914
+ ).first()
915
+
916
+ if vector_status:
917
+ vector_status.status = "deleted"
918
+ db.commit()
919
+ result["postgresql_updated"] = True
920
+ logger.info(f"Updated vector status for document ID {document.id} to 'deleted'")
921
+ else:
922
+ logger.warning(f"Could not find document record for deletion confirmation. Document ID: {document_id}, Vector ID: {pinecone_document_id}")
923
  except Exception as db_error:
924
  logger.error(f"Error updating PostgreSQL records: {db_error}")
925
  result["postgresql_error"] = str(db_error)
926
 
927
+ # Add information about what was found and deleted
928
+ result["document_found_in_db"] = document_found
929
+ result["vector_id_found"] = vector_id_found
930
+ result["vectors_deleted"] = vectors_deleted
931
+
932
  # Send notification of deletion completion via WebSocket if user_id provided
933
  if user_id:
934
  try:
935
  if result.get('success'):
936
+ await send_pdf_delete_completed(user_id, pinecone_document_id)
937
  else:
938
+ await send_pdf_delete_failed(user_id, pinecone_document_id, result.get('error', 'Unknown error'))
939
  except Exception as ws_error:
940
  logger.error(f"Error sending WebSocket notification: {ws_error}")
941
 
 
952
 
953
  return PDFResponse(
954
  success=False,
955
+ error=str(e)
 
956
  )
957
 
958
 
app/api/rag_routes.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import APIRouter, HTTPException, Depends, Query, BackgroundTasks, Request
2
  from typing import List, Optional, Dict, Any
3
  import logging
4
  import time
@@ -12,8 +12,23 @@ from datetime import datetime
12
  from langchain.prompts import PromptTemplate
13
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
14
  from app.utils.utils import timer_decorator
 
 
15
 
16
  from app.database.mongodb import get_chat_history, get_request_history, session_collection
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  from app.database.pinecone import (
18
  search_vectors,
19
  get_chain,
@@ -30,7 +45,12 @@ from app.models.rag_models import (
30
  SourceDocument,
31
  EmbeddingRequest,
32
  EmbeddingResponse,
33
- UserMessageModel
 
 
 
 
 
34
  )
35
 
36
  # Configure logging
@@ -75,15 +95,15 @@ prompt = PromptTemplate(
75
  You are Pixity - a professional tour guide assistant that assists users in finding information about places in Da Nang, Vietnam.
76
  You can provide details on restaurants, cafes, hotels, attractions, and other local venues.
77
  You have to use core knowledge and conversation history to chat with users, who are Da Nang's tourists.
78
- Pixitys Core Personality: Friendly & Warm: Chats like a trustworthy friend who listens and is always ready to help.
79
  Naturally Cute: Shows cuteness through word choice, soft emojis, and gentle care for the user.
80
  Playful – a little bit cheeky in a lovable way: Occasionally cracks jokes, uses light memes or throws in a surprise response that makes users smile. Think Duolingo-style humor, but less threatening.
81
  Smart & Proactive: Friendly, but also delivers quick, accurate info. Knows how to guide users to the right place – at the right time – with the right solution.
82
- Tone & Voice: Friendly – Youthful – Snappy. Uses simple words, similar to daily chat language (e.g., Lets find it together!” / Need a tip?” / Heres something cool). Avoids sounding robotic or overly scripted. Can joke lightly in smart ways, making Pixity feel like a travel buddy who knows how to lift the mood
83
  SAMPLE DIALOGUES
84
  When a user opens the chatbot for the first time:
85
  User: Hello?
86
- Pixity: Hi hi 👋 Ive been waiting for you! Ready to explore Da Nang together? Ive got tips, tricks, and a tiny bit of magic 🎒✨
87
 
88
  Return Format:
89
  Respond in friendly, natural, concise and use only English like a real tour guide.
@@ -173,8 +193,7 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks):
173
  # logger.info(f"Processing chat request for user {request.user_id}, session {session_id}")
174
 
175
  retriever = get_chain(
176
- top_k=request.similarity_top_k,
177
- limit_k=request.limit_k,
178
  similarity_metric=request.similarity_metric,
179
  similarity_threshold=request.similarity_threshold
180
  )
@@ -344,4 +363,447 @@ async def health_check():
344
  "services": services,
345
  "retrieval_config": retrieval_config,
346
  "timestamp": datetime.now().isoformat()
347
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Depends, Query, BackgroundTasks, Request, Path, Body, status
2
  from typing import List, Optional, Dict, Any
3
  import logging
4
  import time
 
12
  from langchain.prompts import PromptTemplate
13
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
14
  from app.utils.utils import timer_decorator
15
+ from sqlalchemy.orm import Session
16
+ from sqlalchemy.exc import SQLAlchemyError
17
 
18
  from app.database.mongodb import get_chat_history, get_request_history, session_collection
19
+ from app.database.postgresql import get_db
20
+ from app.database.models import ChatEngine
21
+ from app.utils.cache import get_cache, InMemoryCache
22
+ from app.utils.cache_config import (
23
+ CHAT_ENGINE_CACHE_TTL,
24
+ MODEL_CONFIG_CACHE_TTL,
25
+ RETRIEVER_CACHE_TTL,
26
+ PROMPT_TEMPLATE_CACHE_TTL,
27
+ get_chat_engine_cache_key,
28
+ get_model_config_cache_key,
29
+ get_retriever_cache_key,
30
+ get_prompt_template_cache_key
31
+ )
32
  from app.database.pinecone import (
33
  search_vectors,
34
  get_chain,
 
45
  SourceDocument,
46
  EmbeddingRequest,
47
  EmbeddingResponse,
48
+ UserMessageModel,
49
+ ChatEngineBase,
50
+ ChatEngineCreate,
51
+ ChatEngineUpdate,
52
+ ChatEngineResponse,
53
+ ChatWithEngineRequest
54
  )
55
 
56
  # Configure logging
 
95
  You are Pixity - a professional tour guide assistant that assists users in finding information about places in Da Nang, Vietnam.
96
  You can provide details on restaurants, cafes, hotels, attractions, and other local venues.
97
  You have to use core knowledge and conversation history to chat with users, who are Da Nang's tourists.
98
+ Pixity's Core Personality: Friendly & Warm: Chats like a trustworthy friend who listens and is always ready to help.
99
  Naturally Cute: Shows cuteness through word choice, soft emojis, and gentle care for the user.
100
  Playful – a little bit cheeky in a lovable way: Occasionally cracks jokes, uses light memes or throws in a surprise response that makes users smile. Think Duolingo-style humor, but less threatening.
101
  Smart & Proactive: Friendly, but also delivers quick, accurate info. Knows how to guide users to the right place – at the right time – with the right solution.
102
+ Tone & Voice: Friendly – Youthful – Snappy. Uses simple words, similar to daily chat language (e.g., "Let's find it together!" / "Need a tip?" / "Here's something cool"). Avoids sounding robotic or overly scripted. Can joke lightly in smart ways, making Pixity feel like a travel buddy who knows how to lift the mood
103
  SAMPLE DIALOGUES
104
  When a user opens the chatbot for the first time:
105
  User: Hello?
106
+ Pixity: Hi hi 👋 I've been waiting for you! Ready to explore Da Nang together? I've got tips, tricks, and a tiny bit of magic 🎒✨
107
 
108
  Return Format:
109
  Respond in friendly, natural, concise and use only English like a real tour guide.
 
193
  # logger.info(f"Processing chat request for user {request.user_id}, session {session_id}")
194
 
195
  retriever = get_chain(
196
+ top_k=request.similarity_top_k * 2,
 
197
  similarity_metric=request.similarity_metric,
198
  similarity_threshold=request.similarity_threshold
199
  )
 
363
  "services": services,
364
  "retrieval_config": retrieval_config,
365
  "timestamp": datetime.now().isoformat()
366
+ }
367
+
368
+ # Chat Engine endpoints
369
@router.get("/chat-engine", response_model=List[ChatEngineResponse], tags=["Chat Engine"])
async def get_chat_engines(
    skip: int = 0,
    limit: int = 100,
    status: Optional[str] = None,
    db: Session = Depends(get_db)
):
    """List chat engines with simple offset/limit pagination.

    - **skip**: number of rows to skip (pagination offset)
    - **limit**: maximum number of rows returned
    - **status**: when given (non-empty), only engines with this status
      (e.g. 'active', 'inactive') are returned
    """
    try:
        stmt = db.query(ChatEngine)
        # A falsy status (None or "") means "no filter", matching the API contract.
        if status:
            stmt = stmt.filter(ChatEngine.status == status)
        rows = stmt.offset(skip).limit(limit).all()
        return [ChatEngineResponse.model_validate(row, from_attributes=True) for row in rows]
    except SQLAlchemyError as e:
        logger.error(f"Database error retrieving chat engines: {e}")
        raise HTTPException(status_code=500, detail=f"Lỗi database: {str(e)}")
    except Exception as e:
        logger.error(f"Error retrieving chat engines: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Lỗi khi lấy danh sách chat engines: {str(e)}")
398
+
399
@router.post("/chat-engine", response_model=ChatEngineResponse, status_code=status.HTTP_201_CREATED, tags=["Chat Engine"])
async def create_chat_engine(
    engine: ChatEngineCreate,
    db: Session = Depends(get_db)
):
    """Create a new chat engine from the validated request payload.

    All fields of ``ChatEngineCreate`` (name, answer_model, prompts,
    retrieval parameters, pinecone index, status, ...) are persisted as-is;
    defaults come from the pydantic model. Returns the stored record,
    including server-generated id and timestamps, with HTTP 201.
    """
    try:
        # Build the ORM row directly from the validated payload.
        new_engine = ChatEngine(**engine.model_dump())
        db.add(new_engine)
        db.commit()
        # Refresh to pick up server-generated columns (id, created_at, ...).
        db.refresh(new_engine)
        return ChatEngineResponse.model_validate(new_engine, from_attributes=True)
    except SQLAlchemyError as e:
        db.rollback()
        logger.error(f"Database error creating chat engine: {e}")
        raise HTTPException(status_code=500, detail=f"Lỗi database: {str(e)}")
    except Exception as e:
        db.rollback()
        logger.error(f"Error creating chat engine: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Lỗi khi tạo chat engine: {str(e)}")
438
+
439
@router.get("/chat-engine/{engine_id}", response_model=ChatEngineResponse, tags=["Chat Engine"])
async def get_chat_engine(
    engine_id: int = Path(..., gt=0, description="ID của chat engine"),
    db: Session = Depends(get_db)
):
    """Fetch a single chat engine by its primary key.

    - **engine_id**: id of the engine to retrieve

    Raises 404 when no engine with that id exists, 500 on database or
    unexpected errors.
    """
    try:
        engine = db.query(ChatEngine).filter(ChatEngine.id == engine_id).first()
        if not engine:
            raise HTTPException(status_code=404, detail=f"Không tìm thấy chat engine với ID {engine_id}")

        return ChatEngineResponse.model_validate(engine, from_attributes=True)
    except HTTPException:
        # Let the deliberate 404 propagate untouched.
        raise
    except SQLAlchemyError as e:
        # Consistency fix: the sibling CRUD endpoints report database failures
        # with a dedicated "Lỗi database" message; previously this endpoint
        # folded them into the generic handler.
        logger.error(f"Database error retrieving chat engine: {e}")
        raise HTTPException(status_code=500, detail=f"Lỗi database: {str(e)}")
    except Exception as e:
        logger.error(f"Error retrieving chat engine: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Lỗi khi lấy thông tin chat engine: {str(e)}")
461
+
462
@router.put("/chat-engine/{engine_id}", response_model=ChatEngineResponse, tags=["Chat Engine"])
async def update_chat_engine(
    engine_id: int = Path(..., gt=0, description="ID của chat engine"),
    engine_update: ChatEngineUpdate = Body(...),
    db: Session = Depends(get_db)
):
    """Partially update a chat engine.

    Only fields that were explicitly sent in the payload AND are non-null
    are applied; everything else keeps its current value. Returns the
    refreshed record, or 404 when the engine does not exist.
    """
    try:
        target = db.query(ChatEngine).filter(ChatEngine.id == engine_id).first()
        if not target:
            raise HTTPException(status_code=404, detail=f"Không tìm thấy chat engine với ID {engine_id}")

        # Apply only the fields the caller actually provided, skipping nulls.
        for field, new_value in engine_update.model_dump(exclude_unset=True).items():
            if new_value is None:
                continue
            setattr(target, field, new_value)

        # Stamp the modification time explicitly as well.
        target.last_modified = datetime.utcnow()

        db.commit()
        db.refresh(target)
        return ChatEngineResponse.model_validate(target, from_attributes=True)
    except HTTPException:
        raise
    except SQLAlchemyError as e:
        db.rollback()
        logger.error(f"Database error updating chat engine: {e}")
        raise HTTPException(status_code=500, detail=f"Lỗi database: {str(e)}")
    except Exception as e:
        db.rollback()
        logger.error(f"Error updating chat engine: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Lỗi khi cập nhật chat engine: {str(e)}")
503
+
504
@router.delete("/chat-engine/{engine_id}", response_model=dict, tags=["Chat Engine"])
async def delete_chat_engine(
    engine_id: int = Path(..., gt=0, description="ID của chat engine"),
    db: Session = Depends(get_db)
):
    """Delete a chat engine by id.

    Returns a confirmation message on success; 404 when no engine with
    that id exists.
    """
    try:
        target = db.query(ChatEngine).filter(ChatEngine.id == engine_id).first()
        if not target:
            raise HTTPException(status_code=404, detail=f"Không tìm thấy chat engine với ID {engine_id}")

        db.delete(target)
        db.commit()
        return {"message": f"Chat engine với ID {engine_id} đã được xóa thành công"}
    except HTTPException:
        raise
    except SQLAlchemyError as e:
        db.rollback()
        logger.error(f"Database error deleting chat engine: {e}")
        raise HTTPException(status_code=500, detail=f"Lỗi database: {str(e)}")
    except Exception as e:
        db.rollback()
        logger.error(f"Error deleting chat engine: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Lỗi khi xóa chat engine: {str(e)}")
535
+
536
@timer_decorator
@router.post("/chat-with-engine/{engine_id}", response_model=ChatResponse, tags=["Chat Engine"])
async def chat_with_engine(
    engine_id: int = Path(..., gt=0, description="ID của chat engine"),
    request: ChatWithEngineRequest = Body(...),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """Answer a user question using a specific, configured chat engine.

    Pipeline: load the engine config (cached), build a retriever over its
    Pinecone index (params cached), optionally pull chat history, rewrite
    the question with the `fix_request` template, retrieve context, compose
    the final prompt from the engine's prompt parts (cached), and generate
    the answer with the engine's Gemini model (config cached).

    - **engine_id**: id of the chat engine to use
    - **request**: user id, question, history flag and optional session/user info

    Raises 404 for an unknown engine, 400 for an inactive one, 500 otherwise.
    """
    start_time = time.time()
    try:
        cache = get_cache()
        cache_key = get_chat_engine_cache_key(engine_id)

        # Engine config is cached to avoid a DB round-trip per message.
        # NOTE(review): this caches a SQLAlchemy ORM instance across requests;
        # it becomes detached from later sessions — confirm only plain column
        # attributes (already loaded) are accessed on cache hits.
        engine = cache.get(cache_key)
        if not engine:
            logger.debug(f"Cache miss for engine ID {engine_id}, fetching from database")
            engine = db.query(ChatEngine).filter(ChatEngine.id == engine_id).first()
            if not engine:
                raise HTTPException(status_code=404, detail=f"Không tìm thấy chat engine với ID {engine_id}")
            cache.set(cache_key, engine, CHAT_ENGINE_CACHE_TTL)
        else:
            logger.debug(f"Cache hit for engine ID {engine_id}")

        if engine.status != "active":
            raise HTTPException(status_code=400, detail=f"Chat engine với ID {engine_id} không hoạt động")

        # Session id for this conversation (generated when the caller omits it).
        # NOTE(review): currently computed but not persisted/returned here.
        session_id = request.session_id or f"{request.user_id}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}"

        # Retriever parameters derived from the engine config, cached per engine.
        retriever_cache_key = get_retriever_cache_key(engine_id)
        retriever_params = cache.get(retriever_cache_key)
        if not retriever_params:
            retriever_params = {
                "index_name": engine.pinecone_index_name,
                "top_k": engine.similarity_top_k * 2,
                # Fetch twice top_k by default so post-filtering still leaves enough hits.
                "limit_k": engine.similarity_top_k * 2,
                "similarity_metric": DEFAULT_SIMILARITY_METRIC,
                "similarity_threshold": engine.vector_distance_threshold
            }
            cache.set(retriever_cache_key, retriever_params, RETRIEVER_CACHE_TTL)

        retriever = get_chain(**retriever_params)
        if not retriever:
            raise HTTPException(status_code=500, detail="Không thể khởi tạo retriever")

        # Conversation history (only when requested and the engine allows it).
        chat_history = ""
        if request.include_history and engine.historical_sessions_number > 0:
            chat_history = get_chat_history(request.user_id, n=engine.historical_sessions_number)
            logger.info(f"Sử dụng lịch sử chat: {chat_history[:100]}...")

        # Gemini model configuration, cached per answer model name.
        model_cache_key = get_model_config_cache_key(engine.answer_model)
        model_config = cache.get(model_cache_key)
        if not model_config:
            generation_config = {
                "temperature": 0.9,
                "top_p": 1,
                "top_k": 1,
                "max_output_tokens": 2048,
            }
            safety_settings = [
                {
                    "category": "HARM_CATEGORY_HARASSMENT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
                },
                {
                    "category": "HARM_CATEGORY_HATE_SPEECH",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
                },
                {
                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
                },
                {
                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
                },
            ]
            model_config = {
                "model_name": engine.answer_model,
                "generation_config": generation_config,
                "safety_settings": safety_settings
            }
            cache.set(model_cache_key, model_config, MODEL_CONFIG_CACHE_TTL)

        model = genai.GenerativeModel(**model_config)

        # Rewrite the raw question into a self-contained request using history.
        prompt_request = fix_request.format(
            question=request.question,
            chat_history=chat_history
        )

        final_request_start_time = time.time()
        final_request = model.generate_content(prompt_request)
        logger.info(f"Fixed Request: {final_request.text}")
        logger.info(f"Thời gian sinh fixed request: {time.time() - final_request_start_time:.2f} giây")

        # Retrieve supporting context for the rewritten request.
        retrieved_docs = retriever.invoke(final_request.text)
        logger.info(f"Số lượng tài liệu lấy được: {len(retrieved_docs)}")
        context = "\n".join([doc.page_content for doc in retrieved_docs])

        # Build the source list.
        # NOTE(review): `sources` is assembled but not attached to the response.
        sources = []
        for doc in retrieved_docs:
            source = None
            # FIX: score/normalized_score were previously only bound inside the
            # hasattr branch, raising NameError for a doc without `metadata`.
            score = None
            normalized_score = None
            metadata = {}

            if hasattr(doc, 'metadata'):
                source = doc.metadata.get('source', None)
                score = doc.metadata.get('score', None)
                normalized_score = doc.metadata.get('normalized_score', None)
                # Drop score info from metadata to avoid duplication.
                metadata = {k: v for k, v in doc.metadata.items()
                            if k not in ['text', 'source', 'score', 'normalized_score']}

            sources.append(SourceDocument(
                text=doc.page_content,
                source=source,
                score=score,
                normalized_score=normalized_score,
                metadata=metadata
            ))

        # Prompt fragments derived from the engine config, cached per engine.
        prompt_template_cache_key = get_prompt_template_cache_key(engine_id)
        prompt_template_params = cache.get(prompt_template_cache_key)
        if not prompt_template_params:
            system_prompt_part = engine.system_prompt or ""
            empty_response_part = engine.empty_response or "I'm sorry. I don't have information about that."
            characteristic_part = engine.characteristic or ""
            use_public_info_part = "You can use your own knowledge." if engine.use_public_information else "Only use the information provided in the context to answer. If you do not have enough information, respond with the empty response."

            prompt_template_params = {
                "system_prompt_part": system_prompt_part,
                "empty_response_part": empty_response_part,
                "characteristic_part": characteristic_part,
                "use_public_info_part": use_public_info_part
            }
            cache.set(prompt_template_cache_key, prompt_template_params, PROMPT_TEMPLATE_CACHE_TTL)

        # Assemble the final prompt from the cached fragments.
        final_prompt = f"""
{prompt_template_params['system_prompt_part']}

Your characteristics:
{prompt_template_params['characteristic_part']}

When you don't have enough information:
{prompt_template_params['empty_response_part']}

Knowledge usage instructions:
{prompt_template_params['use_public_info_part']}

Context:
{context}

Conversation History:
{chat_history}

User message:
{request.question}

Your response:
"""

        logger.info(f"Final prompt: {final_prompt}")

        # Generate the answer.
        response = model.generate_content(final_prompt)
        answer = response.text

        processing_time = time.time() - start_time

        chat_response = ChatResponse(
            answer=answer,
            processing_time=processing_time
        )
        return chat_response
    except HTTPException:
        # FIX: previously the generic handler below converted the deliberate
        # 404/400 HTTPExceptions into opaque 500 responses.
        raise
    except Exception as e:
        logger.error(f"Lỗi khi xử lý chat request: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Lỗi khi xử lý chat request: {str(e)}")
759
+
760
@router.get("/cache/stats", tags=["Cache"])
async def get_cache_stats():
    """Return live cache statistics together with the configured TTLs.

    The payload combines the runtime counters reported by the cache
    implementation (item count, memory usage, ...) with the static TTL
    configuration for each cached object type.
    """
    try:
        stats = get_cache().stats()
        # Attach the static TTL configuration alongside the live counters.
        stats.update({
            "chat_engine_ttl": CHAT_ENGINE_CACHE_TTL,
            "model_config_ttl": MODEL_CONFIG_CACHE_TTL,
            "retriever_ttl": RETRIEVER_CACHE_TTL,
            "prompt_template_ttl": PROMPT_TEMPLATE_CACHE_TTL
        })
        return stats
    except Exception as e:
        logger.error(f"Lỗi khi lấy thống kê cache: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Lỗi khi lấy thống kê cache: {str(e)}")
784
+
785
@router.delete("/cache", tags=["Cache"])
async def clear_cache(key: Optional[str] = None):
    """Invalidate cache entries.

    - **key**: specific entry to drop; when omitted (or empty) the entire
      cache is flushed.
    """
    try:
        cache = get_cache()

        # A falsy key (None or "") means "flush everything".
        if not key:
            cache.clear()
            return {"message": "Đã xóa toàn bộ cache"}

        # Targeted invalidation of a single entry.
        if cache.delete(key):
            return {"message": f"Đã xóa cache cho key: {key}"}
        return {"message": f"Không tìm thấy key: {key} trong cache"}
    except Exception as e:
        logger.error(f"Lỗi khi xóa cache: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Lỗi khi xóa cache: {str(e)}")
app/database/models.py CHANGED
@@ -155,10 +155,13 @@ class ChatEngine(Base):
155
  answer_model = Column(String, nullable=False)
156
  system_prompt = Column(Text, nullable=True)
157
  empty_response = Column(String, nullable=True)
 
 
158
  similarity_top_k = Column(Integer, default=3)
159
  vector_distance_threshold = Column(Float, default=0.75)
160
  grounding_threshold = Column(Float, default=0.2)
161
  use_public_information = Column(Boolean, default=False)
 
162
  status = Column(String, default="active")
163
  created_at = Column(DateTime, server_default=func.now())
164
  last_modified = Column(DateTime, server_default=func.now(), onupdate=func.now())
 
155
  answer_model = Column(String, nullable=False)
156
  system_prompt = Column(Text, nullable=True)
157
  empty_response = Column(String, nullable=True)
158
+ characteristic = Column(Text, nullable=True)
159
+ historical_sessions_number = Column(Integer, default=3)
160
  similarity_top_k = Column(Integer, default=3)
161
  vector_distance_threshold = Column(Float, default=0.75)
162
  grounding_threshold = Column(Float, default=0.2)
163
  use_public_information = Column(Boolean, default=False)
164
+ pinecone_index_name = Column(String, default="testbot768")
165
  status = Column(String, default="active")
166
  created_at = Column(DateTime, server_default=func.now())
167
  last_modified = Column(DateTime, server_default=func.now(), onupdate=func.now())
app/database/pinecone.py CHANGED
@@ -26,7 +26,7 @@ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
26
  DEFAULT_LIMIT_K = int(os.getenv("PINECONE_DEFAULT_LIMIT_K", "10"))
27
  DEFAULT_TOP_K = int(os.getenv("PINECONE_DEFAULT_TOP_K", "6"))
28
  DEFAULT_SIMILARITY_METRIC = os.getenv("PINECONE_DEFAULT_SIMILARITY_METRIC", "cosine")
29
- DEFAULT_SIMILARITY_THRESHOLD = float(os.getenv("PINECONE_DEFAULT_SIMILARITY_THRESHOLD", "0.0"))
30
  ALLOWED_METRICS = os.getenv("PINECONE_ALLOWED_METRICS", "cosine,dotproduct,euclidean").split(",")
31
 
32
  # Export constants for importing elsewhere
@@ -198,7 +198,7 @@ async def search_vectors(
198
  top_k: int = DEFAULT_TOP_K,
199
  limit_k: int = DEFAULT_LIMIT_K,
200
  similarity_metric: str = DEFAULT_SIMILARITY_METRIC,
201
- similarity_threshold: float = 0.0,
202
  namespace: str = "Default",
203
  filter: Optional[Dict] = None
204
  ) -> Dict:
 
26
  DEFAULT_LIMIT_K = int(os.getenv("PINECONE_DEFAULT_LIMIT_K", "10"))
27
  DEFAULT_TOP_K = int(os.getenv("PINECONE_DEFAULT_TOP_K", "6"))
28
  DEFAULT_SIMILARITY_METRIC = os.getenv("PINECONE_DEFAULT_SIMILARITY_METRIC", "cosine")
29
+ DEFAULT_SIMILARITY_THRESHOLD = float(os.getenv("PINECONE_DEFAULT_SIMILARITY_THRESHOLD", "0.75"))
30
  ALLOWED_METRICS = os.getenv("PINECONE_ALLOWED_METRICS", "cosine,dotproduct,euclidean").split(",")
31
 
32
  # Export constants for importing elsewhere
 
198
  top_k: int = DEFAULT_TOP_K,
199
  limit_k: int = DEFAULT_LIMIT_K,
200
  similarity_metric: str = DEFAULT_SIMILARITY_METRIC,
201
+ similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
202
  namespace: str = "Default",
203
  filter: Optional[Dict] = None
204
  ) -> Dict:
app/database/postgresql.py CHANGED
@@ -12,19 +12,22 @@ logger = logging.getLogger(__name__)
12
  # Load environment variables
13
  load_dotenv()
14
 
 
 
 
15
  # Get DB connection mode from environment
16
  DB_CONNECTION_MODE = os.getenv("DB_CONNECTION_MODE", "aiven")
17
 
18
  # Set connection string based on mode
19
  if DB_CONNECTION_MODE == "aiven":
20
- DATABASE_URL = os.getenv("AIVEN_DB_URL")
21
  else:
22
  # Default or other connection modes can be added here
23
- DATABASE_URL = os.getenv("AIVEN_DB_URL")
24
 
25
  if not DATABASE_URL:
26
- logger.error("No database URL configured. Please set AIVEN_DB_URL environment variable.")
27
- DATABASE_URL = "postgresql://localhost/test" # Fallback to avoid crash on startup
28
 
29
  # Create SQLAlchemy engine with optimized settings
30
  try:
 
12
# Load environment variables
load_dotenv()

# Connection-string resolution. All currently supported modes read
# AIVEN_DB_URL; the if/else is kept as an extension point for future modes.
DEFAULT_DB_URL = os.getenv("AIVEN_DB_URL")

# Get DB connection mode from environment
DB_CONNECTION_MODE = os.getenv("DB_CONNECTION_MODE", "aiven")

# Set connection string based on mode
if DB_CONNECTION_MODE == "aiven":
    DATABASE_URL = DEFAULT_DB_URL
else:
    # Default or other connection modes can be added here
    DATABASE_URL = DEFAULT_DB_URL

if not DATABASE_URL:
    # FIX: falling back to DEFAULT_DB_URL here was a no-op — it is None in
    # exactly this case, and create_engine(None) would crash on startup.
    logger.error("No database URL configured. Please set AIVEN_DB_URL environment variable.")
    DATABASE_URL = "postgresql://localhost/test"  # Fallback to avoid crash on startup
31
 
32
  # Create SQLAlchemy engine with optimized settings
33
  try:
app/models/pdf_models.py CHANGED
@@ -18,7 +18,6 @@ class PDFResponse(BaseModel):
18
  total_text_length: Optional[int] = Field(None, description="Tổng kích thước text đã xử lý")
19
  error: Optional[str] = Field(None, description="Thông báo lỗi (nếu có)")
20
  warning: Optional[str] = Field(None, description="Cảnh báo (nếu có)")
21
- mock_mode: Optional[bool] = Field(None, description="Đã chạy ở chế độ mock hay không")
22
  message: Optional[str] = Field(None, description="Thông báo thành công")
23
 
24
  class Config:
@@ -28,7 +27,6 @@ class PDFResponse(BaseModel):
28
  "document_id": "550e8400-e29b-41d4-a716-446655440000",
29
  "chunks_processed": 25,
30
  "total_text_length": 50000,
31
- "mock_mode": False,
32
  "message": "Successfully processed document"
33
  }
34
  }
 
18
  total_text_length: Optional[int] = Field(None, description="Tổng kích thước text đã xử lý")
19
  error: Optional[str] = Field(None, description="Thông báo lỗi (nếu có)")
20
  warning: Optional[str] = Field(None, description="Cảnh báo (nếu có)")
 
21
  message: Optional[str] = Field(None, description="Thông báo thành công")
22
 
23
  class Config:
 
27
  "document_id": "550e8400-e29b-41d4-a716-446655440000",
28
  "chunks_processed": 25,
29
  "total_text_length": 50000,
 
30
  "message": "Successfully processed document"
31
  }
32
  }
app/models/rag_models.py CHANGED
@@ -1,5 +1,7 @@
1
  from pydantic import BaseModel, Field
2
  from typing import Optional, List, Dict, Any
 
 
3
 
4
  class ChatRequest(BaseModel):
5
  """Request model for chat endpoint"""
@@ -65,4 +67,58 @@ class UserMessageModel(BaseModel):
65
  similarity_top_k: Optional[int] = Field(None, description="Number of top similar documents to return (after filtering)")
66
  limit_k: Optional[int] = Field(None, description="Maximum number of documents to retrieve from vector store")
67
  similarity_metric: Optional[str] = Field(None, description="Similarity metric to use (cosine, dotproduct, euclidean)")
68
- similarity_threshold: Optional[float] = Field(None, description="Threshold for vector similarity (0-1)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from pydantic import BaseModel, Field
2
  from typing import Optional, List, Dict, Any
3
+ from datetime import datetime
4
+ from pydantic import ConfigDict
5
 
6
  class ChatRequest(BaseModel):
7
  """Request model for chat endpoint"""
 
67
  similarity_top_k: Optional[int] = Field(None, description="Number of top similar documents to return (after filtering)")
68
  limit_k: Optional[int] = Field(None, description="Maximum number of documents to retrieve from vector store")
69
  similarity_metric: Optional[str] = Field(None, description="Similarity metric to use (cosine, dotproduct, euclidean)")
70
+ similarity_threshold: Optional[float] = Field(None, description="Threshold for vector similarity (0-1)")
71
+
72
class ChatEngineBase(BaseModel):
    """Shared schema for chat-engine payloads (used by create and response models)."""
    # Identity and answering model
    name: str = Field(..., description="Tên của chat engine")
    answer_model: str = Field(..., description="Model được dùng để trả lời")
    # Prompt configuration
    system_prompt: Optional[str] = Field(None, description="Prompt của hệ thống, được đưa vào phần đầu tiên của final_prompt")
    empty_response: Optional[str] = Field(None, description="Đoạn response khi answer model không có thông tin về câu hỏi")
    characteristic: Optional[str] = Field(None, description="Tính cách của model khi trả lời câu hỏi")
    historical_sessions_number: int = Field(3, description="Số lượng các cặp tin nhắn trong history được đưa vào final prompt")
    use_public_information: bool = Field(False, description="Yes nếu answer model được quyền trả về thông tin mà nó có")
    # Retrieval configuration
    similarity_top_k: int = Field(3, description="Số lượng top similar documents để trả về")
    vector_distance_threshold: float = Field(0.75, description="Threshold cho vector similarity")
    grounding_threshold: float = Field(0.2, description="Threshold cho grounding")
    pinecone_index_name: str = Field("testbot768", description="Vector database mà model được quyền sử dụng")
    # Lifecycle
    status: str = Field("active", description="Trạng thái của chat engine")

class ChatEngineCreate(ChatEngineBase):
    """Payload for creating a new chat engine (same fields as the base schema)."""
    pass

class ChatEngineUpdate(BaseModel):
    """Partial-update payload: every field optional; None means 'leave unchanged'."""
    name: Optional[str] = None
    answer_model: Optional[str] = None
    system_prompt: Optional[str] = None
    empty_response: Optional[str] = None
    characteristic: Optional[str] = None
    historical_sessions_number: Optional[int] = None
    use_public_information: Optional[bool] = None
    similarity_top_k: Optional[int] = None
    vector_distance_threshold: Optional[float] = None
    grounding_threshold: Optional[float] = None
    pinecone_index_name: Optional[str] = None
    status: Optional[str] = None

class ChatEngineResponse(ChatEngineBase):
    """Response schema for a chat engine, including server-generated fields."""
    id: int
    created_at: datetime
    last_modified: datetime

    # Allows validation straight from SQLAlchemy ORM instances.
    model_config = ConfigDict(from_attributes=True)

class ChatWithEngineRequest(BaseModel):
    """Request body for the chat-with-engine endpoint."""
    user_id: str = Field(..., description="User ID from Telegram")
    question: str = Field(..., description="User's question")
    include_history: bool = Field(True, description="Whether to include user history in prompt")

    # Optional user/session information
    session_id: Optional[str] = Field(None, description="Session ID for tracking conversations")
    first_name: Optional[str] = Field(None, description="User's first name")
    last_name: Optional[str] = Field(None, description="User's last name")
    username: Optional[str] = Field(None, description="User's username")
app/utils/cache_config.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Cache configuration module.

Holds the tunable cache parameters (TTLs, cleanup interval, maximum
size) and the key prefixes used by each cache category.
"""

import os
from dotenv import load_dotenv

# Load environment variables from a .env file, if one is present
load_dotenv()

# Global cache settings; each can be overridden via the environment / .env file
CACHE_TTL_SECONDS = int(os.getenv("CACHE_TTL_SECONDS", "300"))  # default: 5 minutes
CACHE_CLEANUP_INTERVAL = int(os.getenv("CACHE_CLEANUP_INTERVAL", "60"))  # default: 1 minute
CACHE_MAX_SIZE = int(os.getenv("CACHE_MAX_SIZE", "1000"))  # default: 1000 entries

# Per-cache TTLs; each falls back to the global CACHE_TTL_SECONDS
CHAT_ENGINE_CACHE_TTL = int(os.getenv("CHAT_ENGINE_CACHE_TTL", str(CACHE_TTL_SECONDS)))
MODEL_CONFIG_CACHE_TTL = int(os.getenv("MODEL_CONFIG_CACHE_TTL", str(CACHE_TTL_SECONDS)))
RETRIEVER_CACHE_TTL = int(os.getenv("RETRIEVER_CACHE_TTL", str(CACHE_TTL_SECONDS)))
PROMPT_TEMPLATE_CACHE_TTL = int(os.getenv("PROMPT_TEMPLATE_CACHE_TTL", str(CACHE_TTL_SECONDS)))

# Cache key prefixes (one namespace per cached object type)
CHAT_ENGINE_CACHE_PREFIX = "chat_engine:"
MODEL_CONFIG_CACHE_PREFIX = "model_config:"
RETRIEVER_CACHE_PREFIX = "retriever:"
PROMPT_TEMPLATE_CACHE_PREFIX = "prompt_template:"
29
+
30
# Helper functions for building cache keys
def get_chat_engine_cache_key(engine_id: int) -> str:
    """Return the cache key under which a chat engine is stored."""
    return CHAT_ENGINE_CACHE_PREFIX + str(engine_id)
34
+
35
def get_model_config_cache_key(model_name: str) -> str:
    """Return the cache key under which a model configuration is stored."""
    return MODEL_CONFIG_CACHE_PREFIX + model_name
38
+
39
def get_retriever_cache_key(engine_id: int) -> str:
    """Return the cache key under which a retriever is stored."""
    return RETRIEVER_CACHE_PREFIX + str(engine_id)
42
+
43
def get_prompt_template_cache_key(engine_id: int) -> str:
    """Return the cache key under which a prompt template is stored."""
    return PROMPT_TEMPLATE_CACHE_PREFIX + str(engine_id)
app/utils/pdf_processor.py CHANGED
@@ -4,7 +4,6 @@ import uuid
4
  import pinecone
5
  from app.utils.pinecone_fix import PineconeConnectionManager, check_connection
6
  import time
7
- import os
8
  from typing import List, Dict, Any, Optional
9
 
10
  # Langchain imports for document processing
@@ -25,12 +24,12 @@ class PDFProcessor:
25
  self.api_key = api_key
26
  self.vector_db_id = vector_db_id
27
  self.pinecone_index = None
28
- self.mock_mode = mock_mode
29
  self.correlation_id = correlation_id or str(uuid.uuid4())[:8]
30
  self.google_api_key = os.environ.get("GOOGLE_API_KEY")
31
 
32
- # Initialize Pinecone connection if not in mock mode
33
- if not self.mock_mode and self.api_key:
34
  try:
35
  # Use connection manager from pinecone_fix
36
  logger.info(f"[{self.correlation_id}] Initializing Pinecone connection to {self.index_name}")
@@ -38,9 +37,7 @@ class PDFProcessor:
38
  logger.info(f"[{self.correlation_id}] Successfully connected to Pinecone index {self.index_name}")
39
  except Exception as e:
40
  logger.error(f"[{self.correlation_id}] Failed to initialize Pinecone: {str(e)}")
41
- # Fall back to mock mode if connection fails
42
- self.mock_mode = True
43
- logger.warning(f"[{self.correlation_id}] Falling back to mock mode due to connection error")
44
 
45
  async def process_pdf(self, file_path, document_id=None, metadata=None, progress_callback=None):
46
  """Process a PDF file and create vector embeddings
@@ -53,13 +50,6 @@ class PDFProcessor:
53
  """
54
  logger.info(f"[{self.correlation_id}] Processing PDF: {file_path}")
55
 
56
- if self.mock_mode:
57
- logger.info(f"[{self.correlation_id}] MOCK: Processing PDF {file_path}")
58
- # Mock implementation - just return success
59
- if progress_callback:
60
- await progress_callback(None, document_id, "embedding_complete", 1.0, "Mock processing completed")
61
- return {"success": True, "message": "PDF processed successfully"}
62
-
63
  try:
64
  # Initialize metadata if not provided
65
  if metadata is None:
@@ -242,10 +232,6 @@ class PDFProcessor:
242
 
243
  async def list_namespaces(self):
244
  """List all namespaces in the Pinecone index"""
245
- if self.mock_mode:
246
- logger.info(f"[{self.correlation_id}] MOCK: Listing namespaces")
247
- return {"success": True, "namespaces": ["test"]}
248
-
249
  try:
250
  if not self.pinecone_index:
251
  self.pinecone_index = PineconeConnectionManager.get_index(self.api_key, self.index_name)
@@ -267,15 +253,6 @@ class PDFProcessor:
267
 
268
  async def delete_namespace(self):
269
  """Delete all vectors in a namespace"""
270
- if self.mock_mode:
271
- logger.info(f"[{self.correlation_id}] MOCK: Deleting namespace '{self.namespace}'")
272
- return {
273
- "success": True,
274
- "namespace": self.namespace,
275
- "deleted_count": 100,
276
- "message": f"Successfully deleted namespace '{self.namespace}'"
277
- }
278
-
279
  try:
280
  if not self.pinecone_index:
281
  self.pinecone_index = PineconeConnectionManager.get_index(self.api_key, self.index_name)
@@ -311,21 +288,10 @@ class PDFProcessor:
311
  "error": f"Error deleting namespace: {str(e)}"
312
  }
313
 
314
- async def delete_document(self, document_id):
315
- """Delete vectors associated with a specific document ID"""
316
  logger.info(f"[{self.correlation_id}] Deleting vectors for document '{document_id}' from namespace '{self.namespace}'")
317
 
318
- if self.mock_mode:
319
- logger.info(f"[{self.correlation_id}] MOCK: Deleting document vectors for '{document_id}'")
320
- # In mock mode, simulate deleting 10 vectors
321
- return {
322
- "success": True,
323
- "document_id": document_id,
324
- "namespace": self.namespace,
325
- "deleted_count": 10,
326
- "message": f"Successfully deleted vectors for document '{document_id}' from namespace '{self.namespace}'"
327
- }
328
-
329
  try:
330
  if not self.pinecone_index:
331
  self.pinecone_index = PineconeConnectionManager.get_index(self.api_key, self.index_name)
@@ -334,116 +300,189 @@ class PDFProcessor:
334
  # The specific namespace to use might be vdb-X format if vector_db_id provided
335
  actual_namespace = f"vdb-{self.vector_db_id}" if self.vector_db_id else self.namespace
336
 
337
- # Search for vectors with this document ID
338
- results = self.pinecone_index.query(
339
- vector=[0] * 1536, # Dummy vector, we only care about metadata filter
340
- top_k=1,
341
- include_metadata=True,
342
- filter={"document_id": document_id},
343
- namespace=actual_namespace
344
- )
345
 
346
- # If no vectors found, return success with warning
347
- if len(results.get("matches", [])) == 0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  logger.warning(f"[{self.correlation_id}] No vectors found for document '{document_id}' in namespace '{actual_namespace}'")
349
  return {
350
- "success": True,
351
  "document_id": document_id,
352
  "namespace": actual_namespace,
353
  "deleted_count": 0,
354
  "warning": f"No vectors found for document '{document_id}' in namespace '{actual_namespace}'",
355
- "message": f"Successfully deleted 0 vectors for document '{document_id}' from namespace '{actual_namespace}'"
 
 
356
  }
357
 
358
- # Delete vectors by filter
359
- result = self.pinecone_index.delete(
360
- filter={"document_id": document_id},
361
- namespace=actual_namespace
362
- )
363
-
364
- # Get delete count from result
365
- deleted_count = result.get("deleted_count", 0)
366
-
367
  return {
368
  "success": True,
369
  "document_id": document_id,
370
  "namespace": actual_namespace,
371
  "deleted_count": deleted_count,
372
- "message": f"Successfully deleted {deleted_count} vectors for document '{document_id}' from namespace '{actual_namespace}'"
 
 
 
373
  }
374
  except Exception as e:
375
  logger.error(f"[{self.correlation_id}] Error deleting document vectors: {str(e)}")
376
  return {
377
  "success": False,
378
  "document_id": document_id,
379
- "error": f"Error deleting document vectors: {str(e)}"
 
 
380
  }
381
 
382
  async def list_documents(self):
383
- """List all documents in the Pinecone index"""
384
- if self.mock_mode:
385
- logger.info(f"[{self.correlation_id}] MOCK: Listing documents in namespace '{self.namespace}'")
386
- return {
387
- "success": True,
388
- "namespace": self.namespace,
389
- "documents": [
390
- {"id": "doc1", "title": "Sample Document 1"},
391
- {"id": "doc2", "title": "Sample Document 2"}
392
- ]
393
- }
394
-
395
  try:
396
  if not self.pinecone_index:
397
  self.pinecone_index = PineconeConnectionManager.get_index(self.api_key, self.index_name)
398
 
399
- # The namespace to use might be in vdb-X format if vector_db_id provided
400
- actual_namespace = f"vdb-{self.vector_db_id}" if self.vector_db_id else self.namespace
401
 
402
- # Get index stats
403
  stats = self.pinecone_index.describe_index_stats()
404
- namespaces = stats.get("namespaces", {})
405
- total_vectors = namespaces.get(actual_namespace, {}).get("vector_count", 0)
406
-
407
- # Query unique document IDs
408
- # Use a sparse vector with top_k=0 to just get metadata stats
409
- # This is more efficient than retrieving actual vectors
 
 
 
 
 
 
 
 
 
410
  results = self.pinecone_index.query(
411
- vector=[0] * 1536, # Dummy vector for metadata-only query
412
- top_k=100, # Limit to 100 results
413
  include_metadata=True,
414
  namespace=actual_namespace
415
  )
416
 
417
- # Extract unique document IDs from metadata
418
- document_map = {}
419
- matches = results.get("matches", [])
420
 
421
- for match in matches:
422
  metadata = match.get("metadata", {})
423
- doc_id = metadata.get("document_id")
424
 
425
- if doc_id and doc_id not in document_map:
426
- document_map[doc_id] = {
427
- "id": doc_id,
428
- "title": metadata.get("title", "Unknown"),
429
- "chunks": 1
 
 
 
430
  }
431
- elif doc_id:
432
- document_map[doc_id]["chunks"] += 1
433
-
434
- documents = list(document_map.values())
 
 
 
 
 
 
 
 
 
 
 
435
 
436
- return {
437
- "success": True,
438
- "namespace": actual_namespace,
439
- "index_name": self.index_name,
440
- "total_vectors": total_vectors,
441
- "documents": documents
442
- }
443
  except Exception as e:
444
  logger.error(f"[{self.correlation_id}] Error listing documents: {str(e)}")
445
- return {
446
- "success": False,
447
- "error": f"Error listing documents: {str(e)}"
448
- }
 
 
 
449
 
 
4
  import pinecone
5
  from app.utils.pinecone_fix import PineconeConnectionManager, check_connection
6
  import time
 
7
  from typing import List, Dict, Any, Optional
8
 
9
  # Langchain imports for document processing
 
24
  self.api_key = api_key
25
  self.vector_db_id = vector_db_id
26
  self.pinecone_index = None
27
+ self.mock_mode = False # Always set mock_mode to False to use real database
28
  self.correlation_id = correlation_id or str(uuid.uuid4())[:8]
29
  self.google_api_key = os.environ.get("GOOGLE_API_KEY")
30
 
31
+ # Initialize Pinecone connection
32
+ if self.api_key:
33
  try:
34
  # Use connection manager from pinecone_fix
35
  logger.info(f"[{self.correlation_id}] Initializing Pinecone connection to {self.index_name}")
 
37
  logger.info(f"[{self.correlation_id}] Successfully connected to Pinecone index {self.index_name}")
38
  except Exception as e:
39
  logger.error(f"[{self.correlation_id}] Failed to initialize Pinecone: {str(e)}")
40
+ # No fallback to mock mode - require a valid connection
 
 
41
 
42
  async def process_pdf(self, file_path, document_id=None, metadata=None, progress_callback=None):
43
  """Process a PDF file and create vector embeddings
 
50
  """
51
  logger.info(f"[{self.correlation_id}] Processing PDF: {file_path}")
52
 
 
 
 
 
 
 
 
53
  try:
54
  # Initialize metadata if not provided
55
  if metadata is None:
 
232
 
233
  async def list_namespaces(self):
234
  """List all namespaces in the Pinecone index"""
 
 
 
 
235
  try:
236
  if not self.pinecone_index:
237
  self.pinecone_index = PineconeConnectionManager.get_index(self.api_key, self.index_name)
 
253
 
254
  async def delete_namespace(self):
255
  """Delete all vectors in a namespace"""
 
 
 
 
 
 
 
 
 
256
  try:
257
  if not self.pinecone_index:
258
  self.pinecone_index = PineconeConnectionManager.get_index(self.api_key, self.index_name)
 
288
  "error": f"Error deleting namespace: {str(e)}"
289
  }
290
 
291
    async def delete_document(self, document_id, additional_metadata=None):
        """Delete all vectors belonging to one document from Pinecone.

        The document may be referenced by a UUID-style ID, a bare ID
        without hyphens, or a human-readable title; several metadata
        filters are tried in priority order and the first one that
        matches any vector is used for the deletion.

        Args:
            document_id: Document identifier (UUID string or name/title).
            additional_metadata: Optional dict; if it contains
                "document_name", extra title/filename filters are tried.

        Returns:
            dict with "success", "document_id", "namespace",
            "deleted_count", "vectors_found"/"vectors_deleted" and either
            a "message" or an "error"/"warning" entry.
        """
        logger.info(f"[{self.correlation_id}] Deleting vectors for document '{document_id}' from namespace '{self.namespace}'")

        try:
            # Lazily (re)connect to the index if we don't hold a handle yet
            if not self.pinecone_index:
                self.pinecone_index = PineconeConnectionManager.get_index(self.api_key, self.index_name)

            # The specific namespace to use might be vdb-X format if vector_db_id provided
            actual_namespace = f"vdb-{self.vector_db_id}" if self.vector_db_id else self.namespace

            # Build candidate metadata filters, most specific first
            filters = []

            # First try with exact document_id which could be UUID (preferred)
            filters.append({"document_id": document_id})

            # If this looks like a UUID, also try the alternative hyphenation
            if len(document_id) >= 32:
                if "-" in document_id:
                    # Has hyphens: also try the compact (hyphen-less) form
                    filters.append({"document_id": document_id.replace("-", "")})
                else:
                    # No hyphens: try the canonical hyphenated UUID form
                    try:
                        formatted_uuid = str(uuid.UUID(document_id))
                        filters.append({"document_id": formatted_uuid})
                    except ValueError:
                        # Not actually a UUID — nothing extra to try
                        pass

            # Short, non-"doc-" identifiers may actually be document titles
            if not document_id.startswith("doc-") and not document_id.startswith("test-doc-") and len(document_id) < 36:
                filters.append({"title": document_id})

            # If additional metadata was provided, derive extra filters from it
            if additional_metadata:
                if "document_name" in additional_metadata:
                    # Try exact name match on the title field
                    filters.append({"title": additional_metadata["document_name"]})

                    # Names containing a dot are likely filenames too
                    if "." in additional_metadata["document_name"]:
                        filters.append({"filename": additional_metadata["document_name"]})

            # Probe each filter; the first one that matches wins
            found_vectors = False
            deleted_count = 0
            filter_used = ""

            logger.info(f"[{self.correlation_id}] Will try {len(filters)} different filters to find document")

            for i, filter_query in enumerate(filters):
                logger.info(f"[{self.correlation_id}] Searching for vectors with filter #{i+1}: {filter_query}")

                # Probe with a metadata-only query before deleting
                try:
                    # NOTE(review): the dummy query vector hardcodes dimension
                    # 1536 — confirm this matches the index dimension, else
                    # the query raises and this filter is skipped.
                    results = self.pinecone_index.query(
                        vector=[0] * 1536,  # Dummy vector, we only care about metadata filter
                        top_k=1,
                        include_metadata=True,
                        filter=filter_query,
                        namespace=actual_namespace
                    )

                    if results and results.get("matches") and len(results.get("matches", [])) > 0:
                        logger.info(f"[{self.correlation_id}] Found vectors matching filter: {filter_query}")
                        found_vectors = True
                        filter_used = str(filter_query)

                        # Delete every vector matching this filter
                        delete_result = self.pinecone_index.delete(
                            filter=filter_query,
                            namespace=actual_namespace
                        )

                        # Get delete count from result
                        deleted_count = delete_result.get("deleted_count", 0)
                        logger.info(f"[{self.correlation_id}] Deleted {deleted_count} vectors with filter: {filter_query}")
                        break
                except Exception as filter_error:
                    # A failing filter is not fatal — move on to the next one
                    logger.warning(f"[{self.correlation_id}] Error searching with filter {filter_query}: {str(filter_error)}")
                    continue

            # No filter matched anything: report success with a warning
            if not found_vectors:
                logger.warning(f"[{self.correlation_id}] No vectors found for document '{document_id}' in namespace '{actual_namespace}'")
                return {
                    "success": True,  # Still return success=True to maintain backward compatibility
                    "document_id": document_id,
                    "namespace": actual_namespace,
                    "deleted_count": 0,
                    "warning": f"No vectors found for document '{document_id}' in namespace '{actual_namespace}'",
                    "message": f"Found 0 vectors for document '{document_id}' in namespace '{actual_namespace}'",
                    "vectors_found": False,
                    "vectors_deleted": 0
                }

            return {
                "success": True,
                "document_id": document_id,
                "namespace": actual_namespace,
                "deleted_count": deleted_count,
                "filter_used": filter_used,
                "message": f"Successfully deleted {deleted_count} vectors for document '{document_id}' from namespace '{actual_namespace}'",
                "vectors_found": True,
                "vectors_deleted": deleted_count
            }
        except Exception as e:
            logger.error(f"[{self.correlation_id}] Error deleting document vectors: {str(e)}")
            return {
                "success": False,
                "document_id": document_id,
                "error": f"Error deleting document vectors: {str(e)}",
                "vectors_found": False,
                "vectors_deleted": 0
            }
 
411
  async def list_documents(self):
412
+ """List all documents in a namespace"""
413
+ # The namespace to use might be vdb-X format if vector_db_id provided
414
+ actual_namespace = f"vdb-{self.vector_db_id}" if self.vector_db_id else self.namespace
415
+
 
 
 
 
 
 
 
 
416
  try:
417
  if not self.pinecone_index:
418
  self.pinecone_index = PineconeConnectionManager.get_index(self.api_key, self.index_name)
419
 
420
+ logger.info(f"[{self.correlation_id}] Listing documents in namespace '{actual_namespace}'")
 
421
 
422
+ # Get index stats for namespace
423
  stats = self.pinecone_index.describe_index_stats()
424
+ namespace_stats = stats.get("namespaces", {}).get(actual_namespace, {})
425
+ vector_count = namespace_stats.get("vector_count", 0)
426
+
427
+ if vector_count == 0:
428
+ # No vectors in namespace
429
+ return DocumentsListResponse(
430
+ success=True,
431
+ total_vectors=0,
432
+ namespace=actual_namespace,
433
+ index_name=self.index_name,
434
+ documents=[]
435
+ ).dict()
436
+
437
+ # Query for vectors with a dummy vector to get back metadata
438
+ # This is not efficient but is a simple approach to extract document info
439
  results = self.pinecone_index.query(
440
+ vector=[0] * stats.dimension, # Use index dimensions
441
+ top_k=min(vector_count, 1000), # Get at most 1000 vectors
442
  include_metadata=True,
443
  namespace=actual_namespace
444
  )
445
 
446
+ # Process results to extract unique documents
447
+ seen_documents = set()
448
+ documents = []
449
 
450
+ for match in results.get("matches", []):
451
  metadata = match.get("metadata", {})
452
+ document_id = metadata.get("document_id")
453
 
454
+ if document_id and document_id not in seen_documents:
455
+ seen_documents.add(document_id)
456
+ doc_info = {
457
+ "id": document_id,
458
+ "title": metadata.get("title"),
459
+ "filename": metadata.get("filename"),
460
+ "content_type": metadata.get("content_type"),
461
+ "chunk_count": 0
462
  }
463
+ documents.append(doc_info)
464
+
465
+ # Count chunks for this document
466
+ for doc in documents:
467
+ if doc["id"] == document_id:
468
+ doc["chunk_count"] += 1
469
+ break
470
+
471
+ return DocumentsListResponse(
472
+ success=True,
473
+ total_vectors=vector_count,
474
+ namespace=actual_namespace,
475
+ index_name=self.index_name,
476
+ documents=documents
477
+ ).dict()
478
 
 
 
 
 
 
 
 
479
  except Exception as e:
480
  logger.error(f"[{self.correlation_id}] Error listing documents: {str(e)}")
481
+ return DocumentsListResponse(
482
+ success=False,
483
+ error=f"Error listing documents: {str(e)}"
484
+ ).dict()
485
+
486
+
487
+
488
 
pytest.ini DELETED
@@ -1,12 +0,0 @@
1
- [pytest]
2
- # Bỏ qua cảnh báo về anyio module và các cảnh báo vận hành nội bộ
3
- filterwarnings =
4
- ignore::pytest.PytestAssertRewriteWarning:.*anyio
5
- ignore:.*general_plain_validator_function.* is deprecated.*:DeprecationWarning
6
- ignore:.*with_info_plain_validator_function.*:DeprecationWarning
7
-
8
- # Cấu hình cơ bản khác
9
- testpaths = tests
10
- python_files = test_*.py
11
- python_classes = Test*
12
- python_functions = test_*
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -31,12 +31,14 @@ httpx==0.25.1
31
  requests==2.31.0
32
  beautifulsoup4==4.12.2
33
  redis==5.0.1
 
34
 
35
  # Testing
36
  prometheus-client==0.17.1
37
  pytest==7.4.0
38
  pytest-cov==4.1.0
39
  watchfiles==0.21.0
 
40
 
41
  # Core dependencies
42
  starlette==0.27.0
 
31
  requests==2.31.0
32
  beautifulsoup4==4.12.2
33
  redis==5.0.1
34
+ aiofiles==23.2.1
35
 
36
  # Testing
37
  prometheus-client==0.17.1
38
  pytest==7.4.0
39
  pytest-cov==4.1.0
40
  watchfiles==0.21.0
41
+ fpdf==1.7.2
42
 
43
  # Core dependencies
44
  starlette==0.27.0