jeongsoo committed on
Commit
52a2853
·
1 Parent(s): aafe228

Add application file

Browse files
Files changed (1) hide show
  1. app.py +45 -8
app.py CHANGED
@@ -37,6 +37,10 @@ class AutoRAGChatApp:
37
  """
38
  RAG 챗봇 애플리케이션 초기화
39
  """
 
 
 
 
40
  # 데이터 디렉토리 정의
41
  self.pdf_directory = PDF_DIRECTORY
42
  self.cache_directory = "cached_data"
@@ -69,14 +73,20 @@ class AutoRAGChatApp:
69
  self.documents = []
70
  self.processed_files = []
71
  self.is_initialized = False
 
72
 
73
  # 클로바 STT 클라이언트 초기화
74
  self.stt_client = ClovaSTT()
75
  print("μŒμ„±μΈμ‹(STT) κΈ°λŠ₯이 μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
76
 
 
 
 
77
  # 시작 시 자동으로 문서 로드 및 처리
78
  print("λ¬Έμ„œ μžλ™ λ‘œλ“œ 및 처리 μ‹œμž‘...")
79
- self.auto_process_documents()
 
 
80
 
81
  def _process_pdf_file(self, file_path: str) -> List[Document]:
82
  """
@@ -285,6 +295,7 @@ class AutoRAGChatApp:
285
  pdf_files.append(os.path.join(self.pdf_directory, filename))
286
 
287
  if not pdf_files:
 
288
  return f"'{self.pdf_directory}' 폴더에 PDF 파일이 μ—†μŠ΅λ‹ˆλ‹€."
289
 
290
  print(f"발견된 PDF 파일: {len(pdf_files)}개")
@@ -383,8 +394,18 @@ class AutoRAGChatApp:
383
 
384
  # RAG 체인 초기화
385
  if RAG_CHAIN_AVAILABLE:
386
- self.rag_chain = RAGChain(self.vector_store)
387
- self.is_initialized = True
 
 
 
 
 
 
 
 
 
 
388
 
389
  total_time = time.time() - start_time
390
 
@@ -397,15 +418,20 @@ class AutoRAGChatApp:
397
  f"- μ‹€νŒ¨ν•œ 파일: {len(failed_files)}개\n"
398
  f"- 총 청크 수: {len(self.documents)}개\n"
399
  f"- 처리 μ‹œκ°„: {total_time:.2f}초\n"
 
400
  f"이제 μ§ˆλ¬Έν•  μ€€λΉ„κ°€ λ˜μ—ˆμŠ΅λ‹ˆλ‹€!"
401
  )
402
 
403
  print(status_message)
404
  return status_message
405
  else:
406
- return "RAG 체인을 μ΄ˆκΈ°ν™”ν•  수 μ—†μŠ΅λ‹ˆλ‹€. ν•„μš”ν•œ λΌμ΄λΈŒλŸ¬λ¦¬κ°€ μ„€μΉ˜λ˜μ–΄ μžˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”."
 
 
 
407
 
408
  except Exception as e:
 
409
  error_message = f"λ¬Έμ„œ 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
410
  print(error_message)
411
  import traceback
@@ -547,37 +573,48 @@ class AutoRAGChatApp:
547
 
548
  # 인식된 텍스트로 쿼리 처리 (음성 메시지 접두어 추가)
549
  query = f"🎀 {recognized_text}"
 
550
 
551
  # RAG 체인 실행 및 응답 생성
552
- if not self.is_initialized:
 
553
  response = "λ¬Έμ„œ λ‘œλ“œκ°€ μ΄ˆκΈ°ν™”λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. μžλ™ λ‘œλ“œλ₯Ό μ‹œλ„ν•©λ‹ˆλ‹€."
554
  chat_history.append((query, response))
555
 
556
  # 자동 로드 시도
557
  try:
558
- self.auto_process_documents()
559
- if not self.is_initialized:
560
- response = "λ¬Έμ„œλ₯Ό λ‘œλ“œν•  수 μ—†μŠ΅λ‹ˆλ‹€. 'documents' 폴더에 PDF 파일이 μžˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”."
 
 
561
  chat_history.append((query, response))
562
  return chat_history
563
  except Exception as e:
564
  response = f"λ¬Έμ„œ λ‘œλ“œ 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
565
  chat_history.append((query, response))
566
  return chat_history
 
 
567
 
568
  try:
569
  # RAG 체인 실행 및 응답 생성
570
  start_time = time.time()
 
571
  response = self.rag_chain.run(query)
572
  end_time = time.time()
573
 
574
  query_time = end_time - start_time
575
  print(f"쿼리 처리 μ‹œκ°„: {query_time:.2f}초")
 
576
 
577
  chat_history.append((query, response))
578
  return chat_history
579
  except Exception as e:
580
  error_msg = f"였λ₯˜ λ°œμƒ: {str(e)}"
 
 
 
581
  chat_history.append((query, error_msg))
582
  return chat_history
583
 
 
37
  """
38
  RAG 챗봇 애플리케이션 초기화
39
  """
40
+ print("=" * 50)
41
+ print("μŒμ„±μΈμ‹ RAG 챗봇 μ• ν”Œλ¦¬μΌ€μ΄μ…˜ μ΄ˆκΈ°ν™” μ‹œμž‘")
42
+ print("=" * 50)
43
+
44
  # 데이터 디렉토리 정의
45
  self.pdf_directory = PDF_DIRECTORY
46
  self.cache_directory = "cached_data"
 
73
  self.documents = []
74
  self.processed_files = []
75
  self.is_initialized = False
76
+ self.rag_chain = None
77
 
78
  # 클로바 STT 클라이언트 초기화
79
  self.stt_client = ClovaSTT()
80
  print("μŒμ„±μΈμ‹(STT) κΈ°λŠ₯이 μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
81
 
82
+ # RAG 체인 사용 가능성 확인
83
+ print(f"RAG 체인 μ‚¬μš© κ°€λŠ₯: {RAG_CHAIN_AVAILABLE}")
84
+
85
  # 시작 시 자동으로 문서 로드 및 처리
86
  print("λ¬Έμ„œ μžλ™ λ‘œλ“œ 및 처리 μ‹œμž‘...")
87
+ result = self.auto_process_documents()
88
+ print(f"μ΄ˆκΈ°ν™” μ™„λ£Œ μƒνƒœ: {self.is_initialized}")
89
+ print("=" * 50)
90
 
91
  def _process_pdf_file(self, file_path: str) -> List[Document]:
92
  """
 
295
  pdf_files.append(os.path.join(self.pdf_directory, filename))
296
 
297
  if not pdf_files:
298
+ print(f"'{self.pdf_directory}' 폴더에 PDF 파일이 μ—†μŠ΅λ‹ˆλ‹€.")
299
  return f"'{self.pdf_directory}' 폴더에 PDF 파일이 μ—†μŠ΅λ‹ˆλ‹€."
300
 
301
  print(f"발견된 PDF 파일: {len(pdf_files)}개")
 
394
 
395
  # RAG 체인 초기화
396
  if RAG_CHAIN_AVAILABLE:
397
+ print("RAG 체인 μ΄ˆκΈ°ν™” 쀑...")
398
+ try:
399
+ # 반드시 모든 문서 처리와 벡터 스토어 설정이 완료된 후 RAG 체인 초기화
400
+ self.rag_chain = RAGChain(self.vector_store)
401
+ self.is_initialized = True
402
+ print("RAG 체인 μ΄ˆκΈ°ν™” μ™„λ£Œ!")
403
+ except Exception as e:
404
+ print(f"RAG 체인 μ΄ˆκΈ°ν™” μ‹€νŒ¨: {e}")
405
+ import traceback
406
+ traceback.print_exc()
407
+ self.is_initialized = False
408
+ return f"RAG 체인 μ΄ˆκΈ°ν™” μ‹€νŒ¨: {e}"
409
 
410
  total_time = time.time() - start_time
411
 
 
418
  f"- μ‹€νŒ¨ν•œ 파일: {len(failed_files)}개\n"
419
  f"- 총 청크 수: {len(self.documents)}개\n"
420
  f"- 처리 μ‹œκ°„: {total_time:.2f}초\n"
421
+ f"- RAG 체인 μ΄ˆκΈ°ν™”: {'성곡' if self.is_initialized else 'μ‹€νŒ¨'}\n"
422
  f"이제 μ§ˆλ¬Έν•  μ€€λΉ„κ°€ λ˜μ—ˆμŠ΅λ‹ˆλ‹€!"
423
  )
424
 
425
  print(status_message)
426
  return status_message
427
  else:
428
+ self.is_initialized = False
429
+ error_msg = "RAG 체인을 μ΄ˆκΈ°ν™”ν•  수 μ—†μŠ΅λ‹ˆλ‹€. ν•„μš”ν•œ λΌμ΄λΈŒλŸ¬λ¦¬κ°€ μ„€μΉ˜λ˜μ–΄ μžˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”."
430
+ print(error_msg)
431
+ return error_msg
432
 
433
  except Exception as e:
434
+ self.is_initialized = False
435
  error_message = f"λ¬Έμ„œ 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
436
  print(error_message)
437
  import traceback
 
573
 
574
  # 인식된 텍스트로 쿼리 처리 (음성 메시지 접두어 추가)
575
  query = f"🎀 {recognized_text}"
576
+ print(f"[DEBUG] is_initialized = {self.is_initialized}, RAG 체인 쑴재 = {self.rag_chain is not None}")
577
 
578
  # RAG 체인 실행 및 응답 생성
579
+ if not self.is_initialized or self.rag_chain is None:
580
+ print("μŒμ„± 쿼리 처리: λ¬Έμ„œ λ‘œλ“œ μ΄ˆκΈ°ν™”κ°€ ν•„μš”ν•©λ‹ˆλ‹€.")
581
  response = "λ¬Έμ„œ λ‘œλ“œκ°€ μ΄ˆκΈ°ν™”λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. μžλ™ λ‘œλ“œλ₯Ό μ‹œλ„ν•©λ‹ˆλ‹€."
582
  chat_history.append((query, response))
583
 
584
  # 자동 로드 시도
585
  try:
586
+ init_result = self.auto_process_documents()
587
+ print(f"[DEBUG] μžλ™ λ‘œλ“œ ν›„ is_initialized = {self.is_initialized}, RAG 체인 쑴재 = {self.rag_chain is not None}")
588
+
589
+ if not self.is_initialized or self.rag_chain is None:
590
+ response = f"λ¬Έμ„œλ₯Ό λ‘œλ“œν•  수 μ—†μŠ΅λ‹ˆλ‹€. 'documents' 폴더에 PDF ��일이 μžˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”.\n였λ₯˜ 정보: {init_result}"
591
  chat_history.append((query, response))
592
  return chat_history
593
  except Exception as e:
594
  response = f"λ¬Έμ„œ λ‘œλ“œ 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
595
  chat_history.append((query, response))
596
  return chat_history
597
+ else:
598
+ print("μŒμ„± 쿼리 처리: λ¬Έμ„œκ°€ 이미 λ‘œλ“œλ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.")
599
 
600
  try:
601
  # RAG 체인 실행 및 응답 생성
602
  start_time = time.time()
603
+ print(f"RAG 체인 μ‹€ν–‰ 쀑: 쿼리 = '{query}'")
604
  response = self.rag_chain.run(query)
605
  end_time = time.time()
606
 
607
  query_time = end_time - start_time
608
  print(f"쿼리 처리 μ‹œκ°„: {query_time:.2f}초")
609
+ print(f"응닡: {response[:100]}..." if len(response) > 100 else f"응닡: {response}")
610
 
611
  chat_history.append((query, response))
612
  return chat_history
613
  except Exception as e:
614
  error_msg = f"였λ₯˜ λ°œμƒ: {str(e)}"
615
+ print(f"RAG 체인 μ‹€ν–‰ 쀑 였λ₯˜: {error_msg}")
616
+ import traceback
617
+ traceback.print_exc()
618
  chat_history.append((query, error_msg))
619
  return chat_history
620