Add application file
Browse files
app.py
CHANGED
@@ -37,6 +37,10 @@ class AutoRAGChatApp:
|
|
37 |
"""
|
38 |
RAG μ±λ΄ μ ν리μΌμ΄μ
μ΄κΈ°ν
|
39 |
"""
|
|
|
|
|
|
|
|
|
40 |
# λ°μ΄ν° λλ ν 리 μ μ
|
41 |
self.pdf_directory = PDF_DIRECTORY
|
42 |
self.cache_directory = "cached_data"
|
@@ -69,14 +73,20 @@ class AutoRAGChatApp:
|
|
69 |
self.documents = []
|
70 |
self.processed_files = []
|
71 |
self.is_initialized = False
|
|
|
72 |
|
73 |
# ν΄λ‘λ° STT ν΄λΌμ΄μΈνΈ μ΄κΈ°ν
|
74 |
self.stt_client = ClovaSTT()
|
75 |
print("μμ±μΈμ(STT) κΈ°λ₯μ΄ μ΄κΈ°νλμμ΅λλ€.")
|
76 |
|
|
|
|
|
|
|
77 |
# μμ μ μλμΌλ‘ λ¬Έμ λ‘λ λ° μ²λ¦¬
|
78 |
print("λ¬Έμ μλ λ‘λ λ° μ²λ¦¬ μμ...")
|
79 |
-
self.auto_process_documents()
|
|
|
|
|
80 |
|
81 |
def _process_pdf_file(self, file_path: str) -> List[Document]:
|
82 |
"""
|
@@ -285,6 +295,7 @@ class AutoRAGChatApp:
|
|
285 |
pdf_files.append(os.path.join(self.pdf_directory, filename))
|
286 |
|
287 |
if not pdf_files:
|
|
|
288 |
return f"'{self.pdf_directory}' ν΄λμ PDF νμΌμ΄ μμ΅λλ€."
|
289 |
|
290 |
print(f"λ°κ²¬λ PDF νμΌ: {len(pdf_files)}κ°")
|
@@ -383,8 +394,18 @@ class AutoRAGChatApp:
|
|
383 |
|
384 |
# RAG μ²΄μΈ μ΄κΈ°ν
|
385 |
if RAG_CHAIN_AVAILABLE:
|
386 |
-
|
387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
388 |
|
389 |
total_time = time.time() - start_time
|
390 |
|
@@ -397,15 +418,20 @@ class AutoRAGChatApp:
|
|
397 |
f"- μ€ν¨ν νμΌ: {len(failed_files)}κ°\n"
|
398 |
f"- μ΄ μ²ν¬ μ: {len(self.documents)}κ°\n"
|
399 |
f"- μ²λ¦¬ μκ°: {total_time:.2f}μ΄\n"
|
|
|
400 |
f"μ΄μ μ§λ¬Έν μ€λΉκ° λμμ΅λλ€!"
|
401 |
)
|
402 |
|
403 |
print(status_message)
|
404 |
return status_message
|
405 |
else:
|
406 |
-
|
|
|
|
|
|
|
407 |
|
408 |
except Exception as e:
|
|
|
409 |
error_message = f"λ¬Έμ μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}"
|
410 |
print(error_message)
|
411 |
import traceback
|
@@ -547,37 +573,48 @@ class AutoRAGChatApp:
|
|
547 |
|
548 |
# μΈμλ ν
μ€νΈλ‘ 쿼리 μ²λ¦¬ (μμ± λ©μμ§ μ λμ΄ μΆκ°)
|
549 |
query = f"π€ {recognized_text}"
|
|
|
550 |
|
551 |
# RAG μ²΄μΈ μ€ν λ° μλ΅ μμ±
|
552 |
-
if not self.is_initialized:
|
|
|
553 |
response = "λ¬Έμ λ‘λκ° μ΄κΈ°νλμ§ μμμ΅λλ€. μλ λ‘λλ₯Ό μλν©λλ€."
|
554 |
chat_history.append((query, response))
|
555 |
|
556 |
# μλ λ‘λ μλ
|
557 |
try:
|
558 |
-
self.auto_process_documents()
|
559 |
-
|
560 |
-
|
|
|
|
|
561 |
chat_history.append((query, response))
|
562 |
return chat_history
|
563 |
except Exception as e:
|
564 |
response = f"λ¬Έμ λ‘λ μ€ μ€λ₯ λ°μ: {str(e)}"
|
565 |
chat_history.append((query, response))
|
566 |
return chat_history
|
|
|
|
|
567 |
|
568 |
try:
|
569 |
# RAG μ²΄μΈ μ€ν λ° μλ΅ μμ±
|
570 |
start_time = time.time()
|
|
|
571 |
response = self.rag_chain.run(query)
|
572 |
end_time = time.time()
|
573 |
|
574 |
query_time = end_time - start_time
|
575 |
print(f"쿼리 μ²λ¦¬ μκ°: {query_time:.2f}μ΄")
|
|
|
576 |
|
577 |
chat_history.append((query, response))
|
578 |
return chat_history
|
579 |
except Exception as e:
|
580 |
error_msg = f"μ€λ₯ λ°μ: {str(e)}"
|
|
|
|
|
|
|
581 |
chat_history.append((query, error_msg))
|
582 |
return chat_history
|
583 |
|
|
|
37 |
"""
|
38 |
RAG μ±λ΄ μ ν리μΌμ΄μ
μ΄κΈ°ν
|
39 |
"""
|
40 |
+
print("=" * 50)
|
41 |
+
print("μμ±μΈμ RAG μ±λ΄ μ ν리μΌμ΄μ
μ΄κΈ°ν μμ")
|
42 |
+
print("=" * 50)
|
43 |
+
|
44 |
# λ°μ΄ν° λλ ν 리 μ μ
|
45 |
self.pdf_directory = PDF_DIRECTORY
|
46 |
self.cache_directory = "cached_data"
|
|
|
73 |
self.documents = []
|
74 |
self.processed_files = []
|
75 |
self.is_initialized = False
|
76 |
+
self.rag_chain = None
|
77 |
|
78 |
# ν΄λ‘λ° STT ν΄λΌμ΄μΈνΈ μ΄κΈ°ν
|
79 |
self.stt_client = ClovaSTT()
|
80 |
print("μμ±μΈμ(STT) κΈ°λ₯μ΄ μ΄κΈ°νλμμ΅λλ€.")
|
81 |
|
82 |
+
# RAG μ²΄μΈ μ¬μ© κ°λ₯μ± νμΈ
|
83 |
+
print(f"RAG μ²΄μΈ μ¬μ© κ°λ₯: {RAG_CHAIN_AVAILABLE}")
|
84 |
+
|
85 |
# μμ μ μλμΌλ‘ λ¬Έμ λ‘λ λ° μ²λ¦¬
|
86 |
print("λ¬Έμ μλ λ‘λ λ° μ²λ¦¬ μμ...")
|
87 |
+
result = self.auto_process_documents()
|
88 |
+
print(f"μ΄κΈ°ν μλ£ μν: {self.is_initialized}")
|
89 |
+
print("=" * 50)
|
90 |
|
91 |
def _process_pdf_file(self, file_path: str) -> List[Document]:
|
92 |
"""
|
|
|
295 |
pdf_files.append(os.path.join(self.pdf_directory, filename))
|
296 |
|
297 |
if not pdf_files:
|
298 |
+
print(f"'{self.pdf_directory}' ν΄λμ PDF νμΌμ΄ μμ΅λλ€.")
|
299 |
return f"'{self.pdf_directory}' ν΄λμ PDF νμΌμ΄ μμ΅λλ€."
|
300 |
|
301 |
print(f"λ°κ²¬λ PDF νμΌ: {len(pdf_files)}κ°")
|
|
|
394 |
|
395 |
# RAG μ²΄μΈ μ΄κΈ°ν
|
396 |
if RAG_CHAIN_AVAILABLE:
|
397 |
+
print("RAG μ²΄μΈ μ΄κΈ°ν μ€...")
|
398 |
+
try:
|
399 |
+
# λ°λμ λͺ¨λ λ¬Έμ μ²λ¦¬μ λ²‘ν° μ€ν μ΄ μ€μ μ΄ μλ£λ ν RAG μ²΄μΈ μ΄κΈ°ν
|
400 |
+
self.rag_chain = RAGChain(self.vector_store)
|
401 |
+
self.is_initialized = True
|
402 |
+
print("RAG μ²΄μΈ μ΄κΈ°ν μλ£!")
|
403 |
+
except Exception as e:
|
404 |
+
print(f"RAG μ²΄μΈ μ΄κΈ°ν μ€ν¨: {e}")
|
405 |
+
import traceback
|
406 |
+
traceback.print_exc()
|
407 |
+
self.is_initialized = False
|
408 |
+
return f"RAG μ²΄μΈ μ΄κΈ°ν μ€ν¨: {e}"
|
409 |
|
410 |
total_time = time.time() - start_time
|
411 |
|
|
|
418 |
f"- μ€ν¨ν νμΌ: {len(failed_files)}κ°\n"
|
419 |
f"- μ΄ μ²ν¬ μ: {len(self.documents)}κ°\n"
|
420 |
f"- μ²λ¦¬ μκ°: {total_time:.2f}μ΄\n"
|
421 |
+
f"- RAG μ²΄μΈ μ΄κΈ°ν: {'μ±κ³΅' if self.is_initialized else 'μ€ν¨'}\n"
|
422 |
f"μ΄μ μ§λ¬Έν μ€λΉκ° λμμ΅λλ€!"
|
423 |
)
|
424 |
|
425 |
print(status_message)
|
426 |
return status_message
|
427 |
else:
|
428 |
+
self.is_initialized = False
|
429 |
+
error_msg = "RAG 체μΈμ μ΄κΈ°νν μ μμ΅λλ€. νμν λΌμ΄λΈλ¬λ¦¬κ° μ€μΉλμ΄ μλμ§ νμΈνμΈμ."
|
430 |
+
print(error_msg)
|
431 |
+
return error_msg
|
432 |
|
433 |
except Exception as e:
|
434 |
+
self.is_initialized = False
|
435 |
error_message = f"λ¬Έμ μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}"
|
436 |
print(error_message)
|
437 |
import traceback
|
|
|
573 |
|
574 |
# μΈμλ ν
μ€νΈλ‘ 쿼리 μ²λ¦¬ (μμ± λ©μμ§ μ λμ΄ μΆκ°)
|
575 |
query = f"π€ {recognized_text}"
|
576 |
+
print(f"[DEBUG] is_initialized = {self.is_initialized}, RAG μ²΄μΈ μ‘΄μ¬ = {self.rag_chain is not None}")
|
577 |
|
578 |
# RAG μ²΄μΈ μ€ν λ° μλ΅ μμ±
|
579 |
+
if not self.is_initialized or self.rag_chain is None:
|
580 |
+
print("μμ± μΏΌλ¦¬ μ²λ¦¬: λ¬Έμ λ‘λ μ΄κΈ°νκ° νμν©λλ€.")
|
581 |
response = "λ¬Έμ λ‘λκ° μ΄κΈ°νλμ§ μμμ΅λλ€. μλ λ‘λλ₯Ό μλν©λλ€."
|
582 |
chat_history.append((query, response))
|
583 |
|
584 |
# μλ λ‘λ μλ
|
585 |
try:
|
586 |
+
init_result = self.auto_process_documents()
|
587 |
+
print(f"[DEBUG] μλ λ‘λ ν is_initialized = {self.is_initialized}, RAG μ²΄μΈ μ‘΄μ¬ = {self.rag_chain is not None}")
|
588 |
+
|
589 |
+
if not self.is_initialized or self.rag_chain is None:
|
590 |
+
response = f"λ¬Έμλ₯Ό λ‘λν μ μμ΅λλ€. 'documents' ν΄λμ PDF οΏ½οΏ½μΌμ΄ μλμ§ νμΈνμΈμ.\nμ€λ₯ μ 보: {init_result}"
|
591 |
chat_history.append((query, response))
|
592 |
return chat_history
|
593 |
except Exception as e:
|
594 |
response = f"λ¬Έμ λ‘λ μ€ μ€λ₯ λ°μ: {str(e)}"
|
595 |
chat_history.append((query, response))
|
596 |
return chat_history
|
597 |
+
else:
|
598 |
+
print("μμ± μΏΌλ¦¬ μ²λ¦¬: λ¬Έμκ° μ΄λ―Έ λ‘λλμ΄ μμ΅λλ€.")
|
599 |
|
600 |
try:
|
601 |
# RAG μ²΄μΈ μ€ν λ° μλ΅ μμ±
|
602 |
start_time = time.time()
|
603 |
+
print(f"RAG μ²΄μΈ μ€ν μ€: 쿼리 = '{query}'")
|
604 |
response = self.rag_chain.run(query)
|
605 |
end_time = time.time()
|
606 |
|
607 |
query_time = end_time - start_time
|
608 |
print(f"쿼리 μ²λ¦¬ μκ°: {query_time:.2f}μ΄")
|
609 |
+
print(f"μλ΅: {response[:100]}..." if len(response) > 100 else f"μλ΅: {response}")
|
610 |
|
611 |
chat_history.append((query, response))
|
612 |
return chat_history
|
613 |
except Exception as e:
|
614 |
error_msg = f"μ€λ₯ λ°μ: {str(e)}"
|
615 |
+
print(f"RAG μ²΄μΈ μ€ν μ€ μ€λ₯: {error_msg}")
|
616 |
+
import traceback
|
617 |
+
traceback.print_exc()
|
618 |
chat_history.append((query, error_msg))
|
619 |
return chat_history
|
620 |
|