jeongsoo committed on
Commit
b12512e
·
1 Parent(s): a76f77b

Add application file

Browse files
Files changed (2) hide show
  1. app.py +245 -52
  2. clova_stt.py +4 -6
app.py CHANGED
@@ -1,31 +1,151 @@
1
  """
2
- ์Œ์„ฑ์ธ์‹(STT) ๊ธฐ๋Šฅ์ด ๊ตฌํ˜„๋œ RAG ์ฑ—๋ด‡ ์•ฑ
3
  """
4
  import os
5
  import time
 
 
6
  import hashlib
7
  import pickle
8
  import json
9
- import tempfile
10
- from typing import List, Dict, Tuple, Any
11
-
12
- from langchain.schema import Document
13
 
 
14
  from config import (
15
  PDF_DIRECTORY, CHUNK_SIZE, CHUNK_OVERLAP, LLM_MODEL,
16
- STT_LANGUAGE, IS_HUGGINGFACE
17
  )
18
  from optimized_document_processor import OptimizedDocumentProcessor
19
  from vector_store import VectorStore
 
 
 
20
  from clova_stt import ClovaSTT
21
 
22
  # ์•ˆ์ „ํ•œ ์ž„ํฌํŠธ
23
  try:
 
24
  from rag_chain import RAGChain
 
25
  RAG_CHAIN_AVAILABLE = True
26
- except ImportError:
27
- print("RAG ์ฒด์ธ ๋ชจ๋“ˆ์„ ๋กœ๋“œํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
28
- RAG_CHAIN_AVAILABLE = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  class AutoRAGChatApp:
@@ -88,6 +208,43 @@ class AutoRAGChatApp:
88
  print(f"์ดˆ๊ธฐํ™” ์™„๋ฃŒ ์ƒํƒœ: {self.is_initialized}")
89
  print("=" * 50)
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  def _process_pdf_file(self, file_path: str) -> List[Document]:
92
  """
93
  PDF ํŒŒ์ผ ์ฒ˜๋ฆฌ - docling ์‹คํŒจ ์‹œ PyPDFLoader ์‚ฌ์šฉ
@@ -393,42 +550,61 @@ class AutoRAGChatApp:
393
  print("๋ฒกํ„ฐ ์ธ๋ฑ์Šค๊ฐ€ ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•„ ์ €์žฅํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
394
 
395
  # RAG ์ฒด์ธ ์ดˆ๊ธฐํ™”
396
- if RAG_CHAIN_AVAILABLE:
397
- print("RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์ค‘...")
398
- try:
399
- # ๋ฐ˜๋“œ์‹œ ๋ชจ๋“  ๋ฌธ์„œ ์ฒ˜๋ฆฌ์™€ ๋ฒกํ„ฐ ์Šคํ† ์–ด ์„ค์ •์ด ์™„๋ฃŒ๋œ ํ›„ RAG ์ฒด์ธ ์ดˆ๊ธฐํ™”
 
400
  self.rag_chain = RAGChain(self.vector_store)
401
- self.is_initialized = True
402
- print("RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ!")
403
- except Exception as e:
404
- print(f"RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
405
- import traceback
406
- traceback.print_exc()
 
 
 
 
 
 
 
 
407
  self.is_initialized = False
408
- return f"RAG ๏ฟฝ๏ฟฝ์ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}"
409
-
410
- total_time = time.time() - start_time
411
-
412
- status_message = (
413
- f"๋ฌธ์„œ ์ฒ˜๋ฆฌ ์™„๋ฃŒ!\n"
414
- f"- ์ฒ˜๋ฆฌ๋œ ํŒŒ์ผ: {len(self.processed_files)}๊ฐœ\n"
415
- f"- ์บ์‹œ๋œ ํŒŒ์ผ: {len(cached_files)}๊ฐœ\n"
416
- f"- ์ƒˆ ํŒŒ์ผ: {len(new_files)}๊ฐœ\n"
417
- f"- ์—…๋ฐ์ดํŠธ๋œ ํŒŒ์ผ: {len(updated_files)}๊ฐœ\n"
418
- f"- ์‹คํŒจํ•œ ํŒŒ์ผ: {len(failed_files)}๊ฐœ\n"
419
- f"- ์ด ์ฒญํฌ ์ˆ˜: {len(self.documents)}๊ฐœ\n"
420
- f"- ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {total_time:.2f}์ดˆ\n"
421
- f"- RAG ์ฒด์ธ ์ดˆ๊ธฐํ™”: {'์„ฑ๊ณต' if self.is_initialized else '์‹คํŒจ'}\n"
422
- f"์ด์ œ ์งˆ๋ฌธํ•  ์ค€๋น„๊ฐ€ ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!"
423
- )
424
-
425
- print(status_message)
426
- return status_message
427
- else:
428
  self.is_initialized = False
429
- error_msg = "RAG ์ฒด์ธ์„ ์ดˆ๊ธฐํ™”ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ํ•„์š”ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”."
430
- print(error_msg)
431
- return error_msg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
 
433
  except Exception as e:
434
  self.is_initialized = False
@@ -512,7 +688,13 @@ class AutoRAGChatApp:
512
  # RAG ์ฒด์ธ ์‹คํ–‰ ๋ฐ ์‘๋‹ต ์ƒ์„ฑ
513
  start_time = time.time()
514
  print(f"RAG ์ฒด์ธ ์‹คํ–‰ ์ค‘: ์ฟผ๋ฆฌ = '{query}'")
515
- response = self.rag_chain.run(query)
 
 
 
 
 
 
516
  end_time = time.time()
517
 
518
  query_time = end_time - start_time
@@ -533,7 +715,7 @@ class AutoRAGChatApp:
533
  new_history.append([query, error_msg])
534
  return "", new_history
535
 
536
- def process_voice_query(self, audio, chat_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
537
  """
538
  ์Œ์„ฑ ์ฟผ๋ฆฌ ์ฒ˜๋ฆฌ
539
 
@@ -545,7 +727,7 @@ class AutoRAGChatApp:
545
  ์—…๋ฐ์ดํŠธ๋œ ๋Œ€ํ™” ๊ธฐ๋ก
546
  """
547
  if audio is None:
548
- return "", chat_history
549
 
550
  try:
551
  import numpy as np
@@ -575,16 +757,18 @@ class AutoRAGChatApp:
575
  if "error" in result:
576
  error_msg = f"์Œ์„ฑ์ธ์‹ ์˜ค๋ฅ˜: {result.get('error')}"
577
  print(f"[STT] {error_msg}")
578
- chat_history.append(("์Œ์„ฑ ๋ฉ”์‹œ์ง€", error_msg))
579
- return "", chat_history
 
580
 
581
  # ์ธ์‹๋œ ํ…์ŠคํŠธ ์ถ”์ถœ
582
  recognized_text = result.get("text", "")
583
  if not recognized_text:
584
  error_msg = "์Œ์„ฑ์„ ์ธ์‹ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."
585
  print("[STT] ์ธ์‹๋œ ํ…์ŠคํŠธ ์—†์Œ")
586
- chat_history.append(("์Œ์„ฑ ๋ฉ”์‹œ์ง€", error_msg))
587
- return "", chat_history
 
588
 
589
  print(f"[STT] ์ธ์‹๋œ ํ…์ŠคํŠธ: {recognized_text}")
590
 
@@ -621,7 +805,13 @@ class AutoRAGChatApp:
621
  # RAG ์ฒด์ธ ์‹คํ–‰ ๋ฐ ์‘๋‹ต ์ƒ์„ฑ
622
  start_time = time.time()
623
  print(f"RAG ์ฒด์ธ ์‹คํ–‰ ์ค‘: ์ฟผ๋ฆฌ = '{query}'")
624
- response = self.rag_chain.run(query)
 
 
 
 
 
 
625
  end_time = time.time()
626
 
627
  query_time = end_time - start_time
@@ -645,8 +835,11 @@ class AutoRAGChatApp:
645
  except Exception as e:
646
  error_msg = f"์Œ์„ฑ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
647
  print(f"[STT] {error_msg}")
648
- chat_history.append(("์Œ์„ฑ ๋ฉ”์‹œ์ง€", error_msg))
649
- return "", chat_history
 
 
 
650
 
651
  def launch_app(self) -> None:
652
  """
 
1
  """
2
+ ์Œ์„ฑ์ธ์‹ ๊ธฐ๋Šฅ์ด ์ถ”๊ฐ€๋œ RAG ์ฑ—๋ด‡ ์•ฑ
3
  """
4
  import os
5
  import time
6
+ import tempfile
7
+ from typing import List, Dict, Tuple, Any, Optional
8
  import hashlib
9
  import pickle
10
  import json
 
 
 
 
11
 
12
+ # ๊ธฐ์กด ์ž„ํฌํŠธ
13
  from config import (
14
  PDF_DIRECTORY, CHUNK_SIZE, CHUNK_OVERLAP, LLM_MODEL,
15
+ STT_LANGUAGE, IS_HUGGINGFACE, OPENAI_API_KEY, USE_OPENAI
16
  )
17
  from optimized_document_processor import OptimizedDocumentProcessor
18
  from vector_store import VectorStore
19
+ from langchain.schema import Document
20
+
21
+ # ํด๋กœ๋ฐ” STT ๋ชจ๋“ˆ ์ž„ํฌํŠธ
22
  from clova_stt import ClovaSTT
23
 
24
  # ์•ˆ์ „ํ•œ ์ž„ํฌํŠธ
25
  try:
26
+ print("RAG ์ฒด์ธ ๋ชจ๋“ˆ ๋กœ๋“œ ์‹œ๋„...")
27
  from rag_chain import RAGChain
28
+ # RAGChain ํด๋ž˜์Šค๊ฐ€ ์ œ๋Œ€๋กœ ์ž„ํฌํŠธ๋˜์—ˆ๋Š”์ง€ ํ™•์ธ
29
  RAG_CHAIN_AVAILABLE = True
30
+ print("์™ธ๋ถ€ RAG ์ฒด์ธ ๋ชจ๋“ˆ ๋กœ๋“œ ์„ฑ๊ณต")
31
+ except Exception as e:
32
+ print(f"์™ธ๋ถ€ RAG ์ฒด์ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
33
+ print("๋‚ด์žฅ RAG ์ฒด์ธ์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
34
+ # ๋‚ด์žฅ RAG ์ฒด์ธ ๊ตฌํ˜„ ์‚ฌ์šฉ
35
+ RAG_CHAIN_AVAILABLE = True
36
+
37
+ # ํ•„์š”ํ•œ langchain ์ž„ํฌํŠธ
38
+ try:
39
+ from langchain_openai import ChatOpenAI
40
+ from langchain_community.chat_models import ChatOllama
41
+ from langchain.prompts import PromptTemplate
42
+ from langchain_core.output_parsers import StrOutputParser
43
+ from langchain_core.runnables import RunnablePassthrough
44
+
45
+ # SimpleRAGChain ๋‚ด์žฅ ํด๋ž˜์Šค ์ •์˜
46
class RAGChain:
    """Minimal retrieval-augmented generation chain bundled with the app.

    Defined as a stand-in with the same ``RAGChain(vector_store)`` /
    ``run(query)`` surface that the rest of the app calls.  It retrieves
    the top-k chunks from the vector store, formats them with their source
    (and page, when known) into a prompt, and sends that prompt to the
    configured chat model.

    NOTE(review): the non-ASCII string literals in this block appear
    mis-encoded in the view this was written from; they are reproduced as
    found rather than re-typed.
    """

    def __init__(self, vector_store):
        """Create the LLM client, the prompt and the runnable chain.

        Args:
            vector_store: Object exposing ``similarity_search(query, k=...)``
                that returns documents with ``metadata`` and ``page_content``.

        Raises:
            Exception: Any error raised while building the LLM, prompt or
                chain is logged with a traceback and re-raised.
        """
        print("๋‚ด์žฅ RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์ค‘...")
        self.vector_store = vector_store

        # Settings are read at construction time.
        from config import OPENAI_API_KEY, LLM_MODEL, USE_OPENAI, TOP_K_RETRIEVAL, OLLAMA_HOST

        # Kept on the instance so retrieval does not re-import config on
        # every query.
        self.top_k = TOP_K_RETRIEVAL

        try:
            # LLM selection: OpenAI when enabled, otherwise Ollama with an
            # OpenAI fallback if the Ollama client cannot be constructed.
            if USE_OPENAI:
                self.llm = ChatOpenAI(
                    model_name=LLM_MODEL,
                    temperature=0.2,
                    api_key=OPENAI_API_KEY,
                )
                print(f"OpenAI ๋ชจ๋ธ ์ดˆ๊ธฐํ™”: {LLM_MODEL}")
            else:
                try:
                    self.llm = ChatOllama(
                        model=LLM_MODEL,
                        temperature=0.2,
                        base_url=OLLAMA_HOST,
                    )
                    print(f"Ollama ๋ชจ๋ธ ์ดˆ๊ธฐํ™”: {LLM_MODEL}")
                except Exception as e:
                    print(f"Ollama ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}, OpenAI ๋ชจ๋ธ๋กœ ๋Œ€์ฒด")
                    self.llm = ChatOpenAI(
                        model_name="gpt-3.5-turbo",
                        temperature=0.2,
                        api_key=OPENAI_API_KEY,
                    )

            # Prompt template; {question} and {context} are filled by the
            # chain below.
            template = """
๋‹ค์Œ ์ •๋ณด๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์งˆ๋ฌธ์— ์ •ํ™•ํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”.

์งˆ๋ฌธ: {question}

์ฐธ๊ณ  ์ •๋ณด:
{context}

์ฐธ๊ณ  ์ •๋ณด์— ๋‹ต์ด ์—†๋Š” ๊ฒฝ์šฐ "์ œ๊ณต๋œ ๋ฌธ์„œ์—์„œ ํ•ด๋‹น ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."๋ผ๊ณ  ๋‹ต๋ณ€ํ•˜์„ธ์š”.
๋‹ต๋ณ€์€ ์ •ํ™•ํ•˜๊ณ  ๊ฐ„๊ฒฐํ•˜๊ฒŒ ์ œ๊ณตํ•˜๋˜, ์ฐธ๊ณ  ์ •๋ณด์—์„œ ๊ทผ๊ฑฐ๋ฅผ ์ฐพ์•„ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”.
์ฐธ๊ณ  ์ •๋ณด์˜ ์ถœ์ฒ˜๋„ ํ•จ๊ป˜ ์•Œ๋ ค์ฃผ์„ธ์š”.
"""

            self.prompt = PromptTemplate.from_template(template)

            # Chain: the raw query feeds both the retriever (as context) and
            # the prompt (as question); the model output is parsed to str.
            self.chain = (
                {"context": self._retrieve, "question": RunnablePassthrough()}
                | self.prompt
                | self.llm
                | StrOutputParser()
            )
            print("๋‚ด์žฅ RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
        except Exception as e:
            print(f"LLM ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
            import traceback
            traceback.print_exc()
            raise

    @staticmethod
    def _format_context(docs):
        """Render retrieved documents as a numbered, source-tagged context.

        Args:
            docs: Iterable of objects with a ``metadata`` mapping and a
                ``page_content`` string.

        Returns:
            One entry per document joined by newlines; an empty string when
            ``docs`` is empty.
        """
        context_parts = []
        for i, doc in enumerate(docs, 1):
            # The default label previously contained literal replacement
            # characters; it now reads "unknown source" again.
            source = doc.metadata.get("source", "์•Œ ์ˆ˜ ์—†๋Š” ์ถœ์ฒ˜")
            page = doc.metadata.get("page", "")
            source_info = f"{source}"
            # Compare against "missing" explicitly: a plain truthiness test
            # would drop page 0, which is a valid page number.
            if page is not None and page != "":
                source_info += f" (ํŽ˜์ด์ง€: {page})"

            context_parts.append(f"[์ฐธ๊ณ ์ž๋ฃŒ {i}] - ์ถœ์ฒ˜: {source_info}\n{doc.page_content}\n")

        return "\n".join(context_parts)

    def _retrieve(self, query):
        """Search the vector store and return the formatted context.

        Args:
            query: The user question.

        Returns:
            The formatted context string, or a fixed error message if the
            search or formatting fails (the error is logged, not raised, so
            the chain can still produce an answer).
        """
        try:
            docs = self.vector_store.similarity_search(query, k=self.top_k)
            return self._format_context(docs)
        except Exception as e:
            print(f"๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜: {e}")
            import traceback
            traceback.print_exc()
            return "๋ฌธ์„œ ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค."

    def run(self, query):
        """Answer ``query`` through the chain.

        Args:
            query: The user question.

        Returns:
            The model's answer, or an error message string if the chain
            raises (the error is logged with a traceback).
        """
        try:
            return self.chain.invoke(query)
        except Exception as e:
            print(f"RAG ์ฒด์ธ ์‹คํ–‰ ์˜ค๋ฅ˜: {e}")
            import traceback
            traceback.print_exc()
            return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
143
+
144
+ except Exception as inner_e:
145
+ print(f"๋‚ด์žฅ RAG ์ฒด์ธ ์ •์˜ ์‹คํŒจ: {inner_e}")
146
+ import traceback
147
+ traceback.print_exc()
148
+ RAG_CHAIN_AVAILABLE = False
149
 
150
 
151
  class AutoRAGChatApp:
 
208
  print(f"์ดˆ๊ธฐํ™” ์™„๋ฃŒ ์ƒํƒœ: {self.is_initialized}")
209
  print("=" * 50)
210
 
211
+ def _fallback_response(self, query: str) -> str:
212
+ """
213
+ RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ ์‹œ ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ
214
+
215
+ Args:
216
+ query: ์‚ฌ์šฉ์ž ์งˆ๋ฌธ
217
+
218
+ Returns:
219
+ ๊ธฐ๋ณธ ์‘๋‹ต ํ…์ŠคํŠธ
220
+ """
221
+ try:
222
+ # ๋ฒกํ„ฐ ๊ฒ€์ƒ‰์ด๋ผ๋„ ์‹คํ–‰
223
+ if self.vector_store and self.vector_store.vector_store:
224
+ try:
225
+ docs = self.vector_store.similarity_search(query, k=3)
226
+ if docs:
227
+ context = "\n\n".join([doc.page_content for doc in docs])
228
+ response = f"""
229
+ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. RAG ์ฒด์ธ์ด ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.
230
+
231
+ ๊ทธ๋Ÿฌ๋‚˜ ๋ฌธ์„œ์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์•˜์Šต๋‹ˆ๋‹ค:
232
+
233
+ {context}
234
+
235
+ RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•˜๋ ค๋ฉด ๋กœ๊ทธ๋ฅผ ํ™•์ธํ•˜์„ธ์š”.
236
+ """
237
+ return response.strip()
238
+ except Exception as e:
239
+ print(f"๋ฒกํ„ฐ ๊ฒ€์ƒ‰ ์‹คํŒจ: {e}")
240
+
241
+ # ๊ธฐ๋ณธ ์‘๋‹ต
242
+ return "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. RAG ์ฒด์ธ์ด ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•„ ์งˆ๋ฌธ์— ์‘๋‹ตํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๊ธฐ์ˆ ์ ์ธ ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐ ์ค‘์ž…๋‹ˆ๋‹ค."
243
+
244
+ except Exception as e:
245
+ print(f"๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
246
+ return "์‹œ์Šคํ…œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ๊ด€๋ฆฌ์ž์—๊ฒŒ ๋ฌธ์˜ํ•˜์„ธ์š”."
247
+
248
  def _process_pdf_file(self, file_path: str) -> List[Document]:
249
  """
250
  PDF ํŒŒ์ผ ์ฒ˜๋ฆฌ - docling ์‹คํŒจ ์‹œ PyPDFLoader ์‚ฌ์šฉ
 
550
  print("๋ฒกํ„ฐ ์ธ๋ฑ์Šค๊ฐ€ ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•„ ์ €์žฅํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
551
 
552
  # RAG ์ฒด์ธ ์ดˆ๊ธฐํ™”
553
+ try:
554
+ print("RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์‹œ๋„...")
555
+ if RAG_CHAIN_AVAILABLE:
556
+ print("RAG_CHAIN_AVAILABLE=True, ์ดˆ๊ธฐํ™” ์ง„ํ–‰")
557
+ # ์ง์ ‘ RAG ์ฒด์ธ ํด๋ž˜์Šค๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ดˆ๊ธฐํ™”
558
  self.rag_chain = RAGChain(self.vector_store)
559
+ print("RAG ์ฒด์ธ ๊ฐ์ฒด ์ƒ์„ฑ ์™„๋ฃŒ")
560
+ # ํ…Œ์ŠคํŠธ ์ฟผ๋ฆฌ ์‹คํ–‰ํ•˜์—ฌ ์ฒด์ธ์ด ์ž‘๋™ํ•˜๋Š”์ง€ ํ™•์ธ
561
+ try:
562
+ test_response = self.rag_chain.run("ํ…Œ์ŠคํŠธ ์ฟผ๋ฆฌ์ž…๋‹ˆ๋‹ค.")
563
+ print(f"RAG ์ฒด์ธ ํ…Œ์ŠคํŠธ ์„ฑ๊ณต: ์‘๋‹ต ๊ธธ์ด {len(test_response)}")
564
+ self.is_initialized = True
565
+ except Exception as test_e:
566
+ print(f"RAG ์ฒด์ธ ํ…Œ์ŠคํŠธ ์‹คํŒจ: {test_e}")
567
+ import traceback
568
+ traceback.print_exc()
569
+ self.is_initialized = False
570
+ return f"RAG ์ฒด์ธ ํ…Œ์ŠคํŠธ ์‹คํŒจ: {test_e}"
571
+ else:
572
+ print("RAG_CHAIN_AVAILABLE=False, ์ดˆ๊ธฐํ™” ๋ถˆ๊ฐ€")
573
  self.is_initialized = False
574
+ return "RAG ์ฒด์ธ ๋ชจ๋“ˆ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
575
+
576
+ # ์ตœ์ข… ์ƒํƒœ ํ™•์ธ ๋ฐ ๋กœ๊ทธ
577
+ print(f"RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ๊ฒฐ๊ณผ: is_initialized={self.is_initialized}")
578
+ if self.is_initialized:
579
+ print("RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์„ฑ๊ณต!")
580
+ else:
581
+ print("RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์‹คํŒจํ–ˆ์ง€๋งŒ ์˜ˆ์™ธ๋Š” ๋ฐœ์ƒํ•˜์ง€ ์•Š์Œ.")
582
+ return "RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: ์›์ธ ๋ถˆ๋ช…"
583
+
584
+ except Exception as e:
585
+ print(f"RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์ค‘ ์˜ˆ์™ธ ๋ฐœ์ƒ: {e}")
586
+ import traceback
587
+ traceback.print_exc()
 
 
 
 
 
 
588
  self.is_initialized = False
589
+ return f"RAG ์ฒด์ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}"
590
+
591
+ total_time = time.time() - start_time
592
+
593
+ status_message = (
594
+ f"๋ฌธ์„œ ์ฒ˜๋ฆฌ ์™„๋ฃŒ!\n"
595
+ f"- ์ฒ˜๋ฆฌ๋œ ํŒŒ์ผ: {len(self.processed_files)}๊ฐœ\n"
596
+ f"- ์บ์‹œ๋œ ํŒŒ์ผ: {len(cached_files)}๊ฐœ\n"
597
+ f"- ์ƒˆ ํŒŒ์ผ: {len(new_files)}๊ฐœ\n"
598
+ f"- ์—…๋ฐ์ดํŠธ๋œ ํŒŒ์ผ: {len(updated_files)}๊ฐœ\n"
599
+ f"- ์‹คํŒจํ•œ ํŒŒ์ผ: {len(failed_files)}๊ฐœ\n"
600
+ f"- ์ด ์ฒญํฌ ์ˆ˜: {len(self.documents)}๊ฐœ\n"
601
+ f"- ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {total_time:.2f}์ดˆ\n"
602
+ f"- RAG ์ฒด์ธ ์ดˆ๊ธฐํ™”: {'์„ฑ๊ณต' if self.is_initialized else '์‹คํŒจ'}\n"
603
+ f"์ด์ œ ์งˆ๋ฌธํ•  ์ค€๋น„๊ฐ€ ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!"
604
+ )
605
+
606
+ print(status_message)
607
+ return status_message
608
 
609
  except Exception as e:
610
  self.is_initialized = False
 
688
  # RAG ์ฒด์ธ ์‹คํ–‰ ๋ฐ ์‘๋‹ต ์ƒ์„ฑ
689
  start_time = time.time()
690
  print(f"RAG ์ฒด์ธ ์‹คํ–‰ ์ค‘: ์ฟผ๋ฆฌ = '{query}'")
691
+
692
+ if self.is_initialized and self.rag_chain is not None:
693
+ response = self.rag_chain.run(query)
694
+ else:
695
+ print("RAG ์ฒด์ธ์ด ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์Œ: ๊ธฐ๋ณธ ์‘๋‹ต ์‚ฌ์šฉ")
696
+ response = self._fallback_response(query)
697
+
698
  end_time = time.time()
699
 
700
  query_time = end_time - start_time
 
715
  new_history.append([query, error_msg])
716
  return "", new_history
717
 
718
+ def process_voice_query(self, audio, chat_history: List[List[str]]) -> List[List[str]]:
719
  """
720
  ์Œ์„ฑ ์ฟผ๋ฆฌ ์ฒ˜๋ฆฌ
721
 
 
727
  ์—…๋ฐ์ดํŠธ๋œ ๋Œ€ํ™” ๊ธฐ๋ก
728
  """
729
  if audio is None:
730
+ return chat_history
731
 
732
  try:
733
  import numpy as np
 
757
  if "error" in result:
758
  error_msg = f"์Œ์„ฑ์ธ์‹ ์˜ค๋ฅ˜: {result.get('error')}"
759
  print(f"[STT] {error_msg}")
760
+ new_history = list(chat_history)
761
+ new_history.append(["์Œ์„ฑ ๋ฉ”์‹œ์ง€", error_msg])
762
+ return new_history
763
 
764
  # ์ธ์‹๋œ ํ…์ŠคํŠธ ์ถ”์ถœ
765
  recognized_text = result.get("text", "")
766
  if not recognized_text:
767
  error_msg = "์Œ์„ฑ์„ ์ธ์‹ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."
768
  print("[STT] ์ธ์‹๋œ ํ…์ŠคํŠธ ์—†์Œ")
769
+ new_history = list(chat_history)
770
+ new_history.append(["์Œ์„ฑ ๋ฉ”์‹œ์ง€", error_msg])
771
+ return new_history
772
 
773
  print(f"[STT] ์ธ์‹๋œ ํ…์ŠคํŠธ: {recognized_text}")
774
 
 
805
  # RAG ์ฒด์ธ ์‹คํ–‰ ๋ฐ ์‘๋‹ต ์ƒ์„ฑ
806
  start_time = time.time()
807
  print(f"RAG ์ฒด์ธ ์‹คํ–‰ ์ค‘: ์ฟผ๋ฆฌ = '{query}'")
808
+
809
+ if self.is_initialized and self.rag_chain is not None:
810
+ response = self.rag_chain.run(query)
811
+ else:
812
+ print("RAG ์ฒด์ธ์ด ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์Œ: ๊ธฐ๋ณธ ์‘๋‹ต ์‚ฌ์šฉ")
813
+ response = self._fallback_response(query)
814
+
815
  end_time = time.time()
816
 
817
  query_time = end_time - start_time
 
835
  except Exception as e:
836
  error_msg = f"์Œ์„ฑ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
837
  print(f"[STT] {error_msg}")
838
+ import traceback
839
+ traceback.print_exc()
840
+ new_history = list(chat_history)
841
+ new_history.append(["์Œ์„ฑ ๋ฉ”์‹œ์ง€", error_msg])
842
+ return new_history
843
 
844
  def launch_app(self) -> None:
845
  """
clova_stt.py CHANGED
@@ -5,11 +5,9 @@ import os
5
  import json
6
  import requests
7
  import tempfile
8
- from dotenv import load_dotenv
9
-
10
- # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
11
- load_dotenv()
12
 
 
 
13
 
14
  class ClovaSTT:
15
  """
@@ -20,8 +18,8 @@ class ClovaSTT:
20
  """
21
  ํด๋กœ๋ฐ” STT ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™”
22
  """
23
- self.client_id = os.getenv("NAVER_CLIENT_ID", "")
24
- self.client_secret = os.getenv("NAVER_CLIENT_SECRET", "")
25
 
26
  # ํด๋ผ์ด์–ธํŠธ ID์™€ Secret ๊ฒ€์ฆ
27
  if not self.client_id or not self.client_secret:
 
5
  import json
6
  import requests
7
  import tempfile
 
 
 
 
8
 
9
+ # config.py์—์„œ ์„ค์ • ๊ฐ€์ ธ์˜ค๊ธฐ
10
+ from config import NAVER_CLIENT_ID, NAVER_CLIENT_SECRET
11
 
12
  class ClovaSTT:
13
  """
 
18
  """
19
  ํด๋กœ๋ฐ” STT ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™”
20
  """
21
+ self.client_id = NAVER_CLIENT_ID
22
+ self.client_secret = NAVER_CLIENT_SECRET
23
 
24
  # ํด๋ผ์ด์–ธํŠธ ID์™€ Secret ๊ฒ€์ฆ
25
  if not self.client_id or not self.client_secret: