Add application file
Browse files- app.py +245 -52
- clova_stt.py +4 -6
app.py
CHANGED
@@ -1,31 +1,151 @@
|
|
1 |
"""
|
2 |
-
์์ฑ์ธ์
|
3 |
"""
|
4 |
import os
|
5 |
import time
|
|
|
|
|
6 |
import hashlib
|
7 |
import pickle
|
8 |
import json
|
9 |
-
import tempfile
|
10 |
-
from typing import List, Dict, Tuple, Any
|
11 |
-
|
12 |
-
from langchain.schema import Document
|
13 |
|
|
|
14 |
from config import (
|
15 |
PDF_DIRECTORY, CHUNK_SIZE, CHUNK_OVERLAP, LLM_MODEL,
|
16 |
-
STT_LANGUAGE, IS_HUGGINGFACE
|
17 |
)
|
18 |
from optimized_document_processor import OptimizedDocumentProcessor
|
19 |
from vector_store import VectorStore
|
|
|
|
|
|
|
20 |
from clova_stt import ClovaSTT
|
21 |
|
22 |
# ์์ ํ ์ํฌํธ
|
23 |
try:
|
|
|
24 |
from rag_chain import RAGChain
|
|
|
25 |
RAG_CHAIN_AVAILABLE = True
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
|
31 |
class AutoRAGChatApp:
|
@@ -88,6 +208,43 @@ class AutoRAGChatApp:
|
|
88 |
print(f"์ด๊ธฐํ ์๋ฃ ์ํ: {self.is_initialized}")
|
89 |
print("=" * 50)
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
def _process_pdf_file(self, file_path: str) -> List[Document]:
|
92 |
"""
|
93 |
PDF ํ์ผ ์ฒ๋ฆฌ - docling ์คํจ ์ PyPDFLoader ์ฌ์ฉ
|
@@ -393,42 +550,61 @@ class AutoRAGChatApp:
|
|
393 |
print("๋ฒกํฐ ์ธ๋ฑ์ค๊ฐ ์ด๊ธฐํ๋์ง ์์ ์ ์ฅํ์ง ์์ต๋๋ค.")
|
394 |
|
395 |
# RAG ์ฒด์ธ ์ด๊ธฐํ
|
396 |
-
|
397 |
-
print("RAG ์ฒด์ธ ์ด๊ธฐํ
|
398 |
-
|
399 |
-
|
|
|
400 |
self.rag_chain = RAGChain(self.vector_store)
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
407 |
self.is_initialized = False
|
408 |
-
return
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
f"์ด์ ์ง๋ฌธํ ์ค๋น๊ฐ ๋์์ต๋๋ค!"
|
423 |
-
)
|
424 |
-
|
425 |
-
print(status_message)
|
426 |
-
return status_message
|
427 |
-
else:
|
428 |
self.is_initialized = False
|
429 |
-
|
430 |
-
|
431 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
432 |
|
433 |
except Exception as e:
|
434 |
self.is_initialized = False
|
@@ -512,7 +688,13 @@ class AutoRAGChatApp:
|
|
512 |
# RAG ์ฒด์ธ ์คํ ๋ฐ ์๋ต ์์ฑ
|
513 |
start_time = time.time()
|
514 |
print(f"RAG ์ฒด์ธ ์คํ ์ค: ์ฟผ๋ฆฌ = '{query}'")
|
515 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
516 |
end_time = time.time()
|
517 |
|
518 |
query_time = end_time - start_time
|
@@ -533,7 +715,7 @@ class AutoRAGChatApp:
|
|
533 |
new_history.append([query, error_msg])
|
534 |
return "", new_history
|
535 |
|
536 |
-
def process_voice_query(self, audio, chat_history: List[
|
537 |
"""
|
538 |
์์ฑ ์ฟผ๋ฆฌ ์ฒ๋ฆฌ
|
539 |
|
@@ -545,7 +727,7 @@ class AutoRAGChatApp:
|
|
545 |
์
๋ฐ์ดํธ๋ ๋ํ ๊ธฐ๋ก
|
546 |
"""
|
547 |
if audio is None:
|
548 |
-
return
|
549 |
|
550 |
try:
|
551 |
import numpy as np
|
@@ -575,16 +757,18 @@ class AutoRAGChatApp:
|
|
575 |
if "error" in result:
|
576 |
error_msg = f"์์ฑ์ธ์ ์ค๋ฅ: {result.get('error')}"
|
577 |
print(f"[STT] {error_msg}")
|
578 |
-
|
579 |
-
|
|
|
580 |
|
581 |
# ์ธ์๋ ํ
์คํธ ์ถ์ถ
|
582 |
recognized_text = result.get("text", "")
|
583 |
if not recognized_text:
|
584 |
error_msg = "์์ฑ์ ์ธ์ํ ์ ์์ต๋๋ค. ๋ค์ ์๋ํด์ฃผ์ธ์."
|
585 |
print("[STT] ์ธ์๋ ํ
์คํธ ์์")
|
586 |
-
|
587 |
-
|
|
|
588 |
|
589 |
print(f"[STT] ์ธ์๋ ํ
์คํธ: {recognized_text}")
|
590 |
|
@@ -621,7 +805,13 @@ class AutoRAGChatApp:
|
|
621 |
# RAG ์ฒด์ธ ์คํ ๋ฐ ์๋ต ์์ฑ
|
622 |
start_time = time.time()
|
623 |
print(f"RAG ์ฒด์ธ ์คํ ์ค: ์ฟผ๋ฆฌ = '{query}'")
|
624 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
625 |
end_time = time.time()
|
626 |
|
627 |
query_time = end_time - start_time
|
@@ -645,8 +835,11 @@ class AutoRAGChatApp:
|
|
645 |
except Exception as e:
|
646 |
error_msg = f"์์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
|
647 |
print(f"[STT] {error_msg}")
|
648 |
-
|
649 |
-
|
|
|
|
|
|
|
650 |
|
651 |
def launch_app(self) -> None:
|
652 |
"""
|
|
|
1 |
"""
|
2 |
+
์์ฑ์ธ์ ๊ธฐ๋ฅ์ด ์ถ๊ฐ๋ RAG ์ฑ๋ด ์ฑ
|
3 |
"""
|
4 |
import os
|
5 |
import time
|
6 |
+
import tempfile
|
7 |
+
from typing import List, Dict, Tuple, Any, Optional
|
8 |
import hashlib
|
9 |
import pickle
|
10 |
import json
|
|
|
|
|
|
|
|
|
11 |
|
12 |
+
# ๊ธฐ์กด ์ํฌํธ
|
13 |
from config import (
|
14 |
PDF_DIRECTORY, CHUNK_SIZE, CHUNK_OVERLAP, LLM_MODEL,
|
15 |
+
STT_LANGUAGE, IS_HUGGINGFACE, OPENAI_API_KEY, USE_OPENAI
|
16 |
)
|
17 |
from optimized_document_processor import OptimizedDocumentProcessor
|
18 |
from vector_store import VectorStore
|
19 |
+
from langchain.schema import Document
|
20 |
+
|
21 |
+
# ํด๋ก๋ฐ STT ๋ชจ๋ ์ํฌํธ
|
22 |
from clova_stt import ClovaSTT
|
23 |
|
24 |
# ์์ ํ ์ํฌํธ
|
25 |
try:
|
26 |
+
print("RAG ์ฒด์ธ ๋ชจ๋ ๋ก๋ ์๋...")
|
27 |
from rag_chain import RAGChain
|
28 |
+
# RAGChain ํด๋์ค๊ฐ ์ ๋๋ก ์ํฌํธ๋์๋์ง ํ์ธ
|
29 |
RAG_CHAIN_AVAILABLE = True
|
30 |
+
print("์ธ๋ถ RAG ์ฒด์ธ ๋ชจ๋ ๋ก๋ ์ฑ๊ณต")
|
31 |
+
except Exception as e:
|
32 |
+
print(f"์ธ๋ถ RAG ์ฒด์ธ ๋ก๋ ์คํจ: {e}")
|
33 |
+
print("๋ด์ฅ RAG ์ฒด์ธ์ ์ฌ์ฉํฉ๋๋ค.")
|
34 |
+
# ๋ด์ฅ RAG ์ฒด์ธ ๊ตฌํ ์ฌ์ฉ
|
35 |
+
RAG_CHAIN_AVAILABLE = True
|
36 |
+
|
37 |
+
# ํ์ํ langchain ์ํฌํธ
|
38 |
+
try:
|
39 |
+
from langchain_openai import ChatOpenAI
|
40 |
+
from langchain_community.chat_models import ChatOllama
|
41 |
+
from langchain.prompts import PromptTemplate
|
42 |
+
from langchain_core.output_parsers import StrOutputParser
|
43 |
+
from langchain_core.runnables import RunnablePassthrough
|
44 |
+
|
45 |
+
# SimpleRAGChain ๋ด์ฅ ํด๋์ค ์ ์
|
46 |
+
class RAGChain:
|
47 |
+
"""์ฑ์ ๋ด์ฅ๋ ๊ฐ๋จํ RAG ์ฒด์ธ"""
|
48 |
+
def __init__(self, vector_store):
|
49 |
+
print("๋ด์ฅ RAG ์ฒด์ธ ์ด๊ธฐํ ์ค...")
|
50 |
+
self.vector_store = vector_store
|
51 |
+
|
52 |
+
# ํ๊ฒฝ ์ค์ ์ํฌํธ
|
53 |
+
from config import OPENAI_API_KEY, LLM_MODEL, USE_OPENAI, TOP_K_RETRIEVAL, OLLAMA_HOST
|
54 |
+
|
55 |
+
try:
|
56 |
+
# LLM ์ด๊ธฐํ
|
57 |
+
if USE_OPENAI:
|
58 |
+
self.llm = ChatOpenAI(
|
59 |
+
model_name=LLM_MODEL,
|
60 |
+
temperature=0.2,
|
61 |
+
api_key=OPENAI_API_KEY,
|
62 |
+
)
|
63 |
+
print(f"OpenAI ๋ชจ๋ธ ์ด๊ธฐํ: {LLM_MODEL}")
|
64 |
+
else:
|
65 |
+
try:
|
66 |
+
self.llm = ChatOllama(
|
67 |
+
model=LLM_MODEL,
|
68 |
+
temperature=0.2,
|
69 |
+
base_url=OLLAMA_HOST,
|
70 |
+
)
|
71 |
+
print(f"Ollama ๋ชจ๋ธ ์ด๊ธฐํ: {LLM_MODEL}")
|
72 |
+
except Exception as e:
|
73 |
+
print(f"Ollama ์ด๊ธฐํ ์คํจ: {e}, OpenAI ๋ชจ๋ธ๋ก ๋์ฒด")
|
74 |
+
self.llm = ChatOpenAI(
|
75 |
+
model_name="gpt-3.5-turbo",
|
76 |
+
temperature=0.2,
|
77 |
+
api_key=OPENAI_API_KEY,
|
78 |
+
)
|
79 |
+
|
80 |
+
# ํ๋กฌํํธ ํ
ํ๋ฆฟ
|
81 |
+
template = """
|
82 |
+
๋ค์ ์ ๋ณด๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ์ง๋ฌธ์ ์ ํํ๊ฒ ๋ต๋ณํด์ฃผ์ธ์.
|
83 |
+
|
84 |
+
์ง๋ฌธ: {question}
|
85 |
+
|
86 |
+
์ฐธ๊ณ ์ ๋ณด:
|
87 |
+
{context}
|
88 |
+
|
89 |
+
์ฐธ๊ณ ์ ๋ณด์ ๋ต์ด ์๋ ๊ฒฝ์ฐ "์ ๊ณต๋ ๋ฌธ์์์ ํด๋น ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."๋ผ๊ณ ๋ต๋ณํ์ธ์.
|
90 |
+
๋ต๋ณ์ ์ ํํ๊ณ ๊ฐ๊ฒฐํ๊ฒ ์ ๊ณตํ๋, ์ฐธ๊ณ ์ ๋ณด์์ ๊ทผ๊ฑฐ๋ฅผ ์ฐพ์ ์ค๋ช
ํด์ฃผ์ธ์.
|
91 |
+
์ฐธ๊ณ ์ ๋ณด์ ์ถ์ฒ๋ ํจ๊ป ์๋ ค์ฃผ์ธ์.
|
92 |
+
"""
|
93 |
+
|
94 |
+
self.prompt = PromptTemplate.from_template(template)
|
95 |
+
|
96 |
+
# ์ฒด์ธ ๊ตฌ์ฑ
|
97 |
+
self.chain = (
|
98 |
+
{"context": self._retrieve, "question": RunnablePassthrough()}
|
99 |
+
| self.prompt
|
100 |
+
| self.llm
|
101 |
+
| StrOutputParser()
|
102 |
+
)
|
103 |
+
print("๋ด์ฅ RAG ์ฒด์ธ ์ด๊ธฐํ ์๋ฃ")
|
104 |
+
except Exception as e:
|
105 |
+
print(f"LLM ์ด๊ธฐํ ์คํจ: {e}")
|
106 |
+
import traceback
|
107 |
+
traceback.print_exc()
|
108 |
+
raise
|
109 |
+
|
110 |
+
def _retrieve(self, query):
|
111 |
+
"""๋ฌธ์ ๊ฒ์"""
|
112 |
+
try:
|
113 |
+
from config import TOP_K_RETRIEVAL
|
114 |
+
docs = self.vector_store.similarity_search(query, k=TOP_K_RETRIEVAL)
|
115 |
+
|
116 |
+
# ๊ฒ์ ๊ฒฐ๊ณผ ์ปจํ
์คํธ ๊ตฌ์ฑ
|
117 |
+
context_parts = []
|
118 |
+
for i, doc in enumerate(docs, 1):
|
119 |
+
source = doc.metadata.get("source", "์ ๏ฟฝ๏ฟฝ๏ฟฝ ์๋ ์ถ์ฒ")
|
120 |
+
page = doc.metadata.get("page", "")
|
121 |
+
source_info = f"{source}"
|
122 |
+
if page:
|
123 |
+
source_info += f" (ํ์ด์ง: {page})"
|
124 |
+
|
125 |
+
context_parts.append(f"[์ฐธ๊ณ ์๋ฃ {i}] - ์ถ์ฒ: {source_info}\n{doc.page_content}\n")
|
126 |
+
|
127 |
+
return "\n".join(context_parts)
|
128 |
+
except Exception as e:
|
129 |
+
print(f"๊ฒ์ ์ค ์ค๋ฅ: {e}")
|
130 |
+
import traceback
|
131 |
+
traceback.print_exc()
|
132 |
+
return "๋ฌธ์ ๊ฒ์ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค."
|
133 |
+
|
134 |
+
def run(self, query):
|
135 |
+
"""์ฟผ๋ฆฌ ์ฒ๋ฆฌ"""
|
136 |
+
try:
|
137 |
+
return self.chain.invoke(query)
|
138 |
+
except Exception as e:
|
139 |
+
print(f"RAG ์ฒด์ธ ์คํ ์ค๋ฅ: {e}")
|
140 |
+
import traceback
|
141 |
+
traceback.print_exc()
|
142 |
+
return f"์ค๋ฅ ๋ฐ์: {str(e)}"
|
143 |
+
|
144 |
+
except Exception as inner_e:
|
145 |
+
print(f"๋ด์ฅ RAG ์ฒด์ธ ์ ์ ์คํจ: {inner_e}")
|
146 |
+
import traceback
|
147 |
+
traceback.print_exc()
|
148 |
+
RAG_CHAIN_AVAILABLE = False
|
149 |
|
150 |
|
151 |
class AutoRAGChatApp:
|
|
|
208 |
print(f"์ด๊ธฐํ ์๋ฃ ์ํ: {self.is_initialized}")
|
209 |
print("=" * 50)
|
210 |
|
211 |
+
def _fallback_response(self, query: str) -> str:
|
212 |
+
"""
|
213 |
+
RAG ์ฒด์ธ ์ด๊ธฐํ ์คํจ ์ ๊ธฐ๋ณธ ์๋ต ์์ฑ
|
214 |
+
|
215 |
+
Args:
|
216 |
+
query: ์ฌ์ฉ์ ์ง๋ฌธ
|
217 |
+
|
218 |
+
Returns:
|
219 |
+
๊ธฐ๋ณธ ์๋ต ํ
์คํธ
|
220 |
+
"""
|
221 |
+
try:
|
222 |
+
# ๋ฒกํฐ ๊ฒ์์ด๋ผ๋ ์คํ
|
223 |
+
if self.vector_store and self.vector_store.vector_store:
|
224 |
+
try:
|
225 |
+
docs = self.vector_store.similarity_search(query, k=3)
|
226 |
+
if docs:
|
227 |
+
context = "\n\n".join([doc.page_content for doc in docs])
|
228 |
+
response = f"""
|
229 |
+
์ง๋ฌธ์ ๋ํ ์๋ต์ ์์ฑํ ์ ์์ต๋๋ค. RAG ์ฒด์ธ์ด ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.
|
230 |
+
|
231 |
+
๊ทธ๋ฌ๋ ๋ฌธ์์์ ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์์ต๋๋ค:
|
232 |
+
|
233 |
+
{context}
|
234 |
+
|
235 |
+
RAG ์ฒด์ธ ์ด๊ธฐํ ๋ฌธ์ ๋ฅผ ํด๊ฒฐํ๋ ค๋ฉด ๋ก๊ทธ๋ฅผ ํ์ธํ์ธ์.
|
236 |
+
"""
|
237 |
+
return response.strip()
|
238 |
+
except Exception as e:
|
239 |
+
print(f"๋ฒกํฐ ๊ฒ์ ์คํจ: {e}")
|
240 |
+
|
241 |
+
# ๊ธฐ๋ณธ ์๋ต
|
242 |
+
return "์ฃ์กํฉ๋๋ค. RAG ์ฒด์ธ์ด ์ด๊ธฐํ๋์ง ์์ ์ง๋ฌธ์ ์๋ตํ ์ ์์ต๋๋ค. ๊ธฐ์ ์ ์ธ ๋ฌธ์ ๋ฅผ ํด๊ฒฐ ์ค์
๋๋ค."
|
243 |
+
|
244 |
+
except Exception as e:
|
245 |
+
print(f"๊ธฐ๋ณธ ์๋ต ์์ฑ ์คํจ: {e}")
|
246 |
+
return "์์คํ
์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ๊ด๋ฆฌ์์๊ฒ ๋ฌธ์ํ์ธ์."
|
247 |
+
|
248 |
def _process_pdf_file(self, file_path: str) -> List[Document]:
|
249 |
"""
|
250 |
PDF ํ์ผ ์ฒ๋ฆฌ - docling ์คํจ ์ PyPDFLoader ์ฌ์ฉ
|
|
|
550 |
print("๋ฒกํฐ ์ธ๋ฑ์ค๊ฐ ์ด๊ธฐํ๋์ง ์์ ์ ์ฅํ์ง ์์ต๋๋ค.")
|
551 |
|
552 |
# RAG ์ฒด์ธ ์ด๊ธฐํ
|
553 |
+
try:
|
554 |
+
print("RAG ์ฒด์ธ ์ด๊ธฐํ ์๋...")
|
555 |
+
if RAG_CHAIN_AVAILABLE:
|
556 |
+
print("RAG_CHAIN_AVAILABLE=True, ์ด๊ธฐํ ์งํ")
|
557 |
+
# ์ง์ RAG ์ฒด์ธ ํด๋์ค๋ฅผ ์ฌ์ฉํ์ฌ ์ด๊ธฐํ
|
558 |
self.rag_chain = RAGChain(self.vector_store)
|
559 |
+
print("RAG ์ฒด์ธ ๊ฐ์ฒด ์์ฑ ์๋ฃ")
|
560 |
+
# ํ
์คํธ ์ฟผ๋ฆฌ ์คํํ์ฌ ์ฒด์ธ์ด ์๋ํ๋์ง ํ์ธ
|
561 |
+
try:
|
562 |
+
test_response = self.rag_chain.run("ํ
์คํธ ์ฟผ๋ฆฌ์
๋๋ค.")
|
563 |
+
print(f"RAG ์ฒด์ธ ํ
์คํธ ์ฑ๊ณต: ์๋ต ๊ธธ์ด {len(test_response)}")
|
564 |
+
self.is_initialized = True
|
565 |
+
except Exception as test_e:
|
566 |
+
print(f"RAG ์ฒด์ธ ํ
์คํธ ์คํจ: {test_e}")
|
567 |
+
import traceback
|
568 |
+
traceback.print_exc()
|
569 |
+
self.is_initialized = False
|
570 |
+
return f"RAG ์ฒด์ธ ํ
์คํธ ์คํจ: {test_e}"
|
571 |
+
else:
|
572 |
+
print("RAG_CHAIN_AVAILABLE=False, ์ด๊ธฐํ ๋ถ๊ฐ")
|
573 |
self.is_initialized = False
|
574 |
+
return "RAG ์ฒด์ธ ๋ชจ๋์ ์ฌ์ฉํ ์ ์์ต๋๋ค."
|
575 |
+
|
576 |
+
# ์ต์ข
์ํ ํ์ธ ๋ฐ ๋ก๊ทธ
|
577 |
+
print(f"RAG ์ฒด์ธ ์ด๊ธฐํ ๊ฒฐ๊ณผ: is_initialized={self.is_initialized}")
|
578 |
+
if self.is_initialized:
|
579 |
+
print("RAG ์ฒด์ธ ์ด๊ธฐํ ์ฑ๊ณต!")
|
580 |
+
else:
|
581 |
+
print("RAG ์ฒด์ธ ์ด๊ธฐํ ์คํจํ์ง๋ง ์์ธ๋ ๋ฐ์ํ์ง ์์.")
|
582 |
+
return "RAG ์ฒด์ธ ์ด๊ธฐํ ์คํจ: ์์ธ ๋ถ๋ช
"
|
583 |
+
|
584 |
+
except Exception as e:
|
585 |
+
print(f"RAG ์ฒด์ธ ์ด๊ธฐํ ์ค ์์ธ ๋ฐ์: {e}")
|
586 |
+
import traceback
|
587 |
+
traceback.print_exc()
|
|
|
|
|
|
|
|
|
|
|
|
|
588 |
self.is_initialized = False
|
589 |
+
return f"RAG ์ฒด์ธ ์ด๊ธฐํ ์คํจ: {e}"
|
590 |
+
|
591 |
+
total_time = time.time() - start_time
|
592 |
+
|
593 |
+
status_message = (
|
594 |
+
f"๋ฌธ์ ์ฒ๋ฆฌ ์๋ฃ!\n"
|
595 |
+
f"- ์ฒ๋ฆฌ๋ ํ์ผ: {len(self.processed_files)}๊ฐ\n"
|
596 |
+
f"- ์บ์๋ ํ์ผ: {len(cached_files)}๊ฐ\n"
|
597 |
+
f"- ์ ํ์ผ: {len(new_files)}๊ฐ\n"
|
598 |
+
f"- ์
๋ฐ์ดํธ๋ ํ์ผ: {len(updated_files)}๊ฐ\n"
|
599 |
+
f"- ์คํจํ ํ์ผ: {len(failed_files)}๊ฐ\n"
|
600 |
+
f"- ์ด ์ฒญํฌ ์: {len(self.documents)}๊ฐ\n"
|
601 |
+
f"- ์ฒ๋ฆฌ ์๊ฐ: {total_time:.2f}์ด\n"
|
602 |
+
f"- RAG ์ฒด์ธ ์ด๊ธฐํ: {'์ฑ๊ณต' if self.is_initialized else '์คํจ'}\n"
|
603 |
+
f"์ด์ ์ง๋ฌธํ ์ค๋น๊ฐ ๋์์ต๋๋ค!"
|
604 |
+
)
|
605 |
+
|
606 |
+
print(status_message)
|
607 |
+
return status_message
|
608 |
|
609 |
except Exception as e:
|
610 |
self.is_initialized = False
|
|
|
688 |
# RAG ์ฒด์ธ ์คํ ๋ฐ ์๋ต ์์ฑ
|
689 |
start_time = time.time()
|
690 |
print(f"RAG ์ฒด์ธ ์คํ ์ค: ์ฟผ๋ฆฌ = '{query}'")
|
691 |
+
|
692 |
+
if self.is_initialized and self.rag_chain is not None:
|
693 |
+
response = self.rag_chain.run(query)
|
694 |
+
else:
|
695 |
+
print("RAG ์ฒด์ธ์ด ์ด๊ธฐํ๋์ง ์์: ๊ธฐ๋ณธ ์๋ต ์ฌ์ฉ")
|
696 |
+
response = self._fallback_response(query)
|
697 |
+
|
698 |
end_time = time.time()
|
699 |
|
700 |
query_time = end_time - start_time
|
|
|
715 |
new_history.append([query, error_msg])
|
716 |
return "", new_history
|
717 |
|
718 |
+
def process_voice_query(self, audio, chat_history: List[List[str]]) -> List[List[str]]:
|
719 |
"""
|
720 |
์์ฑ ์ฟผ๋ฆฌ ์ฒ๋ฆฌ
|
721 |
|
|
|
727 |
์
๋ฐ์ดํธ๋ ๋ํ ๊ธฐ๋ก
|
728 |
"""
|
729 |
if audio is None:
|
730 |
+
return chat_history
|
731 |
|
732 |
try:
|
733 |
import numpy as np
|
|
|
757 |
if "error" in result:
|
758 |
error_msg = f"์์ฑ์ธ์ ์ค๋ฅ: {result.get('error')}"
|
759 |
print(f"[STT] {error_msg}")
|
760 |
+
new_history = list(chat_history)
|
761 |
+
new_history.append(["์์ฑ ๋ฉ์์ง", error_msg])
|
762 |
+
return new_history
|
763 |
|
764 |
# ์ธ์๋ ํ
์คํธ ์ถ์ถ
|
765 |
recognized_text = result.get("text", "")
|
766 |
if not recognized_text:
|
767 |
error_msg = "์์ฑ์ ์ธ์ํ ์ ์์ต๋๋ค. ๋ค์ ์๋ํด์ฃผ์ธ์."
|
768 |
print("[STT] ์ธ์๋ ํ
์คํธ ์์")
|
769 |
+
new_history = list(chat_history)
|
770 |
+
new_history.append(["์์ฑ ๋ฉ์์ง", error_msg])
|
771 |
+
return new_history
|
772 |
|
773 |
print(f"[STT] ์ธ์๋ ํ
์คํธ: {recognized_text}")
|
774 |
|
|
|
805 |
# RAG ์ฒด์ธ ์คํ ๋ฐ ์๋ต ์์ฑ
|
806 |
start_time = time.time()
|
807 |
print(f"RAG ์ฒด์ธ ์คํ ์ค: ์ฟผ๋ฆฌ = '{query}'")
|
808 |
+
|
809 |
+
if self.is_initialized and self.rag_chain is not None:
|
810 |
+
response = self.rag_chain.run(query)
|
811 |
+
else:
|
812 |
+
print("RAG ์ฒด์ธ์ด ์ด๊ธฐํ๋์ง ์์: ๊ธฐ๋ณธ ์๋ต ์ฌ์ฉ")
|
813 |
+
response = self._fallback_response(query)
|
814 |
+
|
815 |
end_time = time.time()
|
816 |
|
817 |
query_time = end_time - start_time
|
|
|
835 |
except Exception as e:
|
836 |
error_msg = f"์์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
|
837 |
print(f"[STT] {error_msg}")
|
838 |
+
import traceback
|
839 |
+
traceback.print_exc()
|
840 |
+
new_history = list(chat_history)
|
841 |
+
new_history.append(["์์ฑ ๋ฉ์์ง", error_msg])
|
842 |
+
return new_history
|
843 |
|
844 |
def launch_app(self) -> None:
|
845 |
"""
|
clova_stt.py
CHANGED
@@ -5,11 +5,9 @@ import os
|
|
5 |
import json
|
6 |
import requests
|
7 |
import tempfile
|
8 |
-
from dotenv import load_dotenv
|
9 |
-
|
10 |
-
# ํ๊ฒฝ ๋ณ์ ๋ก๋
|
11 |
-
load_dotenv()
|
12 |
|
|
|
|
|
13 |
|
14 |
class ClovaSTT:
|
15 |
"""
|
@@ -20,8 +18,8 @@ class ClovaSTT:
|
|
20 |
"""
|
21 |
ํด๋ก๋ฐ STT ํด๋ผ์ด์ธํธ ์ด๊ธฐํ
|
22 |
"""
|
23 |
-
self.client_id =
|
24 |
-
self.client_secret =
|
25 |
|
26 |
# ํด๋ผ์ด์ธํธ ID์ Secret ๊ฒ์ฆ
|
27 |
if not self.client_id or not self.client_secret:
|
|
|
5 |
import json
|
6 |
import requests
|
7 |
import tempfile
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# config.py์์ ์ค์ ๊ฐ์ ธ์ค๊ธฐ
|
10 |
+
from config import NAVER_CLIENT_ID, NAVER_CLIENT_SECRET
|
11 |
|
12 |
class ClovaSTT:
|
13 |
"""
|
|
|
18 |
"""
|
19 |
ํด๋ก๋ฐ STT ํด๋ผ์ด์ธํธ ์ด๊ธฐํ
|
20 |
"""
|
21 |
+
self.client_id = NAVER_CLIENT_ID
|
22 |
+
self.client_secret = NAVER_CLIENT_SECRET
|
23 |
|
24 |
# ํด๋ผ์ด์ธํธ ID์ Secret ๊ฒ์ฆ
|
25 |
if not self.client_id or not self.client_secret:
|