Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ import markdown
|
|
9 |
import tempfile
|
10 |
import base64
|
11 |
from datetime import datetime
|
|
|
12 |
|
13 |
# 로깅 설정
|
14 |
logging.basicConfig(
|
@@ -151,12 +152,52 @@ def extract_keywords(text: str, top_k: int = 5) -> str:
|
|
151 |
2) 공백 기준 토큰 분리
|
152 |
3) 최대 top_k개만
|
153 |
"""
|
154 |
-
import re
|
155 |
text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
|
156 |
tokens = text.split()
|
157 |
key_tokens = tokens[:top_k]
|
158 |
return " ".join(key_tokens)
|
159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
# 웹 검색 함수
|
161 |
def do_web_search(query: str) -> str:
|
162 |
"""
|
@@ -171,9 +212,9 @@ def do_web_search(query: str) -> str:
|
|
171 |
"q": query,
|
172 |
"domain": "google.com",
|
173 |
"serp_type": "web", # 기본 웹 검색
|
174 |
-
"device": "desktop",
|
175 |
-
"lang": "
|
176 |
-
"num": "
|
177 |
}
|
178 |
|
179 |
headers = {
|
@@ -183,8 +224,13 @@ def do_web_search(query: str) -> str:
|
|
183 |
logging.info(f"SerpHouse API ํธ์ถ ์ค... ๊ฒ์์ด: {query}")
|
184 |
logging.info(f"์์ฒญ URL: {url} - ํ๋ผ๋ฏธํฐ: {params}")
|
185 |
|
186 |
-
#
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
188 |
response.raise_for_status()
|
189 |
|
190 |
logging.info(f"SerpHouse API ์๋ต ์ํ ์ฝ๋: {response.status_code}")
|
@@ -209,13 +255,13 @@ def do_web_search(query: str) -> str:
|
|
209 |
|
210 |
if not organic:
|
211 |
logging.warning("์๋ต์์ organic ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
|
212 |
-
logging.debug(f"์๋ต ๊ตฌ์กฐ: {list(data.keys())}")
|
213 |
if isinstance(results, dict):
|
214 |
logging.debug(f"results ๊ตฌ์กฐ: {list(results.keys())}")
|
215 |
-
return "
|
216 |
|
217 |
# 결과 수 제한 및 컨텍스트 길이 최적화
|
218 |
-
max_results = min(
|
219 |
limited_organic = organic[:max_results]
|
220 |
|
221 |
# 결과 형식 개선 - 마크다운 형식으로 출력하여 가독성 향상
|
@@ -248,9 +294,12 @@ def do_web_search(query: str) -> str:
|
|
248 |
logging.info(f"๊ฒ์ ๊ฒฐ๊ณผ {len(limited_organic)}๊ฐ ์ฒ๋ฆฌ ์๋ฃ")
|
249 |
return search_results
|
250 |
|
|
|
|
|
|
|
251 |
except Exception as e:
|
252 |
logging.error(f"Web search failed: {e}")
|
253 |
-
return f"Web search failed: {str(e)}"
|
254 |
|
255 |
def chatbot_interface():
|
256 |
st.title("Ginigen Blog")
|
@@ -320,7 +369,6 @@ def chatbot_interface():
|
|
320 |
latest_blog = msg["content"]
|
321 |
|
322 |
# 타이틀 추출 시도 (첫 번째 제목 태그 사용)
|
323 |
-
import re
|
324 |
title_match = re.search(r'# (.*?)(\n|$)', latest_blog)
|
325 |
if title_match:
|
326 |
latest_blog_title = title_match.group(1).strip()
|
@@ -402,14 +450,20 @@ def chatbot_interface():
|
|
402 |
system_prompt = get_system_prompt()
|
403 |
if st.session_state.use_web_search:
|
404 |
with st.spinner("์น์์ ๊ด๋ จ ์ ๋ณด๋ฅผ ๊ฒ์ ์ค..."):
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
413 |
|
414 |
# API 호출
|
415 |
with client.messages.stream(
|
|
|
9 |
import tempfile
|
10 |
import base64
|
11 |
from datetime import datetime
|
12 |
+
import re
|
13 |
|
14 |
# 로깅 설정
|
15 |
logging.basicConfig(
|
|
|
152 |
2) 공백 기준 토큰 분리
|
153 |
3) 최대 top_k개만
|
154 |
"""
|
|
|
155 |
text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
|
156 |
tokens = text.split()
|
157 |
key_tokens = tokens[:top_k]
|
158 |
return " ".join(key_tokens)
|
159 |
|
160 |
+
# Mock 검색 결과 생성 함수 추가 (API 키가 없거나 연결 실패 시 대체 사용)
|
161 |
+
def generate_mock_search_results(query):
|
162 |
+
"""API ์ฐ๊ฒฐ์ด ์๋ ๋ ์ฌ์ฉํ ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ ์์ฑ"""
|
163 |
+
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
164 |
+
mock_results = [
|
165 |
+
{
|
166 |
+
"title": f"{query}์ ๊ดํ ์ต์ ์ ๋ณด",
|
167 |
+
"link": "https://example.com/article1",
|
168 |
+
"snippet": f"{query}์ ๊ดํ ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ์
๋๋ค. ์ด ๊ฒฐ๊ณผ๋ API ์ฐ๊ฒฐ ๋ฌธ์ ๋ก ์ธํด ์์ฑ๋ ๊ฐ์ ๋ฐ์ดํฐ์
๋๋ค. ์ค์ ๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์๋์ ์ฐธ๊ณ ํ์ธ์. ์์ฑ ์๊ฐ: {current_time}",
|
169 |
+
"displayed_link": "example.com/article1"
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"title": f"{query} ๊ด๋ จ ์ฐ๊ตฌ ๋ํฅ",
|
173 |
+
"link": "https://example.org/research",
|
174 |
+
"snippet": "์ด๊ฒ์ API ์ฐ๊ฒฐ ๋ฌธ์ ๋ก ์ธํ ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ์
๋๋ค. ์ค์ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ณด์ฌ๋๋ฆฌ์ง ๋ชปํด ์ฃ์กํฉ๋๋ค. ๋์ AI์ ๊ธฐ์กด ์ง์์ ํ์ฉํ์ฌ ๋ต๋ณ๋๋ฆฌ๊ฒ ์ต๋๋ค.",
|
175 |
+
"displayed_link": "example.org/research"
|
176 |
+
}
|
177 |
+
]
|
178 |
+
|
179 |
+
summary_lines = []
|
180 |
+
for idx, item in enumerate(mock_results, start=1):
|
181 |
+
title = item.get("title", "No title")
|
182 |
+
link = item.get("link", "#")
|
183 |
+
snippet = item.get("snippet", "No description")
|
184 |
+
displayed_link = item.get("displayed_link", link)
|
185 |
+
|
186 |
+
summary_lines.append(
|
187 |
+
f"### Result {idx}: {title}\n\n"
|
188 |
+
f"{snippet}\n\n"
|
189 |
+
f"**์ถ์ฒ**: [{displayed_link}]({link})\n\n"
|
190 |
+
f"---\n"
|
191 |
+
)
|
192 |
+
|
193 |
+
notice = """
|
194 |
+
# ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ (API ์ฐ๊ฒฐ ๋ฌธ์ ๋ก ์ธํด ์์ฑ๋จ)
|
195 |
+
์๋๋ API ์ฐ๊ฒฐ ๋ฌธ์ ๋ก ์ธํด ์์ฑ๋ ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ์
๋๋ค. ์ค์ ๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์๋์ ์ฐธ๊ณ ํ์ธ์.
|
196 |
+
๋์ AI์ ๊ธฐ์กด ์ง์์ ํ์ฉํ์ฌ ์ต๋ํ ์ ํํ ๋ต๋ณ์ ๋๋ฆฌ๊ฒ ์ต๋๋ค.
|
197 |
+
"""
|
198 |
+
|
199 |
+
return notice + "\n".join(summary_lines)
|
200 |
+
|
201 |
# 웹 검색 함수
|
202 |
def do_web_search(query: str) -> str:
|
203 |
"""
|
|
|
212 |
"q": query,
|
213 |
"domain": "google.com",
|
214 |
"serp_type": "web", # 기본 웹 검색
|
215 |
+
"device": "desktop",
|
216 |
+
"lang": "ko", # 한국어 검색 결과를 위해 변경
|
217 |
+
"num": "10" # 결과 수를 10개로 설정 (빠른 응답을 위해)
|
218 |
}
|
219 |
|
220 |
headers = {
|
|
|
224 |
logging.info(f"SerpHouse API ํธ์ถ ์ค... ๊ฒ์์ด: {query}")
|
225 |
logging.info(f"์์ฒญ URL: {url} - ํ๋ผ๋ฏธํฐ: {params}")
|
226 |
|
227 |
+
# Mock 응답 (실제 API 연결이 어려울 경우)
|
228 |
+
if not SERPHOUSE_API_KEY or "mock" in SERPHOUSE_API_KEY.lower():
|
229 |
+
logging.warning("API ํค๊ฐ ์๊ฑฐ๋ Mock ๋ชจ๋์
๋๋ค. ๋ชจ์ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํํฉ๋๋ค.")
|
230 |
+
return generate_mock_search_results(query)
|
231 |
+
|
232 |
+
# 타임아웃 설정 (30초)
|
233 |
+
response = requests.get(url, headers=headers, params=params, timeout=30)
|
234 |
response.raise_for_status()
|
235 |
|
236 |
logging.info(f"SerpHouse API ์๋ต ์ํ ์ฝ๋: {response.status_code}")
|
|
|
255 |
|
256 |
if not organic:
|
257 |
logging.warning("์๋ต์์ organic ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
|
258 |
+
logging.debug(f"์๋ต ๊ตฌ์กฐ: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")
|
259 |
if isinstance(results, dict):
|
260 |
logging.debug(f"results ๊ตฌ์กฐ: {list(results.keys())}")
|
261 |
+
return "๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ๋์ ๊ธฐ์กด ์ง์์ ํ์ฉํ์ฌ ๋ต๋ณํ๊ฒ ์ต๋๋ค."
|
262 |
|
263 |
# 결과 수 제한 및 컨텍스트 길이 최적화
|
264 |
+
max_results = min(10, len(organic))
|
265 |
limited_organic = organic[:max_results]
|
266 |
|
267 |
# 결과 형식 개선 - 마크다운 형식으로 출력하여 가독성 향상
|
|
|
294 |
logging.info(f"๊ฒ์ ๊ฒฐ๊ณผ {len(limited_organic)}๊ฐ ์ฒ๋ฆฌ ์๋ฃ")
|
295 |
return search_results
|
296 |
|
297 |
+
except requests.exceptions.Timeout:
|
298 |
+
logging.error("Web search timed out")
|
299 |
+
return "Web search timed out. ๊ฒ์ ์๊ฐ์ด ์ด๊ณผ๋์์ต๋๋ค. ๊ธฐ์กด ์ง์์ ํ์ฉํ์ฌ ๋ต๋ณํ๊ฒ ์ต๋๋ค."
|
300 |
except Exception as e:
|
301 |
logging.error(f"Web search failed: {e}")
|
302 |
+
return f"Web search failed: {str(e)}. ๊ฒ์ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ๊ธฐ์กด ์ง์์ ํ์ฉํ์ฌ ๋ต๋ณํ๊ฒ ์ต๋๋ค."
|
303 |
|
304 |
def chatbot_interface():
|
305 |
st.title("Ginigen Blog")
|
|
|
369 |
latest_blog = msg["content"]
|
370 |
|
371 |
# 타이틀 추출 시도 (첫 번째 제목 태그 사용)
|
|
|
372 |
title_match = re.search(r'# (.*?)(\n|$)', latest_blog)
|
373 |
if title_match:
|
374 |
latest_blog_title = title_match.group(1).strip()
|
|
|
450 |
system_prompt = get_system_prompt()
|
451 |
if st.session_state.use_web_search:
|
452 |
with st.spinner("์น์์ ๊ด๋ จ ์ ๋ณด๋ฅผ ๊ฒ์ ์ค..."):
|
453 |
+
try:
|
454 |
+
search_query = extract_keywords(prompt, top_k=5)
|
455 |
+
search_results = do_web_search(search_query)
|
456 |
+
|
457 |
+
if "search failed" not in search_results.lower() and "timed out" not in search_results.lower():
|
458 |
+
# 시스템 프롬프트에 검색 결과 추가
|
459 |
+
system_prompt += f"\n\n๊ฒ์ ๊ฒฐ๊ณผ:\n{search_results}\n"
|
460 |
+
st.success(f"๊ฒ์ ์๋ฃ: '{search_query}'์ ๋ํ ์ ๋ณด๋ฅผ ์์งํ์ต๋๋ค.")
|
461 |
+
else:
|
462 |
+
st.warning("์น ๊ฒ์์ ์งํํ์ผ๋ ๊ฒฐ๊ณผ๋ฅผ ๊ฐ์ ธ์ค๋๋ฐ ์คํจํ์ต๋๋ค. ๊ธฐ์กด ์ง์์ ํ์ฉํ์ฌ ๋ต๋ณํฉ๋๋ค.")
|
463 |
+
logging.warning(f"์น ๊ฒ์ ์คํจ ๋๋ ํ์์์: {search_results}")
|
464 |
+
except Exception as e:
|
465 |
+
st.error(f"์น ๊ฒ์ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}")
|
466 |
+
logging.error(f"์น ๊ฒ์ ์ค๋ฅ: {str(e)}")
|
467 |
|
468 |
# API 호출
|
469 |
with client.messages.stream(
|