Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,8 @@ import tempfile
|
|
10 |
import base64
|
11 |
from datetime import datetime
|
12 |
import re
|
13 |
-
from bs4 import BeautifulSoup # BeautifulSoup
|
|
|
14 |
|
15 |
# 로깅 설정
|
16 |
logging.basicConfig(
|
@@ -34,23 +35,53 @@ def get_system_prompt():
|
|
34 |
return """
|
35 |
당신은 전문 블로그 작성 전문가입니다. 모든 블로그 글 작성 요청에 대해 다음의 8단계 프레임워크를 철저히 따르되, 자연스럽고 매력적인 글이 되도록 작성해야 합니다:
|
36 |
|
37 |
-
독자 연결 단계
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
이러한 프레임워크를 바탕으로, 요청받은 주제에 대해 체계적이고 매력적인 블로그 포스트를 작성하겠습니다.
|
56 |
"""
|
@@ -205,254 +236,64 @@ def generate_mock_search_results(query):
|
|
205 |
|
206 |
return notice + "\n".join(summary_lines)
|
207 |
|
208 |
-
# Google ๊ฒ์ ํจ์ (SerpAPI ๋์ ์ง์ ๊ฒ์)
|
209 |
-
# Google ๊ฒ์ ํจ์ (BeautifulSoup์ ์ฌ์ฉํ์ฌ ๊ฒฐ๊ณผ ํ์ฑ)
|
210 |
-
# Google ๊ฒ์ ํจ์ ๊ฐ์
|
211 |
-
def do_google_search(query, num_results=5):
|
212 |
-
try:
|
213 |
-
# ๋ค์ํ User-Agent ์ฌ์ฉ (Google ์ฐจ๋จ ๋ฐฉ์ง)
|
214 |
-
headers = {
|
215 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
216 |
-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
217 |
-
'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
|
218 |
-
'Referer': 'https://www.google.com/',
|
219 |
-
}
|
220 |
-
|
221 |
-
# ๊ฒ์ URL
|
222 |
-
search_url = f"https://www.google.com/search?q={query}&num={num_results}&hl=ko&gl=kr"
|
223 |
-
logging.info(f"๊ตฌ๊ธ ๊ฒ์ URL: {search_url}")
|
224 |
-
|
225 |
-
# ์์ฒญ ๋ณด๋ด๊ธฐ
|
226 |
-
response = requests.get(search_url, headers=headers, timeout=10)
|
227 |
-
|
228 |
-
# ์๋ต์ด ์ฑ๊ณต์ ์ธ์ง ํ์ธ
|
229 |
-
if response.status_code != 200:
|
230 |
-
logging.error(f"Google ๊ฒ์ ์๋ต ์ํ ์ฝ๋: {response.status_code}")
|
231 |
-
return generate_mock_search_results(query)
|
232 |
-
|
233 |
-
# HTML ํ์ฑ์ ์ํ ๋๋ฒ๊น
|
234 |
-
with open("google_response.html", "w", encoding="utf-8") as f:
|
235 |
-
f.write(response.text)
|
236 |
-
logging.info("Google ์๋ต HTML์ 'google_response.html'์ ์ ์ฅํ์ต๋๋ค.")
|
237 |
-
|
238 |
-
# BeautifulSoup์ผ๋ก HTML ํ์ฑ
|
239 |
-
soup = BeautifulSoup(response.text, 'html.parser')
|
240 |
-
|
241 |
-
# ๊ฒ์ ๊ฒฐ๊ณผ ์ถ์ถ
|
242 |
-
organic_results = []
|
243 |
-
|
244 |
-
# 2023-2025 ๊ตฌ๊ธ ๊ฒ์ ๊ฒฐ๊ณผ ์ ํ์ ์๋ (๋ค์ํ ์ ํ์ ์๋)
|
245 |
-
selectors = [
|
246 |
-
'div.g', 'div.Gx5Zad', 'div.tF2Cxc', 'div.yuRUbf',
|
247 |
-
'.v5yQqb', '.MjjYud', 'div[jscontroller]', '.fP1Qef',
|
248 |
-
'div[data-sokoban-container]', '.hlcw0c'
|
249 |
-
]
|
250 |
-
|
251 |
-
# ๋ชจ๋ ์ ํ์ ์๋
|
252 |
-
containers = []
|
253 |
-
for selector in selectors:
|
254 |
-
elements = soup.select(selector)
|
255 |
-
if elements:
|
256 |
-
logging.info(f"์ ํ์ '{selector}'๋ก {len(elements)}๊ฐ ์์๋ฅผ ์ฐพ์์ต๋๋ค.")
|
257 |
-
containers.extend(elements)
|
258 |
-
# ์ถฉ๋ถํ ์์๋ฅผ ์ฐพ์์ผ๋ฉด ์ค๋จ
|
259 |
-
if len(containers) >= num_results * 2: # ์ค๋ณต ๊ฐ๋ฅ์ฑ ๊ณ ๋ คํด 2๋ฐฐ๋ก ์ฐพ์
|
260 |
-
break
|
261 |
-
|
262 |
-
# ๋์: ๋ชจ๋ <a> ํ๊ทธ์์ href๊ฐ http๋ก ์์ํ๋ ๊ฒ๋ง ์ ํ
|
263 |
-
if not containers:
|
264 |
-
logging.warning("๊ตฌ์กฐํ๋ ์ปจํ
์ด๋๋ฅผ ์ฐพ์ ์ ์์ด ์ง์ ๋งํฌ๋ฅผ ๊ฒ์ํฉ๋๋ค.")
|
265 |
-
link_elements = soup.select('a[href^="http"]')
|
266 |
-
|
267 |
-
for link in link_elements:
|
268 |
-
if len(organic_results) >= num_results:
|
269 |
-
break
|
270 |
-
|
271 |
-
href = link.get('href', '')
|
272 |
-
# Google ๋ฆฌ๋ค์ด๋ ํธ URL ํํฐ๋ง
|
273 |
-
if '/url?' in href or 'google.com' in href:
|
274 |
-
continue
|
275 |
-
|
276 |
-
# ๋ถ๋ชจ ์์์์ ํ
์คํธ ์ถ์ถ ์๋
|
277 |
-
parent = link.parent
|
278 |
-
title = link.get_text(strip=True) or "์ ๋ชฉ ์์"
|
279 |
-
|
280 |
-
# ์ถฉ๋ถํ ๊ธธ์ด์ ํ
์คํธ๊ฐ ์๋ ๊ฒฝ์ฐ๋ง ๊ฒฐ๊ณผ๋ก ์ถ๊ฐ
|
281 |
-
if len(title) > 5: # ์๋ฏธ ์๋ ์ ๋ชฉ์ ๋ณดํต 5์ ์ด์
|
282 |
-
# ์ฃผ๋ณ ํ
์คํธ ์ถ์ถ ์๋
|
283 |
-
surrounding_text = ""
|
284 |
-
for sibling in parent.next_siblings:
|
285 |
-
if sibling.name and sibling.get_text(strip=True):
|
286 |
-
surrounding_text = sibling.get_text(strip=True)
|
287 |
-
break
|
288 |
-
|
289 |
-
# ๊ฒฐ๊ณผ ์ถ๊ฐ
|
290 |
-
organic_results.append({
|
291 |
-
"title": title,
|
292 |
-
"link": href,
|
293 |
-
"snippet": surrounding_text or "์ถ๊ฐ ์ค๋ช
์์",
|
294 |
-
"displayed_link": href.split('/')[2] if '/' in href else href
|
295 |
-
})
|
296 |
-
|
297 |
-
# ์ ํ์ ๊ธฐ๋ฐ ํ์ฑ ์๋
|
298 |
-
else:
|
299 |
-
for container in containers:
|
300 |
-
if len(organic_results) >= num_results:
|
301 |
-
break
|
302 |
-
|
303 |
-
# 1. ๋งํฌ ์ฐพ๊ธฐ
|
304 |
-
link_element = container.select_one('a[href^="http"]')
|
305 |
-
if not link_element:
|
306 |
-
continue
|
307 |
-
|
308 |
-
link = link_element.get('href', '')
|
309 |
-
# Google์ ๋ฆฌ๋ค์ด๋ ํธ URL์ด๋ฉด ๊ฑด๋๋ฐ๊ธฐ
|
310 |
-
if '/url?' in link or 'google.com' in link:
|
311 |
-
continue
|
312 |
-
|
313 |
-
# 2. ์ ๋ชฉ ์ฐพ๊ธฐ (๋ค์ํ ์ ํ์)
|
314 |
-
title_selectors = ['h3', '.LC20lb', '.DKV0Md', '.l', '.vvjwJb']
|
315 |
-
title = None
|
316 |
-
for selector in title_selectors:
|
317 |
-
title_element = container.select_one(selector)
|
318 |
-
if title_element and title_element.get_text(strip=True):
|
319 |
-
title = title_element.get_text(strip=True)
|
320 |
-
break
|
321 |
-
|
322 |
-
if not title:
|
323 |
-
title = link_element.get_text(strip=True) or "์ ๋ชฉ ์์"
|
324 |
-
|
325 |
-
# 3. ์ค๋ํซ ์ฐพ๊ธฐ (๋ค์ํ ์ ํ์)
|
326 |
-
snippet_selectors = ['.VwiC3b', '.lyLwlc', '.yXK7lf', '.lEBKkf', '.s', '.st']
|
327 |
-
snippet = "์ค๋ช
์์"
|
328 |
-
for selector in snippet_selectors:
|
329 |
-
snippet_element = container.select_one(selector)
|
330 |
-
if snippet_element and snippet_element.get_text(strip=True):
|
331 |
-
snippet = snippet_element.get_text(strip=True)
|
332 |
-
break
|
333 |
-
|
334 |
-
# 4. ํ์ ๋งํฌ ์ฐพ๊ธฐ
|
335 |
-
displayed_link_selectors = ['cite', '.UPmit', '.qLRx3b', '.iUh30']
|
336 |
-
displayed_link = link.split('/')[2] if '/' in link else link
|
337 |
-
for selector in displayed_link_selectors:
|
338 |
-
element = container.select_one(selector)
|
339 |
-
if element and element.get_text(strip=True):
|
340 |
-
displayed_link = element.get_text(strip=True)
|
341 |
-
break
|
342 |
-
|
343 |
-
# ์ค๋ณต ์ ๊ฑฐ
|
344 |
-
is_duplicate = False
|
345 |
-
for result in organic_results:
|
346 |
-
if result["link"] == link or result["title"] == title:
|
347 |
-
is_duplicate = True
|
348 |
-
break
|
349 |
-
|
350 |
-
if not is_duplicate:
|
351 |
-
organic_results.append({
|
352 |
-
"title": title,
|
353 |
-
"link": link,
|
354 |
-
"snippet": snippet,
|
355 |
-
"displayed_link": displayed_link
|
356 |
-
})
|
357 |
-
|
358 |
-
if not organic_results:
|
359 |
-
logging.warning("๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ํ์ฑํ ์ ์์ต๋๋ค. ๊ฐ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํํฉ๋๋ค.")
|
360 |
-
return generate_mock_search_results(query)
|
361 |
-
|
362 |
-
# ๊ฒ์ ๊ฒฐ๊ณผ ๋งํฌ๋ค์ด ํ์์ผ๋ก ๋ณํ
|
363 |
-
summary_lines = []
|
364 |
-
for idx, item in enumerate(organic_results, start=1):
|
365 |
-
title = item.get("title", "No title")
|
366 |
-
link = item.get("link", "#")
|
367 |
-
snippet = item.get("snippet", "No description")
|
368 |
-
displayed_link = item.get("displayed_link", link)
|
369 |
-
|
370 |
-
summary_lines.append(
|
371 |
-
f"### Result {idx}: {title}\n\n"
|
372 |
-
f"{snippet}\n\n"
|
373 |
-
f"**์ถ์ฒ**: [{displayed_link}]({link})\n\n"
|
374 |
-
f"---\n"
|
375 |
-
)
|
376 |
-
|
377 |
-
# ๋ชจ๋ธ์๊ฒ ๋ช
ํํ ์ง์นจ ์ถ๊ฐ
|
378 |
-
instructions = """
|
379 |
-
# ์น ๊ฒ์ ๊ฒฐ๊ณผ
|
380 |
-
์๋๋ ๊ฒ์ ๊ฒฐ๊ณผ์
๋๋ค. ์ง๋ฌธ์ ๋ต๋ณํ ๋ ์ด ์ ๋ณด๋ฅผ ํ์ฉํ์ธ์:
|
381 |
-
1. ๊ฐ ๊ฒฐ๊ณผ์ ์ ๋ชฉ, ๋ด์ฉ, ์ถ์ฒ ๋งํฌ๋ฅผ ์ฐธ๊ณ ํ์ธ์
|
382 |
-
2. ๋ต๋ณ์ ๊ด๋ จ ์ ๋ณด์ ์ถ์ฒ๋ฅผ ๋ช
์์ ์ผ๋ก ์ธ์ฉํ์ธ์ (์: "X ์ถ์ฒ์ ๋ฐ๋ฅด๋ฉด...")
|
383 |
-
3. ์๋ต์ ์ค์ ์ถ์ฒ ๋งํฌ๋ฅผ ํฌํจํ์ธ์
|
384 |
-
4. ์ฌ๋ฌ ์ถ์ฒ์ ์ ๋ณด๋ฅผ ์ข
ํฉํ์ฌ ๋ต๋ณํ์ธ์
|
385 |
-
"""
|
386 |
-
|
387 |
-
search_results = instructions + "\n".join(summary_lines)
|
388 |
-
logging.info(f"Google ๊ฒ์ ๊ฒฐ๊ณผ {len(organic_results)}๊ฐ ํ์ฑ ์๋ฃ")
|
389 |
-
return search_results
|
390 |
-
|
391 |
-
except Exception as e:
|
392 |
-
logging.error(f"Google ๊ฒ์ ์คํจ: {e}")
|
393 |
-
return generate_mock_search_results(query)
|
394 |
|
395 |
-
|
|
|
|
|
396 |
def do_web_search(query: str) -> str:
|
397 |
"""
|
398 |
-
์น ๊ฒ์์
|
|
|
|
|
|
|
399 |
"""
|
|
|
|
|
|
|
|
|
|
|
400 |
try:
|
401 |
-
# API
|
402 |
-
if not SERPHOUSE_API_KEY or "mock" in SERPHOUSE_API_KEY.lower():
|
403 |
-
logging.warning("API ํค๊ฐ ์๊ฑฐ๋ Mock ๋ชจ๋์
๋๋ค. ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํํฉ๋๋ค.")
|
404 |
-
return generate_mock_search_results(query)
|
405 |
-
|
406 |
-
# SerpHouse API ์ฌ์ฉ
|
407 |
url = "https://api.serphouse.com/serp/live"
|
408 |
params = {
|
409 |
"q": query,
|
410 |
"domain": "google.com",
|
411 |
-
"serp_type": "web",
|
412 |
-
"device": "desktop",
|
413 |
-
"lang": "ko", # ํ๊ตญ์ด
|
414 |
-
"num": "5" # ๊ฒฐ๊ณผ
|
415 |
}
|
416 |
-
|
417 |
headers = {
|
418 |
"Authorization": f"Bearer {SERPHOUSE_API_KEY}"
|
419 |
}
|
420 |
|
421 |
logging.info(f"SerpHouse API ํธ์ถ ์ค... ๊ฒ์์ด: {query}")
|
422 |
-
|
423 |
-
# ์งง์ ํ์์์์ผ๋ก ์์ฒญ ์๋
|
424 |
response = requests.get(url, headers=headers, params=params, timeout=15)
|
425 |
response.raise_for_status()
|
426 |
|
427 |
-
logging.info(f"SerpHouse API ์๋ต ์ํ ์ฝ๋: {response.status_code}")
|
428 |
data = response.json()
|
429 |
|
430 |
-
#
|
431 |
results = data.get("results", {})
|
432 |
organic = None
|
433 |
|
434 |
-
# ๊ฐ๋ฅํ ์๋ต ๊ตฌ์กฐ
|
435 |
if isinstance(results, dict) and "organic" in results:
|
436 |
organic = results["organic"]
|
437 |
-
# ๊ฐ๋ฅํ ์๋ต ๊ตฌ์กฐ 2
|
438 |
-
elif isinstance(results, dict) and "results" in results:
|
439 |
-
if isinstance(results["results"], dict) and "organic" in results["results"]:
|
440 |
-
organic = results["results"]["organic"]
|
441 |
-
# ๊ฐ๋ฅํ ์๋ต ๊ตฌ์กฐ 3
|
442 |
elif "organic" in data:
|
443 |
organic = data["organic"]
|
444 |
-
|
|
|
445 |
if not organic:
|
446 |
-
logging.warning("์๋ต์์ organic
|
447 |
-
return
|
448 |
-
|
449 |
-
#
|
450 |
-
|
451 |
-
limited_organic = organic[:max_results]
|
452 |
|
453 |
-
# ๊ฒฐ๊ณผ
|
454 |
summary_lines = []
|
455 |
-
for idx, item in enumerate(
|
456 |
title = item.get("title", "No title")
|
457 |
link = item.get("link", "#")
|
458 |
snippet = item.get("snippet", "No description")
|
@@ -465,7 +306,6 @@ def do_web_search(query: str) -> str:
|
|
465 |
f"---\n"
|
466 |
)
|
467 |
|
468 |
-
# ๋ชจ๋ธ์๊ฒ ๋ช
ํํ ์ง์นจ ์ถ๊ฐ
|
469 |
instructions = """
|
470 |
# ์น ๊ฒ์ ๊ฒฐ๊ณผ
|
471 |
์๋๋ ๊ฒ์ ๊ฒฐ๊ณผ์
๋๋ค. ์ง๋ฌธ์ ๋ต๋ณํ ๋ ์ด ์ ๋ณด๋ฅผ ํ์ฉํ์ธ์:
|
@@ -475,16 +315,15 @@ def do_web_search(query: str) -> str:
|
|
475 |
4. ์ฌ๋ฌ ์ถ์ฒ์ ์ ๋ณด๋ฅผ ์ข
ํฉํ์ฌ ๋ต๋ณํ์ธ์
|
476 |
"""
|
477 |
|
478 |
-
|
479 |
-
|
480 |
-
return search_results
|
481 |
-
|
482 |
except requests.exceptions.Timeout:
|
483 |
-
logging.error("
|
484 |
-
return
|
485 |
except Exception as e:
|
486 |
-
logging.error(f"
|
487 |
-
return
|
|
|
488 |
|
489 |
def chatbot_interface():
|
490 |
st.title("Ginigen Blog")
|
@@ -631,7 +470,7 @@ def chatbot_interface():
|
|
631 |
message_placeholder = st.empty()
|
632 |
full_response = ""
|
633 |
|
634 |
-
# ์น ๊ฒ์ ์ํ (
|
635 |
system_prompt = get_system_prompt()
|
636 |
if st.session_state.use_web_search:
|
637 |
with st.spinner("์น์์ ๊ด๋ จ ์ ๋ณด๋ฅผ ๊ฒ์ ์ค..."):
|
@@ -639,7 +478,7 @@ def chatbot_interface():
|
|
639 |
search_query = extract_keywords(prompt, top_k=5)
|
640 |
st.info(f"๊ฒ์์ด: {search_query}")
|
641 |
|
642 |
-
#
|
643 |
search_results = do_web_search(search_query)
|
644 |
|
645 |
if "๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ" in search_results:
|
@@ -773,4 +612,4 @@ if __name__ == "__main__":
|
|
773 |
f.write("markdown>=3.5.1\n")
|
774 |
f.write("pillow>=10.1.0\n")
|
775 |
|
776 |
-
main()
|
|
|
10 |
import base64
|
11 |
from datetime import datetime
|
12 |
import re
|
13 |
+
from bs4 import BeautifulSoup # BeautifulSoup๋ ์ด์ ์ฌ์ฉํ์ง ์์ง๋ง, ํ์ ์ ์ ์ง
|
14 |
+
# (์ง์ ๊ตฌ๊ธ ๊ฒ์ ๋ก์ง์ ์ญ์ ํ์ผ๋ฏ๋ก ์ฌ์ค์ BeautifulSoup๋ ํ์ ์์ต๋๋ค.)
|
15 |
|
16 |
# 로깅 설정
|
17 |
logging.basicConfig(
|
|
|
35 |
return """
|
36 |
당신은 전문 블로그 작성 전문가입니다. 모든 블로그 글 작성 요청에 대해 다음의 8단계 프레임워크를 철저히 따르되, 자연스럽고 매력적인 글이 되도록 작성해야 합니다:
|
37 |
|
38 |
+
독자 연결 단계
|
39 |
+
1.1. 공감대 형성을 위한 친근한 인사
|
40 |
+
1.2. 독자의 실제 고민을 반영한 도입 질문
|
41 |
+
1.3. 주제에 대한 즉각적 관심 유도
|
42 |
+
|
43 |
+
문제 정의 단계
|
44 |
+
2.1. 독자의 페인포인트 구체화
|
45 |
+
2.2. 문제의 시급성과 영향도 분석
|
46 |
+
2.3. 해결 필요성에 대한 공감대 형성
|
47 |
+
|
48 |
+
전문성 입증 단계
|
49 |
+
3.1. 객관적 데이터 기반 분석
|
50 |
+
3.2. 전문가 견해와 연구 결과 인용
|
51 |
+
3.3. 실제 사례를 통한 문제 구체화
|
52 |
+
|
53 |
+
솔루션 제공 단계
|
54 |
+
4.1. 단계별 실천 가이드라인 제시
|
55 |
+
4.2. 즉시 적용 가능한 구체적 팁
|
56 |
+
4.3. 예상 장애물과 극복 방안 포함
|
57 |
+
|
58 |
+
신뢰도 강화 단계
|
59 |
+
5.1. 실제 성공 사례 제시
|
60 |
+
5.2. 구체적 사용자 후기 인용
|
61 |
+
5.3. 객관적 데이터로 효과 입증
|
62 |
+
|
63 |
+
행동 유도 단계
|
64 |
+
6.1. 명확한 첫 실천 단계 제시
|
65 |
+
6.2. 시급성을 강조한 행동 촉구
|
66 |
+
6.3. 실천 동기 부여 요소 포함
|
67 |
+
|
68 |
+
진정성 강화 단계
|
69 |
+
7.1. 솔루션의 한계 투명하게 공개
|
70 |
+
7.2. 개인별 차이 존재 인정
|
71 |
+
7.3. 필요 조건과 주의사항 명시
|
72 |
+
|
73 |
+
관계 지속 단계
|
74 |
+
8.1. 진정성 있는 감사 인사
|
75 |
+
8.2. 다음 컨텐츠 예고로 기대감 조성
|
76 |
+
8.3. 소통 채널 안내
|
77 |
+
|
78 |
+
작성 시 준수사항
|
79 |
+
9.1. 글자 수: 1500-2000자 내외
|
80 |
+
9.2. 문단 길이: 3-4문장 이내
|
81 |
+
9.3. 시각적 구분: 소제목, 구분선, 번호 목록 활용
|
82 |
+
9.4. 톤앤매너: 친근하고 전문적인 대화체
|
83 |
+
9.5. 데이터: 모든 정보의 출처 명시
|
84 |
+
9.6. 가독성: 명확한 단락 구분과 강조점 사용
|
85 |
|
86 |
이러한 프레임워크를 바탕으로, 요청받은 주제에 대해 체계적이고 매력적인 블로그 포스트를 작성하겠습니다.
|
87 |
"""
|
|
|
236 |
|
237 |
return notice + "\n".join(summary_lines)
|
238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
+
###################################################
|
241 |
+
# SerpHouse 만 사용하는 웹 검색 함수
|
242 |
+
###################################################
|
243 |
def do_web_search(query: str) -> str:
|
244 |
"""
|
245 |
+
웹 검색을 **SerpHouse**로만 수행하는 함수.
|
246 |
+
- SERPHOUSE_API_KEY๊ฐ ์๊ฑฐ๋ mock์ผ ๊ฒฝ์ฐ์๋ ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ
|
247 |
+
- API 호출이 성공하면 그 결과를 파싱하여 마크다운으로 반환
|
248 |
+
- 실패하면 가상 검색 결과를 반환
|
249 |
"""
|
250 |
+
# 1) API 키 유효성 체크
|
251 |
+
if not SERPHOUSE_API_KEY or "mock" in SERPHOUSE_API_KEY.lower():
|
252 |
+
logging.warning("API 키가 없거나 Mock 모드입니다. => 가상 검색 결과 반환")
|
253 |
+
return generate_mock_search_results(query)
|
254 |
+
|
255 |
try:
|
256 |
+
# SerpHouse API
|
|
|
|
|
|
|
|
|
|
|
257 |
url = "https://api.serphouse.com/serp/live"
|
258 |
params = {
|
259 |
"q": query,
|
260 |
"domain": "google.com",
|
261 |
+
"serp_type": "web", # ์น ๊ฒ์
|
262 |
+
"device": "desktop",
|
263 |
+
"lang": "ko", # ํ๊ตญ์ด
|
264 |
+
"num": "5" # ๊ฒฐ๊ณผ ์ต๋ 5๊ฐ
|
265 |
}
|
|
|
266 |
headers = {
|
267 |
"Authorization": f"Bearer {SERPHOUSE_API_KEY}"
|
268 |
}
|
269 |
|
270 |
logging.info(f"SerpHouse API 호출 중... 검색어: {query}")
|
|
|
|
|
271 |
response = requests.get(url, headers=headers, params=params, timeout=15)
|
272 |
response.raise_for_status()
|
273 |
|
|
|
274 |
data = response.json()
|
275 |
|
276 |
+
# ๊ฒฐ๊ณผ ๊ตฌ์กฐ ํ์
|
277 |
results = data.get("results", {})
|
278 |
organic = None
|
279 |
|
280 |
+
# 가능한 응답 구조 확인
|
281 |
if isinstance(results, dict) and "organic" in results:
|
282 |
organic = results["organic"]
|
|
|
|
|
|
|
|
|
|
|
283 |
elif "organic" in data:
|
284 |
organic = data["organic"]
|
285 |
+
|
286 |
+
# organic 결과가 없다면 가상 결과
|
287 |
if not organic:
|
288 |
+
logging.warning("SerpHouse 응답에서 organic 항목을 찾을 수 없습니다.")
|
289 |
+
return generate_mock_search_results(query)
|
290 |
+
|
291 |
+
# 최대 5개만 슬라이싱
|
292 |
+
organic = organic[:5]
|
|
|
293 |
|
294 |
+
# 결과 요약 문자열 만들기
|
295 |
summary_lines = []
|
296 |
+
for idx, item in enumerate(organic, start=1):
|
297 |
title = item.get("title", "No title")
|
298 |
link = item.get("link", "#")
|
299 |
snippet = item.get("snippet", "No description")
|
|
|
306 |
f"---\n"
|
307 |
)
|
308 |
|
|
|
309 |
instructions = """
|
310 |
# 웹 검색 결과
|
311 |
아래는 검색 결과입니다. 질문에 답변할 때 이 정보를 활용하세요:
|
|
|
315 |
4. 여러 출처의 정보를 종합하여 답변하세요
|
316 |
"""
|
317 |
|
318 |
+
return instructions + "\n".join(summary_lines)
|
319 |
+
|
|
|
|
|
320 |
except requests.exceptions.Timeout:
|
321 |
+
logging.error("SerpHouse 검색 타임아웃. => 가상 검색 결과를 반환합니다.")
|
322 |
+
return generate_mock_search_results(query)
|
323 |
except Exception as e:
|
324 |
+
logging.error(f"SerpHouse 검색 실패: {e}")
|
325 |
+
return generate_mock_search_results(query)
|
326 |
+
|
327 |
|
328 |
def chatbot_interface():
|
329 |
st.title("Ginigen Blog")
|
|
|
470 |
message_placeholder = st.empty()
|
471 |
full_response = ""
|
472 |
|
473 |
+
# 웹 검색 수행 (옵션이 켜져 있을 경우)
|
474 |
system_prompt = get_system_prompt()
|
475 |
if st.session_state.use_web_search:
|
476 |
with st.spinner("웹에서 관련 정보를 검색 중..."):
|
|
|
478 |
search_query = extract_keywords(prompt, top_k=5)
|
479 |
st.info(f"검색어: {search_query}")
|
480 |
|
481 |
+
# SerpHouse API๋ก ๊ฒ์
|
482 |
search_results = do_web_search(search_query)
|
483 |
|
484 |
if "가상 검색 결과" in search_results:
|
|
|
612 |
f.write("markdown>=3.5.1\n")
|
613 |
f.write("pillow>=10.1.0\n")
|
614 |
|
615 |
+
main()
|