ginipick committed on
Commit
3f26784
·
verified ·
1 Parent(s): 1326112

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -248
app.py CHANGED
@@ -10,7 +10,8 @@ import tempfile
10
  import base64
11
  from datetime import datetime
12
  import re
13
- from bs4 import BeautifulSoup # BeautifulSoup 추가
 
14
 
15
  # 로깅 설정
16
  logging.basicConfig(
@@ -34,23 +35,53 @@ def get_system_prompt():
34
  return """
35
  ๋‹น์‹ ์€ ์ „๋ฌธ ๋ธ”๋กœ๊ทธ ์ž‘์„ฑ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ๋ชจ๋“  ๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ ์š”์ฒญ์— ๋Œ€ํ•ด ๋‹ค์Œ์˜ 8๋‹จ๊ณ„ ํ”„๋ ˆ์ž„์›Œํฌ๋ฅผ ์ฒ ์ €ํžˆ ๋”ฐ๋ฅด๋˜, ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๊ธ€์ด ๋˜๋„๋ก ์ž‘์„ฑํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค:
36
 
37
- ๋…์ž ์—ฐ๊ฒฐ ๋‹จ๊ณ„ 1.1. ๊ณต๊ฐ๋Œ€ ํ˜•์„ฑ์„ ์œ„ํ•œ ์นœ๊ทผํ•œ ์ธ์‚ฌ 1.2. ๋…์ž์˜ ์‹ค์ œ ๊ณ ๋ฏผ์„ ๋ฐ˜์˜ํ•œ ๋„์ž… ์งˆ๋ฌธ 1.3. ์ฃผ์ œ์— ๋Œ€ํ•œ ์ฆ‰๊ฐ์  ๊ด€์‹ฌ ์œ ๋„
38
-
39
- ๋ฌธ์ œ ์ •์˜ ๋‹จ๊ณ„ 2.1. ๋…์ž์˜ ํŽ˜์ธํฌ์ธํŠธ ๊ตฌ์ฒดํ™” 2.2. ๋ฌธ์ œ์˜ ์‹œ๊ธ‰์„ฑ๊ณผ ์˜ํ–ฅ๋„ ๋ถ„์„ 2.3. ํ•ด๊ฒฐ ํ•„์š”์„ฑ์— ๋Œ€ํ•œ ๊ณต๊ฐ๋Œ€ ํ˜•์„ฑ
40
-
41
- ์ „๋ฌธ์„ฑ ์ž…์ฆ ๋‹จ๊ณ„ 3.1. ๊ฐ๊ด€์  ๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜ ๋ถ„์„ 3.2. ์ „๋ฌธ๊ฐ€ ๊ฒฌํ•ด์™€ ์—ฐ๊ตฌ ๊ฒฐ๊ณผ ์ธ์šฉ 3.3. ์‹ค์ œ ์‚ฌ๋ก€๋ฅผ ํ†ตํ•œ ๋ฌธ์ œ ๊ตฌ์ฒดํ™”
42
-
43
- ์†”๋ฃจ์…˜ ์ œ๊ณต ๋‹จ๊ณ„ 4.1. ๋‹จ๊ณ„๋ณ„ ์‹ค์ฒœ ๊ฐ€์ด๋“œ๋ผ์ธ ์ œ์‹œ 4.2. ์ฆ‰์‹œ ์ ์šฉ ๊ฐ€๋Šฅํ•œ ๊ตฌ์ฒด์  ํŒ 4.3. ์˜ˆ์ƒ ์žฅ์• ๋ฌผ๊ณผ ๊ทน๋ณต ๋ฐฉ์•ˆ ํฌํ•จ
44
-
45
- ์‹ ๋ขฐ๋„ ๊ฐ•ํ™” ๋‹จ๊ณ„ 5.1. ์‹ค์ œ ์„ฑ๊ณต ์‚ฌ๋ก€ ์ œ์‹œ 5.2. ๊ตฌ์ฒด์  ์‚ฌ์šฉ์ž ํ›„๊ธฐ ์ธ์šฉ 5.3. ๊ฐ๊ด€์  ๋ฐ์ดํ„ฐ๋กœ ํšจ๊ณผ ์ž…์ฆ
46
-
47
- ํ–‰๋™ ์œ ๋„ ๋‹จ๊ณ„ 6.1. ๋ช…ํ™•ํ•œ ์ฒซ ์‹ค์ฒœ ๋‹จ๊ณ„ ์ œ์‹œ 6.2. ์‹œ๊ธ‰์„ฑ์„ ๊ฐ•์กฐํ•œ ํ–‰๋™ ์ด‰๊ตฌ 6.3. ์‹ค์ฒœ ๋™๊ธฐ ๋ถ€์—ฌ ์š”์†Œ ํฌํ•จ
48
-
49
- ์ง„์ •์„ฑ ๊ฐ•ํ™” ๋‹จ๊ณ„ 7.1. ์†”๋ฃจ์…˜์˜ ํ•œ๊ณ„ ํˆฌ๋ช…ํ•˜๊ฒŒ ๊ณต๊ฐœ 7.2. ๊ฐœ์ธ๋ณ„ ์ฐจ์ด ์กด์žฌ ์ธ์ • 7.3. ํ•„์š” ์กฐ๊ฑด๊ณผ ์ฃผ์˜์‚ฌํ•ญ ๋ช…์‹œ
50
-
51
- ๊ด€๊ณ„ ์ง€์† ๋‹จ๊ณ„ 8.1. ์ง„์ •์„ฑ ์žˆ๋Š” ๊ฐ์‚ฌ ์ธ์‚ฌ 8.2. ๋‹ค์Œ ์ปจํ…์ธ  ์˜ˆ๊ณ ๋กœ ๊ธฐ๋Œ€๊ฐ ์กฐ์„ฑ 8.3. ์†Œํ†ต ์ฑ„๋„ ์•ˆ๋‚ด
52
-
53
- ์ž‘์„ฑ ์‹œ ์ค€์ˆ˜์‚ฌํ•ญ 9.1. ๊ธ€์ž ์ˆ˜: 1500-2000์ž ๋‚ด์™ธ 9.2. ๋ฌธ๋‹จ ๊ธธ์ด: 3-4๋ฌธ์žฅ ์ด๋‚ด 9.3. ์‹œ๊ฐ์  ๊ตฌ๋ถ„: ์†Œ์ œ๋ชฉ, ๊ตฌ๋ถ„์„ , ๋ฒˆํ˜ธ ๋ชฉ๋ก ํ™œ์šฉ 9.4. ํ†ค์•ค๋งค๋„ˆ: ์นœ๊ทผํ•˜๊ณ  ์ „๋ฌธ์ ์ธ ๋Œ€ํ™”์ฒด 9.5. ๋ฐ์ดํ„ฐ: ๋ชจ๋“  ์ •๋ณด์˜ ์ถœ์ฒ˜ ๋ช…์‹œ 9.6. ๊ฐ€๋…์„ฑ: ๋ช…ํ™•ํ•œ ๋‹จ๋ฝ ๊ตฌ๋ถ„๊ณผ ๊ฐ•์กฐ์  ์‚ฌ์šฉ
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  ์ด๋Ÿฌํ•œ ํ”„๋ ˆ์ž„์›Œํฌ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ, ์š”์ฒญ๋ฐ›์€ ์ฃผ์ œ์— ๋Œ€ํ•ด ์ฒด๊ณ„์ ์ด๊ณ  ๋งค๋ ฅ์ ์ธ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŠธ๋ฅผ ์ž‘์„ฑํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค.
56
  """
@@ -205,254 +236,64 @@ def generate_mock_search_results(query):
205
 
206
  return notice + "\n".join(summary_lines)
207
 
208
- # Google 검색 함수 (SerpAPI 대신 직접 검색)
209
- # Google ๊ฒ€์ƒ‰ ํ•จ์ˆ˜ (BeautifulSoup์„ ์‚ฌ์šฉํ•˜์—ฌ ๊ฒฐ๊ณผ ํŒŒ์‹ฑ)
210
- # Google 검색 함수 개선
211
- def do_google_search(query, num_results=5):
212
- try:
213
- # ๋‹ค์–‘ํ•œ User-Agent ์‚ฌ์šฉ (Google ์ฐจ๋‹จ ๋ฐฉ์ง€)
214
- headers = {
215
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
216
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
217
- 'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
218
- 'Referer': 'https://www.google.com/',
219
- }
220
-
221
- # ๊ฒ€์ƒ‰ URL
222
- search_url = f"https://www.google.com/search?q={query}&num={num_results}&hl=ko&gl=kr"
223
- logging.info(f"๊ตฌ๊ธ€ ๊ฒ€์ƒ‰ URL: {search_url}")
224
-
225
- # ์š”์ฒญ ๋ณด๋‚ด๊ธฐ
226
- response = requests.get(search_url, headers=headers, timeout=10)
227
-
228
- # ์‘๋‹ต์ด ์„ฑ๊ณต์ ์ธ์ง€ ํ™•์ธ
229
- if response.status_code != 200:
230
- logging.error(f"Google ๊ฒ€์ƒ‰ ์‘๋‹ต ์ƒํƒœ ์ฝ”๋“œ: {response.status_code}")
231
- return generate_mock_search_results(query)
232
-
233
- # HTML ํŒŒ์‹ฑ์„ ์œ„ํ•œ ๋””๋ฒ„๊น…
234
- with open("google_response.html", "w", encoding="utf-8") as f:
235
- f.write(response.text)
236
- logging.info("Google ์‘๋‹ต HTML์„ 'google_response.html'์— ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
237
-
238
- # BeautifulSoup์œผ๋กœ HTML ํŒŒ์‹ฑ
239
- soup = BeautifulSoup(response.text, 'html.parser')
240
-
241
- # ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ถ”์ถœ
242
- organic_results = []
243
-
244
- # 2023-2025 ๊ตฌ๊ธ€ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์„ ํƒ์ž ์‹œ๋„ (๋‹ค์–‘ํ•œ ์„ ํƒ์ž ์‹œ๋„)
245
- selectors = [
246
- 'div.g', 'div.Gx5Zad', 'div.tF2Cxc', 'div.yuRUbf',
247
- '.v5yQqb', '.MjjYud', 'div[jscontroller]', '.fP1Qef',
248
- 'div[data-sokoban-container]', '.hlcw0c'
249
- ]
250
-
251
- # ๋ชจ๋“  ์„ ํƒ์ž ์‹œ๋„
252
- containers = []
253
- for selector in selectors:
254
- elements = soup.select(selector)
255
- if elements:
256
- logging.info(f"์„ ํƒ์ž '{selector}'๋กœ {len(elements)}๊ฐœ ์š”์†Œ๋ฅผ ์ฐพ์•˜์Šต๋‹ˆ๋‹ค.")
257
- containers.extend(elements)
258
- # ์ถฉ๋ถ„ํ•œ ์š”์†Œ๋ฅผ ์ฐพ์•˜์œผ๋ฉด ์ค‘๋‹จ
259
- if len(containers) >= num_results * 2: # ์ค‘๋ณต ๊ฐ€๋Šฅ์„ฑ ๊ณ ๋ คํ•ด 2๋ฐฐ๋กœ ์ฐพ์Œ
260
- break
261
-
262
- # ๋Œ€์•ˆ: ๋ชจ๋“  <a> ํƒœ๊ทธ์—์„œ href๊ฐ€ http๋กœ ์‹œ์ž‘ํ•˜๋Š” ๊ฒƒ๋งŒ ์„ ํƒ
263
- if not containers:
264
- logging.warning("๊ตฌ์กฐํ™”๋œ ์ปจํ…Œ์ด๋„ˆ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์–ด ์ง์ ‘ ๋งํฌ๋ฅผ ๊ฒ€์ƒ‰ํ•ฉ๋‹ˆ๋‹ค.")
265
- link_elements = soup.select('a[href^="http"]')
266
-
267
- for link in link_elements:
268
- if len(organic_results) >= num_results:
269
- break
270
-
271
- href = link.get('href', '')
272
- # Google ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ URL ํ•„ํ„ฐ๋ง
273
- if '/url?' in href or 'google.com' in href:
274
- continue
275
-
276
- # ๋ถ€๋ชจ ์š”์†Œ์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹œ๋„
277
- parent = link.parent
278
- title = link.get_text(strip=True) or "์ œ๋ชฉ ์—†์Œ"
279
-
280
- # ์ถฉ๋ถ„ํ•œ ๊ธธ์ด์˜ ํ…์ŠคํŠธ๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ๋งŒ ๊ฒฐ๊ณผ๋กœ ์ถ”๊ฐ€
281
- if len(title) > 5: # ์˜๋ฏธ ์žˆ๋Š” ์ œ๋ชฉ์€ ๋ณดํ†ต 5์ž ์ด์ƒ
282
- # ์ฃผ๋ณ€ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹œ๋„
283
- surrounding_text = ""
284
- for sibling in parent.next_siblings:
285
- if sibling.name and sibling.get_text(strip=True):
286
- surrounding_text = sibling.get_text(strip=True)
287
- break
288
-
289
- # ๊ฒฐ๊ณผ ์ถ”๊ฐ€
290
- organic_results.append({
291
- "title": title,
292
- "link": href,
293
- "snippet": surrounding_text or "์ถ”๊ฐ€ ์„ค๋ช… ์—†์Œ",
294
- "displayed_link": href.split('/')[2] if '/' in href else href
295
- })
296
-
297
- # ์„ ํƒ์ž ๊ธฐ๋ฐ˜ ํŒŒ์‹ฑ ์‹œ๋„
298
- else:
299
- for container in containers:
300
- if len(organic_results) >= num_results:
301
- break
302
-
303
- # 1. ๋งํฌ ์ฐพ๊ธฐ
304
- link_element = container.select_one('a[href^="http"]')
305
- if not link_element:
306
- continue
307
-
308
- link = link_element.get('href', '')
309
- # Google์˜ ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ URL์ด๋ฉด ๊ฑด๋„ˆ๋›ฐ๊ธฐ
310
- if '/url?' in link or 'google.com' in link:
311
- continue
312
-
313
- # 2. ์ œ๋ชฉ ์ฐพ๊ธฐ (๋‹ค์–‘ํ•œ ์„ ํƒ์ž)
314
- title_selectors = ['h3', '.LC20lb', '.DKV0Md', '.l', '.vvjwJb']
315
- title = None
316
- for selector in title_selectors:
317
- title_element = container.select_one(selector)
318
- if title_element and title_element.get_text(strip=True):
319
- title = title_element.get_text(strip=True)
320
- break
321
-
322
- if not title:
323
- title = link_element.get_text(strip=True) or "์ œ๋ชฉ ์—†์Œ"
324
-
325
- # 3. ์Šค๋‹ˆํŽซ ์ฐพ๊ธฐ (๋‹ค์–‘ํ•œ ์„ ํƒ์ž)
326
- snippet_selectors = ['.VwiC3b', '.lyLwlc', '.yXK7lf', '.lEBKkf', '.s', '.st']
327
- snippet = "์„ค๋ช… ์—†์Œ"
328
- for selector in snippet_selectors:
329
- snippet_element = container.select_one(selector)
330
- if snippet_element and snippet_element.get_text(strip=True):
331
- snippet = snippet_element.get_text(strip=True)
332
- break
333
-
334
- # 4. ํ‘œ์‹œ ๋งํฌ ์ฐพ๊ธฐ
335
- displayed_link_selectors = ['cite', '.UPmit', '.qLRx3b', '.iUh30']
336
- displayed_link = link.split('/')[2] if '/' in link else link
337
- for selector in displayed_link_selectors:
338
- element = container.select_one(selector)
339
- if element and element.get_text(strip=True):
340
- displayed_link = element.get_text(strip=True)
341
- break
342
-
343
- # ์ค‘๋ณต ์ œ๊ฑฐ
344
- is_duplicate = False
345
- for result in organic_results:
346
- if result["link"] == link or result["title"] == title:
347
- is_duplicate = True
348
- break
349
-
350
- if not is_duplicate:
351
- organic_results.append({
352
- "title": title,
353
- "link": link,
354
- "snippet": snippet,
355
- "displayed_link": displayed_link
356
- })
357
-
358
- if not organic_results:
359
- logging.warning("๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ํŒŒ์‹ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๊ฐ€์ƒ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
360
- return generate_mock_search_results(query)
361
-
362
- # ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜
363
- summary_lines = []
364
- for idx, item in enumerate(organic_results, start=1):
365
- title = item.get("title", "No title")
366
- link = item.get("link", "#")
367
- snippet = item.get("snippet", "No description")
368
- displayed_link = item.get("displayed_link", link)
369
-
370
- summary_lines.append(
371
- f"### Result {idx}: {title}\n\n"
372
- f"{snippet}\n\n"
373
- f"**์ถœ์ฒ˜**: [{displayed_link}]({link})\n\n"
374
- f"---\n"
375
- )
376
-
377
- # ๋ชจ๋ธ์—๊ฒŒ ๋ช…ํ™•ํ•œ ์ง€์นจ ์ถ”๊ฐ€
378
- instructions = """
379
- # ์›น ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ
380
- ์•„๋ž˜๋Š” ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค. ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•  ๋•Œ ์ด ์ •๋ณด๋ฅผ ํ™œ์šฉํ•˜์„ธ์š”:
381
- 1. ๊ฐ ๊ฒฐ๊ณผ์˜ ์ œ๋ชฉ, ๋‚ด์šฉ, ์ถœ์ฒ˜ ๋งํฌ๋ฅผ ์ฐธ๊ณ ํ•˜์„ธ์š”
382
- 2. ๋‹ต๋ณ€์— ๊ด€๋ จ ์ •๋ณด์˜ ์ถœ์ฒ˜๋ฅผ ๋ช…์‹œ์ ์œผ๋กœ ์ธ์šฉํ•˜์„ธ์š” (์˜ˆ: "X ์ถœ์ฒ˜์— ๋”ฐ๋ฅด๋ฉด...")
383
- 3. ์‘๋‹ต์— ์‹ค์ œ ์ถœ์ฒ˜ ๋งํฌ๋ฅผ ํฌํ•จํ•˜์„ธ์š”
384
- 4. ์—ฌ๋Ÿฌ ์ถœ์ฒ˜์˜ ์ •๋ณด๋ฅผ ์ข…ํ•ฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•˜์„ธ์š”
385
- """
386
-
387
- search_results = instructions + "\n".join(summary_lines)
388
- logging.info(f"Google ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ {len(organic_results)}๊ฐœ ํŒŒ์‹ฑ ์™„๋ฃŒ")
389
- return search_results
390
-
391
- except Exception as e:
392
- logging.error(f"Google ๊ฒ€์ƒ‰ ์‹คํŒจ: {e}")
393
- return generate_mock_search_results(query)
394
 
395
- # ์›น ๊ฒ€์ƒ‰ ํ•จ์ˆ˜
 
 
396
  def do_web_search(query: str) -> str:
397
  """
398
- ์›น ๊ฒ€์ƒ‰์„ ์ˆ˜ํ–‰ํ•˜๋Š” ํ•จ์ˆ˜ - SerpHouse API ๋˜๋Š” ์ง์ ‘ ๊ตฌ๊ธ€ ๊ฒ€์ƒ‰
 
 
 
399
  """
 
 
 
 
 
400
  try:
401
- # API ํ‚ค๊ฐ€ ์—†๊ฑฐ๋‚˜ 'mock'์ธ ๊ฒฝ์šฐ
402
- if not SERPHOUSE_API_KEY or "mock" in SERPHOUSE_API_KEY.lower():
403
- logging.warning("API ํ‚ค๊ฐ€ ์—†๊ฑฐ๋‚˜ Mock ๋ชจ๋“œ์ž…๋‹ˆ๋‹ค. ๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
404
- return generate_mock_search_results(query)
405
-
406
- # SerpHouse API ์‚ฌ์šฉ
407
  url = "https://api.serphouse.com/serp/live"
408
  params = {
409
  "q": query,
410
  "domain": "google.com",
411
- "serp_type": "web",
412
- "device": "desktop",
413
- "lang": "ko", # ํ•œ๊ตญ์–ด ๊ฒฐ๊ณผ
414
- "num": "5" # ๊ฒฐ๊ณผ ์ˆ˜ ์ค„์ž„
415
  }
416
-
417
  headers = {
418
  "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
419
  }
420
 
421
  logging.info(f"SerpHouse API 호출 중... 검색어: {query}")
422
-
423
- # ์งง์€ ํƒ€์ž„์•„์›ƒ์œผ๋กœ ์š”์ฒญ ์‹œ๋„
424
  response = requests.get(url, headers=headers, params=params, timeout=15)
425
  response.raise_for_status()
426
 
427
- logging.info(f"SerpHouse API ์‘๋‹ต ์ƒํƒœ ์ฝ”๋“œ: {response.status_code}")
428
  data = response.json()
429
 
430
- # ๋‹ค์–‘ํ•œ ์‘๋‹ต ๊ตฌ์กฐ ์ฒ˜๋ฆฌ
431
  results = data.get("results", {})
432
  organic = None
433
 
434
- # ๊ฐ€๋Šฅํ•œ ์‘๋‹ต ๊ตฌ์กฐ 1
435
  if isinstance(results, dict) and "organic" in results:
436
  organic = results["organic"]
437
- # ๊ฐ€๋Šฅํ•œ ์‘๋‹ต ๊ตฌ์กฐ 2
438
- elif isinstance(results, dict) and "results" in results:
439
- if isinstance(results["results"], dict) and "organic" in results["results"]:
440
- organic = results["results"]["organic"]
441
- # ๊ฐ€๋Šฅํ•œ ์‘๋‹ต ๊ตฌ์กฐ 3
442
  elif "organic" in data:
443
  organic = data["organic"]
444
-
 
445
  if not organic:
446
- logging.warning("์‘๋‹ต์—์„œ organic ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๊ตฌ๊ธ€ ์ง์ ‘ ๊ฒ€์ƒ‰์œผ๋กœ ์ „ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
447
- return do_google_search(query)
448
-
449
- # ๊ฒฐ๊ณผ ์ˆ˜ ์ œํ•œ ๋ฐ ์ปจํ…์ŠคํŠธ ๊ธธ์ด ์ตœ์ ํ™”
450
- max_results = min(5, len(organic))
451
- limited_organic = organic[:max_results]
452
 
453
- # ๊ฒฐ๊ณผ ํ˜•์‹ ๊ฐœ์„ 
454
  summary_lines = []
455
- for idx, item in enumerate(limited_organic, start=1):
456
  title = item.get("title", "No title")
457
  link = item.get("link", "#")
458
  snippet = item.get("snippet", "No description")
@@ -465,7 +306,6 @@ def do_web_search(query: str) -> str:
465
  f"---\n"
466
  )
467
 
468
- # ๋ชจ๋ธ์—๊ฒŒ ๋ช…ํ™•ํ•œ ์ง€์นจ ์ถ”๊ฐ€
469
  instructions = """
470
  # ์›น ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ
471
  ์•„๋ž˜๋Š” ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค. ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•  ๋•Œ ์ด ์ •๋ณด๋ฅผ ํ™œ์šฉํ•˜์„ธ์š”:
@@ -475,16 +315,15 @@ def do_web_search(query: str) -> str:
475
  4. ์—ฌ๋Ÿฌ ์ถœ์ฒ˜์˜ ์ •๋ณด๋ฅผ ์ข…ํ•ฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•˜์„ธ์š”
476
  """
477
 
478
- search_results = instructions + "\n".join(summary_lines)
479
- logging.info(f"๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ {len(limited_organic)}๊ฐœ ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
480
- return search_results
481
-
482
  except requests.exceptions.Timeout:
483
- logging.error("Web search timed out, ์ง์ ‘ ๊ตฌ๊ธ€ ๊ฒ€์ƒ‰์œผ๋กœ ์ „ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
484
- return do_google_search(query)
485
  except Exception as e:
486
- logging.error(f"Web search failed: {e}, ์ง์ ‘ ๊ตฌ๊ธ€ ๊ฒ€์ƒ‰์œผ๋กœ ์ „ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
487
- return do_google_search(query)
 
488
 
489
  def chatbot_interface():
490
  st.title("Ginigen Blog")
@@ -631,7 +470,7 @@ def chatbot_interface():
631
  message_placeholder = st.empty()
632
  full_response = ""
633
 
634
- # ์›น ๊ฒ€์ƒ‰ ์ˆ˜ํ–‰ (์›น ๊ฒ€์ƒ‰ ์˜ต์…˜์ด ์ผœ์ ธ ์žˆ์„ ๊ฒฝ์šฐ)
635
  system_prompt = get_system_prompt()
636
  if st.session_state.use_web_search:
637
  with st.spinner("์›น์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ๊ฒ€์ƒ‰ ์ค‘..."):
@@ -639,7 +478,7 @@ def chatbot_interface():
639
  search_query = extract_keywords(prompt, top_k=5)
640
  st.info(f"๊ฒ€์ƒ‰์–ด: {search_query}")
641
 
642
- # ๋‘ ๊ฐ€์ง€ ๋ฐฉ๋ฒ• ๋ชจ๋‘ ์‹œ๋„ (SerpHouse API์™€ ์ง์ ‘ ๊ฒ€์ƒ‰)
643
  search_results = do_web_search(search_query)
644
 
645
  if "๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ" in search_results:
@@ -773,4 +612,4 @@ if __name__ == "__main__":
773
  f.write("markdown>=3.5.1\n")
774
  f.write("pillow>=10.1.0\n")
775
 
776
- main()
 
10
  import base64
11
  from datetime import datetime
12
  import re
13
+ from bs4 import BeautifulSoup # BeautifulSoup๋Š” ์ด์ œ ์‚ฌ์šฉํ•˜์ง€ ์•Š์ง€๋งŒ, ํ•„์š” ์‹œ ์œ ์ง€
14
+ # (์ง์ ‘ ๊ตฌ๊ธ€ ๊ฒ€์ƒ‰ ๋กœ์ง์€ ์‚ญ์ œํ–ˆ์œผ๋ฏ€๋กœ ์‚ฌ์‹ค์ƒ BeautifulSoup๋Š” ํ•„์š” ์—†์Šต๋‹ˆ๋‹ค.)
15
 
16
  # 로깅 설정
17
  logging.basicConfig(
 
35
  return """
36
  ๋‹น์‹ ์€ ์ „๋ฌธ ๋ธ”๋กœ๊ทธ ์ž‘์„ฑ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ๋ชจ๋“  ๋ธ”๋กœ๊ทธ ๊ธ€ ์ž‘์„ฑ ์š”์ฒญ์— ๋Œ€ํ•ด ๋‹ค์Œ์˜ 8๋‹จ๊ณ„ ํ”„๋ ˆ์ž„์›Œํฌ๋ฅผ ์ฒ ์ €ํžˆ ๋”ฐ๋ฅด๋˜, ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๊ธ€์ด ๋˜๋„๋ก ์ž‘์„ฑํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค:
37
 
38
+ ๋…์ž ์—ฐ๊ฒฐ ๋‹จ๊ณ„
39
+ 1.1. ๊ณต๊ฐ๋Œ€ ํ˜•์„ฑ์„ ์œ„ํ•œ ์นœ๊ทผํ•œ ์ธ์‚ฌ
40
+ 1.2. ๋…์ž์˜ ์‹ค์ œ ๊ณ ๋ฏผ์„ ๋ฐ˜์˜ํ•œ ๋„์ž… ์งˆ๋ฌธ
41
+ 1.3. ์ฃผ์ œ์— ๋Œ€ํ•œ ์ฆ‰๊ฐ์  ๊ด€์‹ฌ ์œ ๋„
42
+
43
+ ๋ฌธ์ œ ์ •์˜ ๋‹จ๊ณ„
44
+ 2.1. ๋…์ž์˜ ํŽ˜์ธํฌ์ธํŠธ ๊ตฌ์ฒดํ™”
45
+ 2.2. ๋ฌธ์ œ์˜ ์‹œ๊ธ‰์„ฑ๊ณผ ์˜ํ–ฅ๋„ ๋ถ„์„
46
+ 2.3. ํ•ด๊ฒฐ ํ•„์š”์„ฑ์— ๋Œ€ํ•œ ๊ณต๊ฐ๋Œ€ ํ˜•์„ฑ
47
+
48
+ ์ „๋ฌธ์„ฑ ์ž…์ฆ ๋‹จ๊ณ„
49
+ 3.1. ๊ฐ๊ด€์  ๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜ ๋ถ„์„
50
+ 3.2. ์ „๋ฌธ๊ฐ€ ๊ฒฌํ•ด์™€ ์—ฐ๊ตฌ ๊ฒฐ๊ณผ ์ธ์šฉ
51
+ 3.3. ์‹ค์ œ ์‚ฌ๋ก€๋ฅผ ํ†ตํ•œ ๋ฌธ์ œ ๊ตฌ์ฒดํ™”
52
+
53
+ ์†”๋ฃจ์…˜ ์ œ๊ณต ๋‹จ๊ณ„
54
+ 4.1. ๋‹จ๊ณ„๋ณ„ ์‹ค์ฒœ ๊ฐ€์ด๋“œ๋ผ์ธ ์ œ์‹œ
55
+ 4.2. ์ฆ‰์‹œ ์ ์šฉ ๊ฐ€๋Šฅํ•œ ๊ตฌ์ฒด์  ํŒ
56
+ 4.3. ์˜ˆ์ƒ ์žฅ์• ๋ฌผ๊ณผ ๊ทน๋ณต ๋ฐฉ์•ˆ ํฌํ•จ
57
+
58
+ ์‹ ๋ขฐ๋„ ๊ฐ•ํ™” ๋‹จ๊ณ„
59
+ 5.1. ์‹ค์ œ ์„ฑ๊ณต ์‚ฌ๋ก€ ์ œ์‹œ
60
+ 5.2. ๊ตฌ์ฒด์  ์‚ฌ์šฉ์ž ํ›„๊ธฐ ์ธ์šฉ
61
+ 5.3. ๊ฐ๊ด€์  ๋ฐ์ดํ„ฐ๋กœ ํšจ๊ณผ ์ž…์ฆ
62
+
63
+ ํ–‰๋™ ์œ ๋„ ๋‹จ๊ณ„
64
+ 6.1. ๋ช…ํ™•ํ•œ ์ฒซ ์‹ค์ฒœ ๋‹จ๊ณ„ ์ œ์‹œ
65
+ 6.2. ์‹œ๊ธ‰์„ฑ์„ ๊ฐ•์กฐํ•œ ํ–‰๋™ ์ด‰๊ตฌ
66
+ 6.3. ์‹ค์ฒœ ๋™๊ธฐ ๋ถ€์—ฌ ์š”์†Œ ํฌํ•จ
67
+
68
+ ์ง„์ •์„ฑ ๊ฐ•ํ™” ๋‹จ๊ณ„
69
+ 7.1. ์†”๋ฃจ์…˜์˜ ํ•œ๊ณ„ ํˆฌ๋ช…ํ•˜๊ฒŒ ๊ณต๊ฐœ
70
+ 7.2. ๊ฐœ์ธ๋ณ„ ์ฐจ์ด ์กด์žฌ ์ธ์ •
71
+ 7.3. ํ•„์š” ์กฐ๊ฑด๊ณผ ์ฃผ์˜์‚ฌํ•ญ ๋ช…์‹œ
72
+
73
+ ๊ด€๊ณ„ ์ง€์† ๋‹จ๊ณ„
74
+ 8.1. ์ง„์ •์„ฑ ์žˆ๋Š” ๊ฐ์‚ฌ ์ธ์‚ฌ
75
+ 8.2. ๋‹ค์Œ ์ปจํ…์ธ  ์˜ˆ๊ณ ๋กœ ๊ธฐ๋Œ€๊ฐ ์กฐ์„ฑ
76
+ 8.3. ์†Œํ†ต ์ฑ„๋„ ์•ˆ๋‚ด
77
+
78
+ ์ž‘์„ฑ ์‹œ ์ค€์ˆ˜์‚ฌํ•ญ
79
+ 9.1. ๊ธ€์ž ์ˆ˜: 1500-2000์ž ๋‚ด์™ธ
80
+ 9.2. ๋ฌธ๋‹จ ๊ธธ์ด: 3-4๋ฌธ์žฅ ์ด๋‚ด
81
+ 9.3. ์‹œ๊ฐ์  ๊ตฌ๋ถ„: ์†Œ์ œ๋ชฉ, ๊ตฌ๋ถ„์„ , ๋ฒˆํ˜ธ ๋ชฉ๋ก ํ™œ์šฉ
82
+ 9.4. ํ†ค์•ค๋งค๋„ˆ: ์นœ๊ทผํ•˜๊ณ  ์ „๋ฌธ์ ์ธ ๋Œ€ํ™”์ฒด
83
+ 9.5. ๋ฐ์ดํ„ฐ: ๋ชจ๋“  ์ •๋ณด์˜ ์ถœ์ฒ˜ ๋ช…์‹œ
84
+ 9.6. ๊ฐ€๋…์„ฑ: ๋ช…ํ™•ํ•œ ๋‹จ๋ฝ ๊ตฌ๋ถ„๊ณผ ๊ฐ•์กฐ์  ์‚ฌ์šฉ
85
 
86
  ์ด๋Ÿฌํ•œ ํ”„๋ ˆ์ž„์›Œํฌ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ, ์š”์ฒญ๋ฐ›์€ ์ฃผ์ œ์— ๋Œ€ํ•ด ์ฒด๊ณ„์ ์ด๊ณ  ๋งค๋ ฅ์ ์ธ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŠธ๋ฅผ ์ž‘์„ฑํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค.
87
  """
 
236
 
237
  return notice + "\n".join(summary_lines)
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
+ ###################################################
241
+ # SerpHouse 만 사용하는 웹 검색 함수
242
+ ###################################################
243
  def do_web_search(query: str) -> str:
244
  """
245
+ 웹 검색을 **SerpHouse**로만 수행하는 함수.
246
+ - SERPHOUSE_API_KEY๊ฐ€ ์—†๊ฑฐ๋‚˜ mock์ผ ๊ฒฝ์šฐ์—๋Š” ๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ˜ํ™˜
247
+ - API 호출이 성공하면 그 결과를 파싱하여 마크다운으로 반환
248
+ - 실패하면 가상 검색 결과를 반환
249
  """
250
+ # 1) API 키 유효성 체크
251
+ if not SERPHOUSE_API_KEY or "mock" in SERPHOUSE_API_KEY.lower():
252
+ logging.warning("API 키가 없거나 Mock 모드입니다. => 가상 검색 결과 반환")
253
+ return generate_mock_search_results(query)
254
+
255
  try:
256
+ # SerpHouse API
 
 
 
 
 
257
  url = "https://api.serphouse.com/serp/live"
258
  params = {
259
  "q": query,
260
  "domain": "google.com",
261
+ "serp_type": "web", # 웹 검색
262
+ "device": "desktop",
263
+ "lang": "ko", # 한국어
264
+ "num": "5" # 결과 최대 5개
265
  }
 
266
  headers = {
267
  "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
268
  }
269
 
270
  logging.info(f"SerpHouse API 호출 중... 검색어: {query}")
 
 
271
  response = requests.get(url, headers=headers, params=params, timeout=15)
272
  response.raise_for_status()
273
 
 
274
  data = response.json()
275
 
276
+ # 결과 구조 파악
277
  results = data.get("results", {})
278
  organic = None
279
 
280
+ # 가능한 응답 구조 확인
281
  if isinstance(results, dict) and "organic" in results:
282
  organic = results["organic"]
 
 
 
 
 
283
  elif "organic" in data:
284
  organic = data["organic"]
285
+
286
+ # organic 결과가 없다면 가상 결과
287
  if not organic:
288
+ logging.warning("SerpHouse 응답에서 organic 항목을 찾을 수 없습니다.")
289
+ return generate_mock_search_results(query)
290
+
291
+ # 최대 5개만 슬라이싱
292
+ organic = organic[:5]
 
293
 
294
+ # 결과 요약 문자열 만들기
295
  summary_lines = []
296
+ for idx, item in enumerate(organic, start=1):
297
  title = item.get("title", "No title")
298
  link = item.get("link", "#")
299
  snippet = item.get("snippet", "No description")
 
306
  f"---\n"
307
  )
308
 
 
309
  instructions = """
310
  # ์›น ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ
311
  ์•„๋ž˜๋Š” ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค. ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•  ๋•Œ ์ด ์ •๋ณด๋ฅผ ํ™œ์šฉํ•˜์„ธ์š”:
 
315
  4. ์—ฌ๋Ÿฌ ์ถœ์ฒ˜์˜ ์ •๋ณด๋ฅผ ์ข…ํ•ฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•˜์„ธ์š”
316
  """
317
 
318
+ return instructions + "\n".join(summary_lines)
319
+
 
 
320
  except requests.exceptions.Timeout:
321
+ logging.error("SerpHouse 검색 타임아웃. => 가상 검색 결과를 반환합니다.")
322
+ return generate_mock_search_results(query)
323
  except Exception as e:
324
+ logging.error(f"SerpHouse 검색 실패: {e}")
325
+ return generate_mock_search_results(query)
326
+
327
 
328
  def chatbot_interface():
329
  st.title("Ginigen Blog")
 
470
  message_placeholder = st.empty()
471
  full_response = ""
472
 
473
+ # 웹 검색 수행 (옵션이 켜져 있을 경우)
474
  system_prompt = get_system_prompt()
475
  if st.session_state.use_web_search:
476
  with st.spinner("์›น์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ๊ฒ€์ƒ‰ ์ค‘..."):
 
478
  search_query = extract_keywords(prompt, top_k=5)
479
  st.info(f"๊ฒ€์ƒ‰์–ด: {search_query}")
480
 
481
+ # SerpHouse API๋กœ ๊ฒ€์ƒ‰
482
  search_results = do_web_search(search_query)
483
 
484
  if "๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ" in search_results:
 
612
  f.write("markdown>=3.5.1\n")
613
  f.write("pillow>=10.1.0\n")
614
 
615
+ main()