ginipick commited on
Commit
d835c63
·
verified ·
1 Parent(s): 5f32e07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -19
app.py CHANGED
@@ -9,6 +9,7 @@ import markdown
9
  import tempfile
10
  import base64
11
  from datetime import datetime
 
12
 
13
  # 로깅 설정
14
  logging.basicConfig(
@@ -151,12 +152,52 @@ def extract_keywords(text: str, top_k: int = 5) -> str:
151
  2) 공백 기준 토큰 분리
152
  3) 최대 top_k개만
153
  """
154
- import re
155
  text = re.sub(r"[^a-zA-Z0-9๊ฐ€-ํžฃ\s]", "", text)
156
  tokens = text.split()
157
  key_tokens = tokens[:top_k]
158
  return " ".join(key_tokens)
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  # 웹 검색 함수
161
  def do_web_search(query: str) -> str:
162
  """
@@ -171,9 +212,9 @@ def do_web_search(query: str) -> str:
171
  "q": query,
172
  "domain": "google.com",
173
  "serp_type": "web", # ๊ธฐ๋ณธ ์›น ๊ฒ€์ƒ‰
174
- "device": "desktop",
175
- "lang": "en",
176
- "num": "20" # ์ตœ๋Œ€ 20๊ฐœ ๊ฒฐ๊ณผ๋งŒ ์š”์ฒญ
177
  }
178
 
179
  headers = {
@@ -183,8 +224,13 @@ def do_web_search(query: str) -> str:
183
  logging.info(f"SerpHouse API ํ˜ธ์ถœ ์ค‘... ๊ฒ€์ƒ‰์–ด: {query}")
184
  logging.info(f"์š”์ฒญ URL: {url} - ํŒŒ๋ผ๋ฏธํ„ฐ: {params}")
185
 
186
- # GET ์š”์ฒญ ์ˆ˜ํ–‰
187
- response = requests.get(url, headers=headers, params=params, timeout=60)
 
 
 
 
 
188
  response.raise_for_status()
189
 
190
  logging.info(f"SerpHouse API ์‘๋‹ต ์ƒํƒœ ์ฝ”๋“œ: {response.status_code}")
@@ -209,13 +255,13 @@ def do_web_search(query: str) -> str:
209
 
210
  if not organic:
211
  logging.warning("์‘๋‹ต์—์„œ organic ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
212
- logging.debug(f"์‘๋‹ต ๊ตฌ์กฐ: {list(data.keys())}")
213
  if isinstance(results, dict):
214
  logging.debug(f"results ๊ตฌ์กฐ: {list(results.keys())}")
215
- return "No web search results found or unexpected API response structure."
216
 
217
  # ๊ฒฐ๊ณผ ์ˆ˜ ์ œํ•œ ๋ฐ ์ปจํ…์ŠคํŠธ ๊ธธ์ด ์ตœ์ ํ™”
218
- max_results = min(20, len(organic))
219
  limited_organic = organic[:max_results]
220
 
221
  # ๊ฒฐ๊ณผ ํ˜•์‹ ๊ฐœ์„  - ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ ์ถœ๋ ฅํ•˜์—ฌ ๊ฐ€๋…์„ฑ ํ–ฅ์ƒ
@@ -248,9 +294,12 @@ def do_web_search(query: str) -> str:
248
  logging.info(f"๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ {len(limited_organic)}๊ฐœ ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
249
  return search_results
250
 
 
 
 
251
  except Exception as e:
252
  logging.error(f"Web search failed: {e}")
253
- return f"Web search failed: {str(e)}"
254
 
255
  def chatbot_interface():
256
  st.title("Ginigen Blog")
@@ -320,7 +369,6 @@ def chatbot_interface():
320
  latest_blog = msg["content"]
321
 
322
  # ํƒ€์ดํ‹€ ์ถ”์ถœ ์‹œ๋„ (์ฒซ ๋ฒˆ์งธ ์ œ๋ชฉ ํƒœ๊ทธ ์‚ฌ์šฉ)
323
- import re
324
  title_match = re.search(r'# (.*?)(\n|$)', latest_blog)
325
  if title_match:
326
  latest_blog_title = title_match.group(1).strip()
@@ -402,14 +450,20 @@ def chatbot_interface():
402
  system_prompt = get_system_prompt()
403
  if st.session_state.use_web_search:
404
  with st.spinner("์›น์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ๊ฒ€์ƒ‰ ์ค‘..."):
405
- search_query = extract_keywords(prompt, top_k=5)
406
- search_results = do_web_search(search_query)
407
- if "search failed" not in search_results.lower():
408
- # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์— ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ถ”๊ฐ€
409
- system_prompt += f"\n\n๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ:\n{search_results}\n"
410
- st.success(f"๊ฒ€์ƒ‰ ์™„๋ฃŒ: '{search_query}'์— ๋Œ€ํ•œ ์ •๋ณด๋ฅผ ์ˆ˜์ง‘ํ–ˆ์Šต๋‹ˆ๋‹ค.")
411
- else:
412
- st.error("์›น ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.")
 
 
 
 
 
 
413
 
414
  # API ํ˜ธ์ถœ
415
  with client.messages.stream(
 
9
  import tempfile
10
  import base64
11
  from datetime import datetime
12
+ import re
13
 
14
  # 로깅 설정
15
  logging.basicConfig(
 
152
  2) ๊ณต๋ฐฑ ๊ธฐ์ค€ ํ† ํฐ ๋ถ„๋ฆฌ
153
  3) ์ตœ๋Œ€ top_k๊ฐœ๋งŒ
154
  """
 
155
  text = re.sub(r"[^a-zA-Z0-9๊ฐ€-ํžฃ\s]", "", text)
156
  tokens = text.split()
157
  key_tokens = tokens[:top_k]
158
  return " ".join(key_tokens)
159
 
160
+ # Mock ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ƒ์„ฑ ํ•จ์ˆ˜ ์ถ”๊ฐ€ (API ํ‚ค๊ฐ€ ์—†๊ฑฐ๋‚˜ ์—ฐ๊ฒฐ ์‹คํŒจ ์‹œ ๋Œ€์ฒด ์‚ฌ์šฉ)
161
+ def generate_mock_search_results(query):
162
+ """API ์—ฐ๊ฒฐ์ด ์•ˆ๋  ๋•Œ ์‚ฌ์šฉํ•  ๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ƒ์„ฑ"""
163
+ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
164
+ mock_results = [
165
+ {
166
+ "title": f"{query}์— ๊ด€ํ•œ ์ตœ์‹  ์ •๋ณด",
167
+ "link": "https://example.com/article1",
168
+ "snippet": f"{query}์— ๊ด€ํ•œ ๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค. ์ด ๊ฒฐ๊ณผ๋Š” API ์—ฐ๊ฒฐ ๋ฌธ์ œ๋กœ ์ธํ•ด ์ƒ์„ฑ๋œ ๊ฐ€์ƒ ๋ฐ์ดํ„ฐ์ž…๋‹ˆ๋‹ค. ์‹ค์ œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์•„๋‹˜์„ ์ฐธ๊ณ ํ•˜์„ธ์š”. ์ƒ์„ฑ ์‹œ๊ฐ„: {current_time}",
169
+ "displayed_link": "example.com/article1"
170
+ },
171
+ {
172
+ "title": f"{query} ๊ด€๋ จ ์—ฐ๊ตฌ ๋™ํ–ฅ",
173
+ "link": "https://example.org/research",
174
+ "snippet": "์ด๊ฒƒ์€ API ์—ฐ๊ฒฐ ๋ฌธ์ œ๋กœ ์ธํ•œ ๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค. ์‹ค์ œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ๋ณด์—ฌ๋“œ๋ฆฌ์ง€ ๋ชปํ•ด ์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๋Œ€์‹  AI์˜ ๊ธฐ์กด ์ง€์‹์„ ํ™œ์šฉํ•˜์—ฌ ๋‹ต๋ณ€๋“œ๋ฆฌ๊ฒ ์Šต๋‹ˆ๋‹ค.",
175
+ "displayed_link": "example.org/research"
176
+ }
177
+ ]
178
+
179
+ summary_lines = []
180
+ for idx, item in enumerate(mock_results, start=1):
181
+ title = item.get("title", "No title")
182
+ link = item.get("link", "#")
183
+ snippet = item.get("snippet", "No description")
184
+ displayed_link = item.get("displayed_link", link)
185
+
186
+ summary_lines.append(
187
+ f"### Result {idx}: {title}\n\n"
188
+ f"{snippet}\n\n"
189
+ f"**์ถœ์ฒ˜**: [{displayed_link}]({link})\n\n"
190
+ f"---\n"
191
+ )
192
+
193
+ notice = """
194
+ # ๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ (API ์—ฐ๊ฒฐ ๋ฌธ์ œ๋กœ ์ธํ•ด ์ƒ์„ฑ๋จ)
195
+ ์•„๋ž˜๋Š” API ์—ฐ๊ฒฐ ๋ฌธ์ œ๋กœ ์ธํ•ด ์ƒ์„ฑ๋œ ๊ฐ€์ƒ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค. ์‹ค์ œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์•„๋‹˜์„ ์ฐธ๊ณ ํ•˜์„ธ์š”.
196
+ ๋Œ€์‹  AI์˜ ๊ธฐ์กด ์ง€์‹์„ ํ™œ์šฉํ•˜์—ฌ ์ตœ๋Œ€ํ•œ ์ •ํ™•ํ•œ ๋‹ต๋ณ€์„ ๋“œ๋ฆฌ๊ฒ ์Šต๋‹ˆ๋‹ค.
197
+ """
198
+
199
+ return notice + "\n".join(summary_lines)
200
+
201
  # 웹 검색 함수
202
  def do_web_search(query: str) -> str:
203
  """
 
212
  "q": query,
213
  "domain": "google.com",
214
  "serp_type": "web", # ๊ธฐ๋ณธ ์›น ๊ฒ€์ƒ‰
215
+ "device": "desktop",
216
+ "lang": "ko", # ํ•œ๊ตญ์–ด ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ์œ„ํ•ด ๋ณ€๊ฒฝ
217
+ "num": "10" # ๊ฒฐ๊ณผ ์ˆ˜๋ฅผ 10๊ฐœ๋กœ ์ค„์ž„ (๋น ๋ฅธ ์‘๋‹ต์„ ์œ„ํ•ด)
218
  }
219
 
220
  headers = {
 
224
  logging.info(f"SerpHouse API ํ˜ธ์ถœ ์ค‘... ๊ฒ€์ƒ‰์–ด: {query}")
225
  logging.info(f"์š”์ฒญ URL: {url} - ํŒŒ๋ผ๋ฏธํ„ฐ: {params}")
226
 
227
+ # Mock ์‘๋‹ต (์‹ค์ œ API ์—ฐ๊ฒฐ์ด ์–ด๋ ค์šธ ๊ฒฝ์šฐ)
228
+ if not SERPHOUSE_API_KEY or "mock" in SERPHOUSE_API_KEY.lower():
229
+ logging.warning("API ํ‚ค๊ฐ€ ์—†๊ฑฐ๋‚˜ Mock ๋ชจ๋“œ์ž…๋‹ˆ๋‹ค. ๋ชจ์˜ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
230
+ return generate_mock_search_results(query)
231
+
232
+ # ํƒ€์ž„์•„์›ƒ ์ค„์ž„ (30์ดˆ)
233
+ response = requests.get(url, headers=headers, params=params, timeout=30)
234
  response.raise_for_status()
235
 
236
  logging.info(f"SerpHouse API ์‘๋‹ต ์ƒํƒœ ์ฝ”๋“œ: {response.status_code}")
 
255
 
256
  if not organic:
257
  logging.warning("์‘๋‹ต์—์„œ organic ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
258
+ logging.debug(f"์‘๋‹ต ๊ตฌ์กฐ: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")
259
  if isinstance(results, dict):
260
  logging.debug(f"results ๊ตฌ์กฐ: {list(results.keys())}")
261
+ return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๋Œ€์‹  ๊ธฐ์กด ์ง€์‹์„ ํ™œ์šฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค."
262
 
263
  # ๊ฒฐ๊ณผ ์ˆ˜ ์ œํ•œ ๋ฐ ์ปจํ…์ŠคํŠธ ๊ธธ์ด ์ตœ์ ํ™”
264
+ max_results = min(10, len(organic))
265
  limited_organic = organic[:max_results]
266
 
267
  # ๊ฒฐ๊ณผ ํ˜•์‹ ๊ฐœ์„  - ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ ์ถœ๋ ฅํ•˜์—ฌ ๊ฐ€๋…์„ฑ ํ–ฅ์ƒ
 
294
  logging.info(f"๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ {len(limited_organic)}๊ฐœ ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
295
  return search_results
296
 
297
+ except requests.exceptions.Timeout:
298
+ logging.error("Web search timed out")
299
+ return "Web search timed out. ๊ฒ€์ƒ‰ ์‹œ๊ฐ„์ด ์ดˆ๊ณผ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ๊ธฐ์กด ์ง€์‹์„ ํ™œ์šฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค."
300
  except Exception as e:
301
  logging.error(f"Web search failed: {e}")
302
+ return f"Web search failed: {str(e)}. ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ๊ธฐ์กด ์ง€์‹์„ ํ™œ์šฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค."
303
 
304
  def chatbot_interface():
305
  st.title("Ginigen Blog")
 
369
  latest_blog = msg["content"]
370
 
371
  # ํƒ€์ดํ‹€ ์ถ”์ถœ ์‹œ๋„ (์ฒซ ๋ฒˆ์งธ ์ œ๋ชฉ ํƒœ๊ทธ ์‚ฌ์šฉ)
 
372
  title_match = re.search(r'# (.*?)(\n|$)', latest_blog)
373
  if title_match:
374
  latest_blog_title = title_match.group(1).strip()
 
450
  system_prompt = get_system_prompt()
451
  if st.session_state.use_web_search:
452
  with st.spinner("์›น์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ๊ฒ€์ƒ‰ ์ค‘..."):
453
+ try:
454
+ search_query = extract_keywords(prompt, top_k=5)
455
+ search_results = do_web_search(search_query)
456
+
457
+ if "search failed" not in search_results.lower() and "timed out" not in search_results.lower():
458
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์— ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ถ”๊ฐ€
459
+ system_prompt += f"\n\n๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ:\n{search_results}\n"
460
+ st.success(f"๊ฒ€์ƒ‰ ์™„๋ฃŒ: '{search_query}'์— ๋Œ€ํ•œ ์ •๋ณด๋ฅผ ์ˆ˜์ง‘ํ–ˆ์Šต๋‹ˆ๋‹ค.")
461
+ else:
462
+ st.warning("์›น ๊ฒ€์ƒ‰์„ ์ง„ํ–‰ํ–ˆ์œผ๋‚˜ ๊ฒฐ๊ณผ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š”๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค. ๊ธฐ์กด ์ง€์‹์„ ํ™œ์šฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
463
+ logging.warning(f"์›น ๊ฒ€์ƒ‰ ์‹คํŒจ ๋˜๋Š” ํƒ€์ž„์•„์›ƒ: {search_results}")
464
+ except Exception as e:
465
+ st.error(f"์›น ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
466
+ logging.error(f"์›น ๊ฒ€์ƒ‰ ์˜ค๋ฅ˜: {str(e)}")
467
 
468
  # API ํ˜ธ์ถœ
469
  with client.messages.stream(