JUNGU committed on
Commit dad4fc1 · verified · 1 Parent(s): b066505

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +189 -209
src/streamlit_app.py CHANGED
@@ -11,28 +11,13 @@ from collections import Counter
 import json
 import os
 from datetime import datetime, timedelta
-from openai import OpenAI  # new-style import
+import openai  # legacy (pre-1.0) style
 from dotenv import load_dotenv
 import traceback
 import plotly.graph_objects as go
 import schedule
 import threading
 import matplotlib.pyplot as plt
-import kss  # KSS instead of KoNLPy
-from PIL import Image
-import base64
-from io import BytesIO
-import logging
-
-# logging setup
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(),
-        logging.FileHandler('/tmp/crawler.log')
-    ]
-)
 
 # word cloud support
 try:
@@ -55,52 +40,41 @@ class SchedulerState:
 global_scheduler_state = SchedulerState()
 
 # initialize session state for API-key management
-if 'openai_client' not in st.session_state:
-    st.session_state.openai_client = None
+if 'openai_api_key' not in st.session_state:
+    st.session_state.openai_api_key = None
 
 # try to load the API key in several ways
 load_dotenv()  # try the .env file
 
-# helper that initializes the OpenAI client
-def init_openai_client(api_key=None):
-    try:
-        if api_key:
-            client = OpenAI(api_key=api_key)
-            # quick API-key validity check
-            client.models.list()  # test whether the key works
-            return client
-        return None
-    except Exception as e:
-        st.error(f"API 키 초기화 오류: {str(e)}")
-        return None
-
 # 1. check the environment variable
-api_key = os.environ.get('OPENAI_API_KEY')
-if api_key:
-    st.session_state.openai_client = init_openai_client(api_key)
+if os.environ.get('OPENAI_API_KEY'):
+    st.session_state.openai_api_key = os.environ.get('OPENAI_API_KEY')
+    openai.api_key = st.session_state.openai_api_key
 
-# 2. check Streamlit secrets
-if not st.session_state.openai_client:
+# 2. check Streamlit secrets (try-except so a missing file is not an error)
+if not st.session_state.openai_api_key:
     try:
         if 'OPENAI_API_KEY' in st.secrets:
-            st.session_state.openai_client = init_openai_client(st.secrets['OPENAI_API_KEY'])
+            st.session_state.openai_api_key = st.secrets['OPENAI_API_KEY']
+            openai.api_key = st.session_state.openai_api_key
     except Exception as e:
        pass  # a missing secrets file is not an error
 
-# set the NLTK data path to nltk_data in the current workspace
-nltk_data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'nltk_data')
-nltk.data.path.insert(0, nltk_data_path)
+# point the NLTK data path at a temporary directory
+nltk_data_dir = '/tmp/nltk_data'
+os.makedirs(nltk_data_dir, exist_ok=True)
+nltk.data.path.insert(0, nltk_data_dir)  # search this path first
 
-# make sure the required NLTK data is present
+# download the required NLTK data
 try:
     nltk.data.find('tokenizers/punkt')
 except LookupError:
-    nltk.download('punkt', download_dir=nltk_data_path)
+    nltk.download('punkt', download_dir=nltk_data_dir)
 
 try:
     nltk.data.find('corpora/stopwords')
 except LookupError:
-    nltk.download('stopwords', download_dir=nltk_data_path)
+    nltk.download('stopwords', download_dir=nltk_data_dir)
 
 # page setup
 st.set_page_config(page_title="뉴스 기사 도구", page_icon="📰", layout="wide")
@@ -116,12 +90,9 @@ with st.sidebar:
     st.divider()
     api_key = st.text_input("OpenAI API 키 입력", type="password")
     if api_key:
-        client = init_openai_client(api_key)
-        if client:
-            st.session_state.openai_client = client
-            st.success("API 키가 성공적으로 설정되었습니다!")
-        else:
-            st.error("유효하지 않은 API 키입니다.")
+        st.session_state.openai_api_key = api_key
+        openai.api_key = api_key
+        st.success("API 키가 설정되었습니다!")
 
 # function that loads saved articles
 def load_saved_articles():
@@ -141,21 +112,16 @@ def crawl_naver_news(keyword, num_articles=5):
     """
     Collects Naver news articles.
     """
-    logging.info(f"크롤링 시작: 키워드={keyword}, 기사 수={num_articles}")
     url = f"https://search.naver.com/search.naver?where=news&query={keyword}"
     results = []
 
     try:
         # request the page
-        logging.info(f"요청 URL: {url}")
         response = requests.get(url)
-        logging.info(f"응답 상태 코드: {response.status_code}")
-
         soup = BeautifulSoup(response.text, 'html.parser')
 
         # find the news items
         news_items = soup.select('div.sds-comps-base-layout.sds-comps-full-layout')
-        logging.info(f"찾은 뉴스 아이템 수: {len(news_items)}")
 
         # extract the fields from each news item
         for i, item in enumerate(news_items):
@@ -190,68 +156,48 @@ def crawl_naver_news(keyword, num_articles=5):
                     'description': description,
                     'source': source,
                     'date': date,
-                    'content': ""
+                    'content': ""  # field for the full text, filled in later
                 })
 
-                logging.info(f"기사 추출 성공: {title}")
-
             except Exception as e:
-                logging.error(f"기사 정보 추출 중 오류 발생: {str(e)}", exc_info=True)
+                st.error(f"기사 정보 추출 중 오류 발생: {str(e)}")
                 continue
 
     except Exception as e:
-        logging.error(f"페이지 요청 중 오류 발생: {str(e)}", exc_info=True)
+        st.error(f"페이지 요청 중 오류 발생: {str(e)}")
 
-    logging.info(f"크롤링 완료: {len(results)}개 기사 수집")
     return results
 
 # fetch the full article text
 def get_article_content(url):
-    logging.info(f"기사 원문 가져오기 시작: {url}")
     try:
         response = requests.get(url, timeout=5)
-        logging.info(f"원문 요청 상태 코드: {response.status_code}")
-
         soup = BeautifulSoup(response.text, 'html.parser')
 
         # find the Naver News article body
         content = soup.select_one('#dic_area')
         if content:
             text = content.text.strip()
-            text = re.sub(r'\s+', ' ', text)
-            logging.info("네이버 뉴스 본문 추출 성공")
+            text = re.sub(r'\s+', ' ', text)  # collapse repeated whitespace
             return text
 
-        # try other news sites
+        # try other news sites (more selectors may be needed)
         content = soup.select_one('.article_body, .article-body, .article-content, .news-content-inner')
         if content:
             text = content.text.strip()
             text = re.sub(r'\s+', ' ', text)
-            logging.info("일반 뉴스 본문 추출 성공")
             return text
 
-        logging.warning("본문을 찾을 수 없음")
         return "본문을 가져올 수 없습니다."
     except Exception as e:
-        logging.error(f"원문 가져오기 오류: {str(e)}", exc_info=True)
         return f"오류 발생: {str(e)}"
 
-# keyword analysis with NLTK (using KSS)
+# keyword analysis with NLTK
 def analyze_keywords(text, top_n=10):
-    # Korean stopword list
+    # Korean stopword list (must be defined manually)
     korean_stopwords = ['이', '그', '저', '것', '및', '등', '를', '을', '에', '에서', '의', '으로', '로']
 
-    # sentence splitting and tokenization with KSS
-    try:
-        sentences = kss.split_sentences(text)
-        tokens = []
-        for sentence in sentences:
-            # simple whitespace tokenization
-            tokens.extend(sentence.split())
-    except:
-        # fall back to basic tokenization if KSS fails
-        tokens = text.split()
-
+    tokens = word_tokenize(text)
     tokens = [word for word in tokens if word.isalnum() and len(word) > 1 and word not in korean_stopwords]
 
     word_count = Counter(tokens)
@@ -379,55 +325,36 @@ def analyze_news_content(news_df):
         results['top_keywords'] = []
     return results
 
-# new-article generation via the OpenAI API (new-style client)
+# new-article generation via the OpenAI API (legacy style)
 def generate_article(original_content, prompt_text):
     try:
-        if not st.session_state.openai_client:
+        if not st.session_state.openai_api_key:
             return "OpenAI API 키가 설정되지 않았습니다."
 
-        response = st.session_state.openai_client.chat.completions.create(
-            model="gpt-4",  # or any suitable available model
+        response = openai.ChatCompletion.create(
+            model="gpt-4.1-mini",
             messages=[
                 {"role": "system", "content": "당신은 전문적인 뉴스 기자입니다. 주어진 내용을 바탕으로 새로운 기사를 작성해주세요."},
                 {"role": "user", "content": f"다음 내용을 바탕으로 {prompt_text}\n\n{original_content[:1000]}"}
             ],
             max_tokens=2000
        )
-        return response.choices[0].message.content
+        return response.choices[0].message['content']
     except Exception as e:
         return f"기사 생성 오류: {str(e)}"
 
-# image generation via the OpenAI API (new-style client)
+# image generation via the OpenAI API (legacy style)
 def generate_image(prompt):
     try:
-        if not st.session_state.openai_client:
+        if not st.session_state.openai_api_key:
             return "OpenAI API 키가 설정되지 않았습니다."
 
-        # generate the image with the GPT Image 1 model
-        result = st.session_state.openai_client.images.generate(
-            model="gpt-image-1",  # new model name
+        response = openai.Image.create(
             prompt=prompt,
+            n=1,
             size="1024x1024"
         )
-
-        # decode the base64 image data
-        image_base64 = result.data[0].b64_json
-        image_bytes = base64.b64decode(image_base64)
-
-        # wrap it in a BytesIO object
-        image = BytesIO(image_bytes)
-
-        # convert to a PIL image and resize (optional)
-        pil_image = Image.open(image)
-        pil_image = pil_image.resize((800, 800), Image.LANCZOS)  # resize
-
-        # back to BytesIO
-        output = BytesIO()
-        pil_image.save(output, format="JPEG", quality=80, optimize=True)
-        output.seek(0)
-
-        return output
-
+        return response['data'][0]['url']
     except Exception as e:
         return f"이미지 생성 오류: {str(e)}"
@@ -449,14 +376,11 @@ def run_scheduled_task():
         traceback.print_exc()
 
 def perform_news_task(task_type, keyword, num_articles, file_prefix):
-    logging.info(f"스케줄러 작업 시작: {task_type}, 키워드={keyword}")
     try:
         articles = crawl_naver_news(keyword, num_articles)
-        logging.info(f"수집된 기사 수: {len(articles)}")
 
         # fetch each article's content
-        for i, article in enumerate(articles):
-            logging.info(f"기사 {i+1}/{len(articles)} 원문 가져오기: {article['title']}")
+        for article in articles:
            article['content'] = get_article_content(article['link'])
            time.sleep(0.5)  # avoid overloading the server
@@ -468,12 +392,10 @@ def perform_news_task(task_type, keyword, num_articles, file_prefix):
         with open(filename, 'w', encoding='utf-8') as f:
             json.dump(articles, f, ensure_ascii=False, indent=2)
 
-        logging.info(f"결과 저장 완료: {filename}")
-
         global_scheduler_state.last_run = datetime.now()
         print(f"{datetime.now()} - {task_type} 뉴스 기사 수집 완료: {keyword}")
 
-        # store the collected results in global state
+        # store the collected results in global state (for UI updates)
         result_item = {
             'task_type': task_type,
             'keyword': keyword,
@@ -484,7 +406,7 @@ def perform_news_task(task_type, keyword, num_articles, file_prefix):
         global_scheduler_state.scheduled_results.append(result_item)
 
     except Exception as e:
-        logging.error(f"작업 실행 중 오류 발생: {str(e)}", exc_info=True)
+        print(f"작업 실행 중 오류 발생: {e}")
         traceback.print_exc()
 
 def start_scheduler(daily_tasks, interval_tasks):
@@ -641,25 +563,9 @@ elif menu == "기사 분석하기":
         with keyword_tab1:
             keywords = analyze_keywords(selected_article['content'])
 
-            # visualization with Plotly
+            # visualization
             df = pd.DataFrame(keywords, columns=['단어', '빈도수'])
-            fig = go.Figure(data=[
-                go.Bar(
-                    x=df['단어'],
-                    y=df['빈도수'],
-                    marker_color='rgb(55, 83, 109)'
-                )
-            ])
-
-            fig.update_layout(
-                title='키워드 빈도 분석',
-                xaxis_title='키워드',
-                yaxis_title='빈도수',
-                height=500,
-                margin=dict(l=50, r=50, t=80, b=50)
-            )
-
-            st.plotly_chart(fig, use_container_width=True)
+            st.bar_chart(df.set_index('단어'))
 
             st.write("**주요 키워드:**")
             for word, count in keywords:
@@ -689,14 +595,7 @@ elif menu == "기사 분석하기":
             # compute text statistics
             word_count = len(re.findall(r'\b\w+\b', content))
             char_count = len(content)
-            try:
-                # sentence splitting with KSS
-                sentences = kss.split_sentences(content)
-                sentence_count = len(sentences)
-            except:
-                # basic sentence splitting if KSS fails
-                sentence_count = len(re.split(r'[.!?]+', content))
-
+            sentence_count = len(re.split(r'[.!?]+', content))
             avg_word_length = sum(len(word) for word in re.findall(r'\b\w+\b', content)) / word_count if word_count > 0 else 0
             avg_sentence_length = word_count / sentence_count if sentence_count > 0 else 0
@@ -716,50 +615,136 @@ elif menu == "기사 분석하기":
             with col2:
                 st.metric("평균 문장 길이", f"{avg_sentence_length:.1f}단어")
 
-            # text complexity score
+            # text complexity score (simple heuristic)
             complexity_score = min(10, (avg_sentence_length / 10) * 5 + (avg_word_length / 5) * 5)
             st.progress(complexity_score / 10)
             st.write(f"텍스트 복잡성 점수: {complexity_score:.1f}/10")
-
-            # POS analysis removed (drops the KoNLPy dependency)
-            st.info("상세 품사 분석은 현재 지원되지 않습니다.")
+
+            # frequency bar chart
+            st.subheader("품사별 분포 (한국어/영어 지원)")
+            try:
+                # check whether KoNLPy is installed
+                try:
+                    from konlpy.tag import Okt
+                    konlpy_installed = True
+                except ImportError:
+                    konlpy_installed = False
+                    st.warning("한국어 형태소 분석을 위해 KoNLPy를 설치해주세요: pip install konlpy")
+
+                # prepare the English POS tagger
+                from nltk import pos_tag
+                try:
+                    nltk.data.find('taggers/averaged_perceptron_tagger')
+                except LookupError:
+                    nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
+
+                # naive language detection
+                is_korean = bool(re.search(r'[가-힣]', content))
+
+                if is_korean and konlpy_installed:
+                    # Korean morphological analysis
+                    okt = Okt()
+                    tagged = okt.pos(content)
+
+                    # Korean POS-tag mapping
+                    pos_dict = {
+                        'Noun': '명사', 'NNG': '명사', 'NNP': '고유명사',
+                        'Verb': '동사', 'VV': '동사', 'VA': '형용사',
+                        'Adjective': '형용사',
+                        'Adverb': '부사',
+                        'Josa': '조사', 'Punctuation': '구두점',
+                        'Determiner': '관형사', 'Exclamation': '감탄사'
+                    }
+
+                    pos_counts = {'명사': 0, '동사': 0, '형용사': 0, '부사': 0, '조사': 0, '구두점': 0, '관형사': 0, '감탄사': 0, '기타': 0}
+
+                    for _, pos in tagged:
+                        if pos in pos_dict:
+                            pos_counts[pos_dict[pos]] += 1
+                        elif pos.startswith('N'):  # other noun-like tags
+                            pos_counts['명사'] += 1
+                        elif pos.startswith('V'):  # other verb-like tags
+                            pos_counts['동사'] += 1
+                        else:
+                            pos_counts['기타'] += 1
+
+                else:
+                    # English POS tagging
+                    tokens = word_tokenize(content.lower())
+                    tagged = pos_tag(tokens)
+
+                    # English POS-tag mapping
+                    pos_dict = {
+                        'NN': '명사', 'NNS': '명사', 'NNP': '고유명사', 'NNPS': '고유명사',
+                        'VB': '동사', 'VBD': '동사', 'VBG': '동사', 'VBN': '동사', 'VBP': '동사', 'VBZ': '동사',
+                        'JJ': '형용사', 'JJR': '형용사', 'JJS': '형용사',
+                        'RB': '부사', 'RBR': '부사', 'RBS': '부사'
+                    }
+
+                    pos_counts = {'명사': 0, '동사': 0, '형용사': 0, '부사': 0, '기타': 0}
+
+                    for _, pos in tagged:
+                        if pos in pos_dict:
+                            pos_counts[pos_dict[pos]] += 1
+                        else:
+                            pos_counts['기타'] += 1
+
+                # visualize the result
+                pos_df = pd.DataFrame({
+                    '품사': list(pos_counts.keys()),
+                    '빈도': list(pos_counts.values())
+                })
+
+                st.bar_chart(pos_df.set_index('품사'))
+
+                if is_korean:
+                    st.info("한국어 텍스트가 감지되었습니다.")
+                else:
+                    st.info("영어 텍스트가 감지되었습니다.")
+            except Exception as e:
+                st.error(f"품사 분석 중 오류 발생: {str(e)}")
+                st.error(traceback.format_exc())
 
         elif analysis_type == "감정 분석":
             if st.button("감정 분석하기"):
-                if st.session_state.openai_client:
+                if st.session_state.openai_api_key:
                     with st.spinner("기사의 감정을 분석 중입니다..."):
                         try:
-                            response = st.session_state.openai_client.chat.completions.create(
-                                model="gpt-4",
+                            # sentiment-analysis prompt (legacy-style request); the literal
+                            # braces in the JSON example are doubled so the str.format()
+                            # call below does not treat them as replacement fields
+                            prompt = """
+                            다음 뉴스 기사의 감정과 논조를 분석하고, 아래 예시처럼 JSON만 반환하세요.
+                            불필요한 설명, 인사말, 기타 텍스트 없이 반드시 JSON만 출력하세요.
+
+                            예시:
+                            {{
+                                "sentiment": "긍정적",
+                                "reason": "이 기사에서는 긍정적인 단어와 표현이 많이 사용되었습니다.",
+                                "keywords": [
+                                    {{"word": "희망", "score": 8}},
+                                    {{"word": "성공", "score": 7}},
+                                    {{"word": "기대", "score": 6}},
+                                    {{"word": "성장", "score": 7}},
+                                    {{"word": "혁신", "score": 8}}
+                                ]
+                            }}
+
+                            분석할 기사:
+                            제목: {title}
+                            내용: {content}
+                            """
+
+                            # run the sentiment analysis
+                            response = openai.ChatCompletion.create(
+                                model="gpt-4.1-mini",
                                 messages=[
-                                    {"role": "system", "content": """당신은 텍스트의 감정과 논조를 분석하는 전문가입니다.
-                                    다음 뉴스 기사의 감정과 논조를 분석하고, 반드시 아래 형식의 JSON으로 응답해주세요:
-                                    {
-                                        "sentiment": "긍정적/부정적/중립적",
-                                        "reason": "이유 설명...",
-                                        "keywords": [
-                                            {"word": "키워드1", "score": 8},
-                                            {"word": "키워드2", "score": 7}
-                                        ]
-                                    }"""},
-                                    {"role": "user", "content": f"다음 뉴스 기사를 분석해 주세요:\n\n제목: {selected_article['title']}\n\n내용: {selected_article['content'][:1500]}"}
+                                    {"role": "system", "content": "당신은 텍스트의 감정과 논조를 분석하는 전문가입니다. 다음 뉴스 기사의 감정과 논조를 분석하고, '긍정적', '부정적', '중립적' 중 하나로 분류해 주세요. 또한 기사에서 드러나는 핵심 감정 키워드를 5개 추출하고, 각 키워드별로 1-10 사이의 강도 점수를 매겨주세요. JSON 형식으로 다음과 같이 응답해주세요: {'sentiment': '긍정적/부정적/중립적', 'reason': '이유 설명...', 'keywords': [{'word': '키워드1', 'score': 8}, {'word': '키워드2', 'score': 7}, ...]}"},
+                                    {"role": "user", "content": prompt.format(title=selected_article['title'], content=selected_article['content'][:1500])}
                                 ],
-                                max_tokens=800,
-                                response_format={ "type": "json_object" }  # force a JSON response
+                                max_tokens=800
                             )
 
-                            # inspect the raw response for debugging
-                            content = response.choices[0].message.content
-                            logging.info(f"API 응답: {content}")
-
-                            # parse the JSON
-                            try:
-                                analysis_result = json.loads(content)
-                            except json.JSONDecodeError as e:
-                                logging.error(f"JSON 파싱 오류: {str(e)}")
-                                logging.error(f"파싱 시도한 내용: {content}")
-                                st.error("API 응답을 파싱하는 중 오류가 발생했습니다. 응답 형식이 올바르지 않습니다.")
-                                st.stop()  # st.stop() instead of return
+                            # parse the JSON (legacy response shape)
+                            analysis_result = json.loads(response.choices[0].message['content'])
 
                             # visualize the results
                             st.subheader("감정 분석 결과")
@@ -908,9 +893,9 @@ elif menu == "기사 분석하기":
 
                         except Exception as e:
                             st.error(f"감정 분석 오류: {str(e)}")
-                            st.error(traceback.format_exc())
+                            st.code(traceback.format_exc())
                 else:
-                    st.warning("OpenAI API 키를 사이드바에서 설정해주세요.")
+                    st.warning("OpenAI API 키가 설정되어 있지 않습니다. 사이드바에서 API 키를 설정해주세요.")
 
 elif menu == "새 기사 생성하기":
     st.header("새 기사 생성하기")
@@ -945,7 +930,7 @@ elif menu == "새 기사 생성하기":
     generate_image_too = st.checkbox("기사 생성 후 이미지도 함께 생성하기", value=True)
 
     if st.button("새 기사 생성하기"):
-        if st.session_state.openai_client:
+        if st.session_state.openai_api_key:
             with st.spinner("기사를 생성 중입니다..."):
                 new_article = generate_article(selected_article['content'], prompt_text)
@@ -955,6 +940,7 @@ elif menu == "새 기사 생성하기":
                 # also generate an image (if the option is selected)
                 if generate_image_too:
                     with st.spinner("기사 관련 이미지를 생성 중입니다..."):
+                        # build the image-generation prompt
                        image_prompt = f"""신문기사 제목 "{selected_article['title']}" 을 보고 이미지를 만들어줘
                        이미지에는 다음 요소가 포함되어야 합니다:
                        - 기사를 이해할 수 있는 도식
@@ -963,13 +949,13 @@ elif menu == "새 기사 생성하기":
                        """
 
                        # generate the image
-                        image = generate_image(image_prompt)
+                        image_url = generate_image(image_prompt)
 
-                        if isinstance(image, BytesIO):
+                        if image_url and not image_url.startswith("이미지 생성 오류"):
                            st.subheader("생성된 이미지:")
-                            st.image(image, use_column_width=True)
+                            st.image(image_url)
                        else:
-                            st.error(image)
+                            st.error(image_url)
 
                 # option to save the generated article
                 if st.button("생성된 기사 저장"):
@@ -987,6 +973,8 @@ elif menu == "새 기사 생성하기":
         else:
             st.warning("OpenAI API 키를 사이드바에서 설정해주세요.")
 
+
+
 elif menu == "뉴스 기사 예약하기":
     st.header("뉴스 기사 예약하기")
 
@@ -1073,30 +1061,6 @@ elif menu == "뉴스 기사 예약하기":
     with tab3:
         st.subheader("스케줄러 제어 및 상태")
 
-        # place the log viewer at the top
-        st.subheader("실시간 로그")
-        log_container = st.empty()
-
-        def update_logs():
-            try:
-                with open('/tmp/crawler.log', 'r') as f:
-                    logs = f.readlines()
-                return ''.join(logs[-100:])  # show only the last 100 lines
-            except Exception as e:
-                return f"로그 파일을 읽을 수 없습니다: {str(e)}"
-
-        # auto-refresh the logs
-        if st.checkbox("로그 자동 업데이트", value=True):
-            log_content = update_logs()
-            log_container.text_area("최근 로그", value=log_content, height=400)
-        else:
-            if st.button("로그 새로고침"):
-                log_content = update_logs()
-                log_container.text_area("최근 로그", value=log_content, height=400)
-
-        st.divider()
-
-        # scheduler controls
         col1, col2 = st.columns(2)
 
         with col1:
@@ -1180,4 +1144,20 @@ elif menu == "뉴스 기사 예약하기":
 
 # footer
 st.markdown("---")
-st.markdown("© 뉴스 기사 도구 @conanssam")
+st.markdown("© 뉴스 기사 도구 @conanssam")
+
+def extract_json_from_response(response_text):
+    # extract only the JSON object (the first matching brace pair)
+    match = re.search(r'\{.*\}', response_text, re.DOTALL)
+    if match:
+        json_str = match.group(0)
+        try:
+            return json.loads(json_str)
+        except Exception as e:
+            st.error(f"JSON 파싱 오류: {str(e)}")
+            st.code(json_str)
+            return None
+    else:
+        st.error("응답에서 JSON을 찾을 수 없습니다.")
+        st.code(response_text)
+        return None
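Note on the SDK styles involved: the right-hand side of this diff calls the module-level openai.ChatCompletion and openai.Image APIs, which exist only in the pre-1.0 OpenAI Python SDK (both were removed in openai>=1.0), so the file needs a legacy pin to run. A minimal sketch of the two call styles the diff moves between, assuming a pin such as openai==0.28 for the legacy path; the API key and prompt text are placeholders, and the model names are taken from the diff itself:

# legacy module-level style (requires openai<1.0), as used after this commit
import openai

openai.api_key = "sk-..."  # placeholder key
response = openai.ChatCompletion.create(
    model="gpt-4.1-mini",  # model name used in the diff
    messages=[{"role": "user", "content": "한 문장으로 자기소개를 해주세요."}],
    max_tokens=100,
)
print(response.choices[0].message['content'])  # dict-style access, as in the diff

# client-object style (openai>=1.0), as used before this commit
from openai import OpenAI

client = OpenAI(api_key="sk-...")  # placeholder key
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "한 문장으로 자기소개를 해주세요."}],
    max_tokens=100,
)
print(response.choices[0].message.content)  # attribute-style access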
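The extract_json_from_response helper appended at the end of the file is defined but not called anywhere yet. A hypothetical sketch of how it could replace the bare json.loads call in the sentiment-analysis path, tolerating models that wrap the JSON in extra prose (this wiring is not part of the commit):

# hypothetical usage inside the sentiment-analysis try block
raw_content = response.choices[0].message['content']
analysis_result = extract_json_from_response(raw_content)
if analysis_result is None:
    st.stop()  # the helper has already rendered the error details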