JUNGU committed
Commit 551eae9 Β· verified Β· Parent(s): f542ec8

Update src/streamlit_app.py

Files changed (1):
  1. src/streamlit_app.py +272 -1046
src/streamlit_app.py CHANGED
@@ -1,3 +1,5 @@
  import streamlit as st
  import pandas as pd
  import requests
@@ -9,1116 +11,340 @@ from nltk.tokenize import word_tokenize
  from nltk.corpus import stopwords
  from collections import Counter
  import json
- import os
  from datetime import datetime, timedelta
- import openai  # legacy (pre-1.0) SDK style
- from dotenv import load_dotenv
- import traceback
- import plotly.graph_objects as go
  import schedule
  import threading
  import matplotlib.pyplot as plt

- # Word cloud support
- try:
-     from wordcloud import WordCloud
- except ImportError:
-     st.error("wordcloud νŒ¨ν‚€μ§€λ₯Ό μ„€μΉ˜ν•΄μ£Όμ„Έμš”: pip install wordcloud")
-     WordCloud = None

- # Scheduler state class
- class SchedulerState:
-     def __init__(self):
-         self.is_running = False
-         self.thread = None
-         self.last_run = None
-         self.next_run = None
-         self.scheduled_jobs = []
-         self.scheduled_results = []

- # Global scheduler state object (used inside the worker thread)
- global_scheduler_state = SchedulerState()
-
- # Initialize session state for API key management
- if 'openai_api_key' not in st.session_state:
-     st.session_state.openai_api_key = None
-
- # Try several sources for the API key
- load_dotenv()  # try a .env file first
-
- # 1. Check the environment variable
- if os.environ.get('OPENAI_API_KEY'):
-     st.session_state.openai_api_key = os.environ.get('OPENAI_API_KEY')
-     openai.api_key = st.session_state.openai_api_key
-
- # 2. Check Streamlit secrets (try-except so a missing secrets file is not fatal)
- if not st.session_state.openai_api_key:
      try:
-         if 'OPENAI_API_KEY' in st.secrets:
-             st.session_state.openai_api_key = st.secrets['OPENAI_API_KEY']
-             openai.api_key = st.session_state.openai_api_key
-     except Exception:
-         pass  # no error even when the secrets file is missing
-
- # Point NLTK at a temporary data directory
- nltk_data_dir = '/tmp/nltk_data'
- os.makedirs(nltk_data_dir, exist_ok=True)
- nltk.data.path.insert(0, nltk_data_dir)  # search this path first
-
- # Download the required NLTK data
- try:
-     nltk.data.find('tokenizers/punkt')
- except LookupError:
-     nltk.download('punkt', download_dir=nltk_data_dir)
-
- try:
-     nltk.data.find('corpora/stopwords')
- except LookupError:
-     nltk.download('stopwords', download_dir=nltk_data_dir)

- # Page configuration
- st.set_page_config(page_title="λ‰΄μŠ€ 기사 도ꡬ", page_icon="πŸ“°", layout="wide")
-
- # API key input field in the sidebar
  with st.sidebar:
      st.title("λ‰΄μŠ€ 기사 도ꡬ")
-     menu = st.radio(
-         "메뉴 선택",
-         ["λ‰΄μŠ€ 기사 크둀링", "기사 λΆ„μ„ν•˜κΈ°", "μƒˆ 기사 μƒμ„±ν•˜κΈ°", "λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°"]
-     )
-
-     st.divider()
-     api_key = st.text_input("OpenAI API ν‚€ μž…λ ₯", type="password")
-     if api_key:
-         st.session_state.openai_api_key = api_key
-         openai.api_key = api_key
-         st.success("API ν‚€κ°€ μ„€μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
-
- # Load previously saved articles
  def load_saved_articles():
-     if os.path.exists('/tmp/saved_articles/articles.json'):
-         with open('/tmp/saved_articles/articles.json', 'r', encoding='utf-8') as f:
              return json.load(f)
      return []

- # Save articles to disk
  def save_articles(articles):
-     os.makedirs('/tmp/saved_articles', exist_ok=True)
-     with open('/tmp/saved_articles/articles.json', 'w', encoding='utf-8') as f:
          json.dump(articles, f, ensure_ascii=False, indent=2)

  @st.cache_data
  def crawl_naver_news(keyword, num_articles=5):
-     """
-     Collect Naver News articles for a keyword.
-     """
      url = f"https://search.naver.com/search.naver?where=news&query={keyword}"
      results = []
-
      try:
-         # Request the page
-         response = requests.get(url)
-         soup = BeautifulSoup(response.text, 'html.parser')
-
-         # Find the news items
-         news_items = soup.select('div.sds-comps-base-layout.sds-comps-full-layout')
-
-         # Extract the fields from each item
-         for i, item in enumerate(news_items):
-             if i >= num_articles:
-                 break
-
-             try:
-                 # Title and link
-                 title_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww span')
-                 if not title_element:
-                     continue
-
-                 title = title_element.text.strip()
-                 link_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww')
-                 link = link_element['href'] if link_element else ""
-
-                 # Press/source name
-                 press_element = item.select_one('div.sds-comps-profile-info-title span.sds-comps-text-type-body2')
-                 source = press_element.text.strip() if press_element else "μ•Œ 수 μ—†μŒ"
-
-                 # Date
-                 date_element = item.select_one('span.r0VOr')
-                 date = date_element.text.strip() if date_element else "μ•Œ 수 μ—†μŒ"
-
-                 # Preview text
-                 desc_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww.IaKmSOGPdofdPwPE6cyU > span')
-                 description = desc_element.text.strip() if desc_element else "λ‚΄μš© μ—†μŒ"
-
-                 results.append({
-                     'title': title,
-                     'link': link,
-                     'description': description,
-                     'source': source,
-                     'date': date,
-                     'content': ""  # filled in later with the full article text
-                 })
-
-             except Exception as e:
-                 st.error(f"기사 정보 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
-                 continue
-
      except Exception as e:
-         st.error(f"νŽ˜μ΄μ§€ μš”μ²­ 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
-
      return results

- # Fetch the full text of an article
  def get_article_content(url):
      try:
-         response = requests.get(url, timeout=5)
-         soup = BeautifulSoup(response.text, 'html.parser')
-
-         # Naver News article body
-         content = soup.select_one('#dic_area')
-         if content:
-             text = content.text.strip()
-             text = re.sub(r'\s+', ' ', text)  # collapse repeated whitespace
-             return text
-
-         # Article bodies on other news sites (more selectors may be needed)
-         content = soup.select_one('.article_body, .article-body, .article-content, .news-content-inner')
-         if content:
-             text = content.text.strip()
-             text = re.sub(r'\s+', ' ', text)
              return text
-
-         return "본문을 κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€."
-     except Exception as e:
-         return f"였λ₯˜ λ°œμƒ: {str(e)}"

- # Keyword analysis with NLTK
  def analyze_keywords(text, top_n=10):
-     # Korean stopword list (has to be defined by hand)
-     korean_stopwords = ['이', 'κ·Έ', 'μ €', '것', '및', 'λ“±', 'λ₯Ό', '을', '에', 'μ—μ„œ', '의', '으둜', '둜']
-
-     tokens = word_tokenize(text)
-     tokens = [word for word in tokens if word.isalnum() and len(word) > 1 and word not in korean_stopwords]
-
-     word_count = Counter(tokens)
-     top_keywords = word_count.most_common(top_n)
-
-     return top_keywords
-
- # Keyword extraction for the word cloud
- def extract_keywords_for_wordcloud(text, top_n=50):
-     if not text or len(text.strip()) < 10:
-         return {}
-
      try:
-         try:
-             tokens = word_tokenize(text.lower())
-         except Exception as e:
-             st.warning(f"{str(e)} 였λ₯˜λ°œμƒ")
-             tokens = text.lower().split()
-
-         stop_words = set()
-         try:
-             stop_words = set(stopwords.words('english'))
-         except Exception:
-             pass
-
-         korea_stop_words = {
-             '및', 'λ“±', 'λ₯Ό', '이', '의', 'κ°€', '에', 'λŠ”', '으둜', 'μ—μ„œ', 'κ·Έ', '또', 'λ˜λŠ”', 'ν•˜λŠ”', 'ν• ', 'ν•˜κ³ ',
-             'μžˆλ‹€', '이닀', 'μœ„ν•΄', '것이닀', '것은', 'λŒ€ν•œ', 'λ•Œλ¬Έ', '그리고', 'ν•˜μ§€λ§Œ', 'κ·ΈλŸ¬λ‚˜', 'κ·Έλž˜μ„œ',
-             'μž…λ‹ˆλ‹€', 'ν•©λ‹ˆλ‹€', 'μŠ΅λ‹ˆλ‹€', 'μš”', 'μ£ ', 'κ³ ', 'κ³Ό', '와', '도', '은', '수', '것', 'λ“€', '제', 'μ €',
-             'λ…„', 'μ›”', '일', 'μ‹œ', 'λΆ„', '초', 'μ§€λ‚œ', 'μ˜¬ν•΄', 'λ‚΄λ…„', '졜근', 'ν˜„μž¬', '였늘', '내일', 'μ–΄μ œ',
-             'μ˜€μ „', 'μ˜€ν›„', 'λΆ€ν„°', 'κΉŒμ§€', 'μ—κ²Œ', 'κ»˜μ„œ', '이라고', '라고', 'ν•˜λ©°', 'ν•˜λ©΄μ„œ', '따라', '톡해',
-             'κ΄€λ ¨', 'ν•œνŽΈ', '특히', 'κ°€μž₯', '맀우', '더', '덜', '많이', '쑰금', '항상', '자주', '가끔', '거의',
-             'μ „ν˜€', 'λ°”λ‘œ', '정말', 'λ§Œμ•½', 'λΉ„λ‘―ν•œ', '등을', '등이', 'λ“±μ˜', 'λ“±κ³Ό', '등도', '등에', 'λ“±μ—μ„œ',
-             '기자', 'λ‰΄μŠ€', '사진', 'μ—°ν•©λ‰΄μŠ€', 'λ‰΄μ‹œμŠ€', '제곡', '무단', 'μ „μž¬', '재배포', 'κΈˆμ§€', '액컀', '멘트',
-             '일보', '데일리', '경제', 'μ‚¬νšŒ', 'μ •μΉ˜', '세계', 'κ³Όν•™', '아이티', 'λ‹·μ»΄', '씨넷', 'λΈ”λ‘œν„°', 'μ „μžμ‹ λ¬Έ'
-         }
-         stop_words.update(korea_stop_words)
-
-         # Keep only tokens longer than one character that are not stopwords
-         filtered_tokens = [word for word in tokens if len(word) > 1 and word not in stop_words]
-
-         # Count word frequencies
-         word_freq = {}
-         for word in filtered_tokens:
-             if word.isalnum():  # allow only alphanumeric tokens
-                 word_freq[word] = word_freq.get(word, 0) + 1
-
-         # Sort by frequency and return the top n
-         sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
-
-         if not sorted_words:
-             return {"data": 1, "analysis": 1, "news": 1}
-
-         return dict(sorted_words[:top_n])
-
      except Exception as e:
-         st.error(f"였λ₯˜λ°œμƒ {str(e)}")
-         return {"data": 1, "analysis": 1, "news": 1}
-
- # Word cloud generation
- def generate_wordcloud(keywords_dict):
-     if not WordCloud:
-         st.warning("μ›Œλ“œν΄λΌμš°λ“œ μ„€μΉ˜μ•ˆλ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.")
-         return None
-     try:
-         wc = WordCloud(
-             width=800,
-             height=400,
-             background_color='white',
-             colormap='viridis',
-             max_font_size=150,
-             random_state=42
-         ).generate_from_frequencies(keywords_dict)
-
-         try:
-             possible_font_paths = ["NanumGothic.ttf", "이름"]
-
-             font_path = None
-             for path in possible_font_paths:
-                 if os.path.exists(path):
-                     font_path = path
-                     break
-
-             if font_path:
-                 wc = WordCloud(
-                     font_path=font_path,
-                     width=800,
-                     height=400,
-                     background_color='white',
-                     colormap='viridis',
-                     max_font_size=150,
-                     random_state=42
-                 ).generate_from_frequencies(keywords_dict)
-         except Exception as e:
-             print(f"였λ₯˜λ°œμƒ {str(e)}")
-
-         return wc
-
-     except Exception as e:
-         st.error(f"였λ₯˜λ°œμƒ {str(e)}")
-         return None

- # News analysis function
- def analyze_news_content(news_df):
-     if news_df.empty:
-         return "데이터가 μ—†μŠ΅λ‹ˆλ‹€"
-
-     results = {}
-     # Counts by source
-     if 'source' in news_df.columns:
-         results['source_counts'] = news_df['source'].value_counts().to_dict()
-     # Counts by date
-     if 'date' in news_df.columns:
-         results['date_counts'] = news_df['date'].value_counts().to_dict()
-
-     # Keyword analysis
-     all_text = " ".join(news_df['title'].fillna('') + " " + news_df['content'].fillna(''))
-
-     if len(all_text.strip()) > 0:
-         results['top_keywords_for_wordcloud'] = extract_keywords_for_wordcloud(all_text, top_n=50)
-         results['top_keywords'] = analyze_keywords(all_text)
-     else:
-         results['top_keywords_for_wordcloud'] = {}
-         results['top_keywords'] = []
-     return results
-
- # New-article generation with the OpenAI API (legacy SDK style)
- def generate_article(original_content, prompt_text):
      try:
-         if not st.session_state.openai_api_key:
-             return "OpenAI API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
-
-         response = openai.ChatCompletion.create(
-             model="gpt-4.1-mini",
              messages=[
-                 {"role": "system", "content": "당신은 전문적인 λ‰΄μŠ€ κΈ°μžμž…λ‹ˆλ‹€. μ£Όμ–΄μ§„ λ‚΄μš©μ„ λ°”νƒ•μœΌλ‘œ μƒˆλ‘œμš΄ 기사λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”."},
-                 {"role": "user", "content": f"λ‹€μŒ λ‚΄μš©μ„ λ°”νƒ•μœΌλ‘œ {prompt_text}\n\n{original_content[:1000]}"}
              ],
-             max_tokens=2000
          )
-         return response.choices[0].message['content']
      except Exception as e:
-         return f"기사 생성 였λ₯˜: {str(e)}"

- # Image generation with the OpenAI API (legacy SDK style)
  def generate_image(prompt):
      try:
-         if not st.session_state.openai_api_key:
-             return "OpenAI API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
-
-         response = openai.Image.create(
-             prompt=prompt,
-             n=1,
-             size="1024x1024"
-         )
-         return response['data'][0]['url']
-     except Exception as e:
-         return f"이미지 생성 였λ₯˜: {str(e)}"
-
- # Scheduler helpers
- def get_next_run_time(hour, minute):
-     now = datetime.now()
-     next_run = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
-     if next_run <= now:
-         next_run += timedelta(days=1)
-     return next_run
-
- def run_scheduled_task():
-     try:
-         while global_scheduler_state.is_running:
-             schedule.run_pending()
-             time.sleep(1)
      except Exception as e:
-         print(f"μŠ€μΌ€μ€„λŸ¬ μ—λŸ¬ λ°œμƒ: {e}")
-         traceback.print_exc()

- def perform_news_task(task_type, keyword, num_articles, file_prefix):
-     try:
-         articles = crawl_naver_news(keyword, num_articles)
-
-         # Fetch the body of each article
-         for article in articles:
-             article['content'] = get_article_content(article['link'])
-             time.sleep(0.5)  # avoid hammering the server
-
-         # Save the results
-         os.makedirs('/tmp/scheduled_news', exist_ok=True)
-         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-         filename = f"/tmp/scheduled_news/{file_prefix}_{task_type}_{timestamp}.json"
-
-         with open(filename, 'w', encoding='utf-8') as f:
-             json.dump(articles, f, ensure_ascii=False, indent=2)
-
-         global_scheduler_state.last_run = datetime.now()
-         print(f"{datetime.now()} - {task_type} λ‰΄μŠ€ 기사 μˆ˜μ§‘ μ™„λ£Œ: {keyword}")
-
-         # Store the result in the global state (for UI updates)
-         result_item = {
-             'task_type': task_type,
-             'keyword': keyword,
-             'timestamp': timestamp,
-             'num_articles': len(articles),
-             'filename': filename
-         }
-         global_scheduler_state.scheduled_results.append(result_item)
-
-     except Exception as e:
-         print(f"μž‘μ—… μ‹€ν–‰ 쀑 였λ₯˜ λ°œμƒ: {e}")
-         traceback.print_exc()
-
- def start_scheduler(daily_tasks, interval_tasks):
-     if not global_scheduler_state.is_running:
-         schedule.clear()
-         global_scheduler_state.scheduled_jobs = []
-
-         # Register the daily tasks
-         for task in daily_tasks:
-             hour = task['hour']
-             minute = task['minute']
-             keyword = task['keyword']
-             num_articles = task['num_articles']
-
-             job_id = f"daily_{keyword}_{hour}_{minute}"
-             schedule.every().day.at(f"{hour:02d}:{minute:02d}").do(
-                 perform_news_task, "daily", keyword, num_articles, job_id
-             ).tag(job_id)
-
-             global_scheduler_state.scheduled_jobs.append({
-                 'id': job_id,
-                 'type': 'daily',
-                 'time': f"{hour:02d}:{minute:02d}",
-                 'keyword': keyword,
-                 'num_articles': num_articles
-             })
-
-         # Register the interval tasks
-         for task in interval_tasks:
-             interval_minutes = task['interval_minutes']
-             keyword = task['keyword']
-             num_articles = task['num_articles']
-             run_immediately = task['run_immediately']
-
-             job_id = f"interval_{keyword}_{interval_minutes}"
-
-             if run_immediately:
-                 # Run once right away
-                 perform_news_task("interval", keyword, num_articles, job_id)
-
-             # Then schedule it every N minutes
-             schedule.every(interval_minutes).minutes.do(
-                 perform_news_task, "interval", keyword, num_articles, job_id
-             ).tag(job_id)
-
-             global_scheduler_state.scheduled_jobs.append({
-                 'id': job_id,
-                 'type': 'interval',
-                 'interval': f"{interval_minutes}λΆ„λ§ˆλ‹€",
-                 'keyword': keyword,
-                 'num_articles': num_articles,
-                 'run_immediately': run_immediately
-             })
-
-         # Compute the next run time
-         next_run = schedule.next_run()
-         if next_run:
-             global_scheduler_state.next_run = next_run
-
-         # Start the scheduler thread
-         global_scheduler_state.is_running = True
-         global_scheduler_state.thread = threading.Thread(
-             target=run_scheduled_task, daemon=True
-         )
-         global_scheduler_state.thread.start()
-
-         # Mirror the state into session state (for the UI)
-         if 'scheduler_status' not in st.session_state:
-             st.session_state.scheduler_status = {}
-
-         st.session_state.scheduler_status = {
-             'is_running': global_scheduler_state.is_running,
-             'last_run': global_scheduler_state.last_run,
-             'next_run': global_scheduler_state.next_run,
-             'jobs_count': len(global_scheduler_state.scheduled_jobs)
-         }

  def stop_scheduler():
-     if global_scheduler_state.is_running:
-         global_scheduler_state.is_running = False
-         schedule.clear()
-         if global_scheduler_state.thread:
-             global_scheduler_state.thread.join(timeout=1)
-         global_scheduler_state.next_run = None
-         global_scheduler_state.scheduled_jobs = []
-
-         # Update the UI state
-         if 'scheduler_status' in st.session_state:
-             st.session_state.scheduler_status['is_running'] = False

- # Render the screen for the selected menu
  if menu == "λ‰΄μŠ€ 기사 크둀링":
502
  st.header("λ‰΄μŠ€ 기사 크둀링")
503
-
504
- keyword = st.text_input("검색어 μž…λ ₯", "인곡지λŠ₯")
505
- num_articles = st.slider("κ°€μ Έμ˜¬ 기사 수", min_value=1, max_value=20, value=5)
506
-
507
  if st.button("기사 κ°€μ Έμ˜€κΈ°"):
508
- with st.spinner("기사λ₯Ό μˆ˜μ§‘ μ€‘μž…λ‹ˆλ‹€..."):
509
- articles = crawl_naver_news(keyword, num_articles)
510
-
511
- # 기사 λ‚΄μš© κ°€μ Έμ˜€κΈ°
512
- for i, article in enumerate(articles):
513
- st.progress((i + 1) / len(articles))
514
- article['content'] = get_article_content(article['link'])
515
- time.sleep(0.5) # μ„œλ²„ λΆ€ν•˜ λ°©μ§€
516
-
517
- # κ²°κ³Ό μ €μž₯ 및 ν‘œμ‹œ
518
- save_articles(articles)
519
- st.success(f"{len(articles)}개의 기사λ₯Ό μˆ˜μ§‘ν–ˆμŠ΅λ‹ˆλ‹€!")
520
-
521
- # μˆ˜μ§‘ν•œ 기사 ν‘œμ‹œ
522
- for article in articles:
523
- with st.expander(f"{article['title']} - {article['source']}"):
524
- st.write(f"**좜처:** {article['source']}")
525
- st.write(f"**λ‚ μ§œ:** {article['date']}")
526
- st.write(f"**μš”μ•½:** {article['description']}")
527
- st.write(f"**링크:** {article['link']}")
528
- st.write("**본문 미리보기:**")
529
- st.write(article['content'][:300] + "..." if len(article['content']) > 300 else article['content'])
530
 
531
  elif menu == "기사 뢄석���기":
532
  st.header("기사 λΆ„μ„ν•˜κΈ°")
533
-
534
- articles = load_saved_articles()
535
- if not articles:
536
- st.warning("μ €μž₯된 기사가 μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 'λ‰΄μŠ€ 기사 크둀링' λ©”λ‰΄μ—μ„œ 기사λ₯Ό μˆ˜μ§‘ν•΄μ£Όμ„Έμš”.")
537
  else:
538
- # 기사 선택
539
- titles = [article['title'] for article in articles]
540
- selected_title = st.selectbox("뢄석할 기사 선택", titles)
541
-
542
- selected_article = next((a for a in articles if a['title'] == selected_title), None)
543
-
544
- if selected_article:
545
- st.write(f"**제λͺ©:** {selected_article['title']}")
546
- st.write(f"**좜처:** {selected_article['source']}")
547
-
548
- # λ³Έλ¬Έ ν‘œμ‹œ
549
- with st.expander("기사 λ³Έλ¬Έ 보기"):
550
- st.write(selected_article['content'])
551
-
552
- # 뢄석 방법 선택
553
- analysis_type = st.radio(
554
- "뢄석 방법",
555
- ["ν‚€μ›Œλ“œ 뢄석", "감정 뢄석", "ν…μŠ€νŠΈ 톡계"]
556
- )
557
-
558
- if analysis_type == "ν‚€μ›Œλ“œ 뢄석":
559
- if st.button("ν‚€μ›Œλ“œ λΆ„μ„ν•˜κΈ°"):
560
- with st.spinner("ν‚€μ›Œλ“œλ₯Ό 뢄석 μ€‘μž…λ‹ˆλ‹€..."):
561
- keyword_tab1, keyword_tab2 = st.tabs(["ν‚€μ›Œλ“œ λΉˆλ„", "μ›Œλ“œν΄λΌμš°λ“œ"])
562
-
563
- with keyword_tab1:
564
- keywords = analyze_keywords(selected_article['content'])
565
-
566
- # μ‹œκ°ν™”
567
- df = pd.DataFrame(keywords, columns=['단어', 'λΉˆλ„μˆ˜'])
568
- st.bar_chart(df.set_index('단어'))
569
-
570
- st.write("**μ£Όμš” ν‚€μ›Œλ“œ:**")
571
- for word, count in keywords:
572
- st.write(f"- {word}: {count}회")
573
- with keyword_tab2:
574
- keyword_dict = extract_keywords_for_wordcloud(selected_article['content'])
575
- wc = generate_wordcloud(keyword_dict)
576
-
577
- if wc:
578
- fig, ax = plt.subplots(figsize=(10, 5))
579
- ax.imshow(wc, interpolation='bilinear')
580
- ax.axis('off')
581
- st.pyplot(fig)
582
-
583
- # ν‚€μ›Œλ“œ μƒμœ„ 20개 ν‘œμ‹œ
584
- st.write("**μƒμœ„ 20개 ν‚€μ›Œλ“œ:**")
585
- top_keywords = sorted(keyword_dict.items(), key=lambda x: x[1], reverse=True)[:20]
586
- keyword_df = pd.DataFrame(top_keywords, columns=['ν‚€μ›Œλ“œ', 'λΉˆλ„'])
587
- st.dataframe(keyword_df)
588
- else:
589
- st.error("μ›Œλ“œν΄λΌμš°λ“œλ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.")
590
-
591
- elif analysis_type == "ν…μŠ€νŠΈ 톡계":
592
- if st.button("ν…μŠ€νŠΈ 톡계 뢄석"):
593
- content = selected_article['content']
594
-
595
- # ν…μŠ€νŠΈ 톡계 계산
596
- word_count = len(re.findall(r'\b\w+\b', content))
597
- char_count = len(content)
598
- sentence_count = len(re.split(r'[.!?]+', content))
599
- avg_word_length = sum(len(word) for word in re.findall(r'\b\w+\b', content)) / word_count if word_count > 0 else 0
600
- avg_sentence_length = word_count / sentence_count if sentence_count > 0 else 0
601
-
602
- # 톡계 ν‘œμ‹œ
603
- st.subheader("ν…μŠ€νŠΈ 톡계")
604
- col1, col2, col3 = st.columns(3)
605
- with col1:
606
- st.metric("단어 수", f"{word_count:,}")
607
- with col2:
608
- st.metric("문자 수", f"{char_count:,}")
609
- with col3:
610
- st.metric("λ¬Έμž₯ 수", f"{sentence_count:,}")
611
-
612
- col1, col2 = st.columns(2)
613
- with col1:
614
- st.metric("평균 단어 길이", f"{avg_word_length:.1f}자")
615
- with col2:
616
- st.metric("평균 λ¬Έμž₯ 길이", f"{avg_sentence_length:.1f}단어")
617
-
618
- # ν…μŠ€νŠΈ λ³΅μž‘μ„± 점수 (κ°„λ‹¨ν•œ μ˜ˆμ‹œ)
619
- complexity_score = min(10, (avg_sentence_length / 10) * 5 + (avg_word_length / 5) * 5)
620
- st.progress(complexity_score / 10)
621
- st.write(f"ν…μŠ€νŠΈ λ³΅μž‘μ„± 점수: {complexity_score:.1f}/10")
622
-
623
- # μΆœν˜„ λΉˆλ„ λ§‰λŒ€ κ·Έλž˜ν”„
624
- st.subheader("���사별 뢄포 (ν•œκ΅­μ–΄/μ˜μ–΄ 지원)")
625
- try:
626
- # KoNLPy μ„€μΉ˜ 확인
627
- try:
628
- from konlpy.tag import Okt
629
- konlpy_installed = True
630
- except ImportError:
631
- konlpy_installed = False
632
- st.warning("ν•œκ΅­μ–΄ ν˜•νƒœμ†Œ 뢄석을 μœ„ν•΄ KoNLPyλ₯Ό μ„€μΉ˜ν•΄μ£Όμ„Έμš”: pip install konlpy")
633
-
634
- # μ˜μ–΄ POS tagger μ€€λΉ„
635
- from nltk import pos_tag
636
- try:
637
- nltk.data.find('taggers/averaged_perceptron_tagger')
638
- except LookupError:
639
- nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
640
-
641
- # μ–Έμ–΄ 감지 (κ°„λ‹¨ν•œ 방식)
642
- is_korean = bool(re.search(r'[κ°€-힣]', content))
643
-
644
- if is_korean and konlpy_installed:
645
- # ν•œκ΅­μ–΄ ν˜•νƒœμ†Œ 뢄석
646
- okt = Okt()
647
- tagged = okt.pos(content)
648
-
649
- # ν•œκ΅­μ–΄ ν’ˆμ‚¬ λ§€ν•‘
650
- pos_dict = {
651
- 'Noun': 'λͺ…사', 'NNG': 'λͺ…사', 'NNP': '고유λͺ…사',
652
- 'Verb': '동사', 'VV': '동사', 'VA': 'ν˜•μš©μ‚¬',
653
- 'Adjective': 'ν˜•μš©μ‚¬',
654
- 'Adverb': '뢀사',
655
- 'Josa': '쑰사', 'Punctuation': 'ꡬ두점',
656
- 'Determiner': 'κ΄€ν˜•μ‚¬', 'Exclamation': '감탄사'
657
- }
658
-
659
- pos_counts = {'λͺ…사': 0, '동사': 0, 'ν˜•μš©μ‚¬': 0, '뢀사': 0, '쑰사': 0, 'ꡬ두점': 0, 'κ΄€ν˜•μ‚¬': 0, '감탄사': 0, '기타': 0}
660
-
661
- for _, pos in tagged:
662
- if pos in pos_dict:
663
- pos_counts[pos_dict[pos]] += 1
664
- elif pos.startswith('N'): # 기타 λͺ…사λ₯˜
665
- pos_counts['λͺ…사'] += 1
666
- elif pos.startswith('V'): # 기타 동사λ₯˜
667
- pos_counts['동사'] += 1
668
- else:
669
- pos_counts['기타'] += 1
670
-
671
- else:
672
- # μ˜μ–΄ POS νƒœκΉ…
673
- tokens = word_tokenize(content.lower())
674
- tagged = pos_tag(tokens)
675
-
676
- # μ˜μ–΄ ν’ˆμ‚¬ λ§€ν•‘
677
- pos_dict = {
678
- 'NN': 'λͺ…사', 'NNS': 'λͺ…사', 'NNP': '고유λͺ…사', 'NNPS': '고유λͺ…사',
679
- 'VB': '동사', 'VBD': '동사', 'VBG': '동사', 'VBN': '동사', 'VBP': '동사', 'VBZ': '동사',
680
- 'JJ': 'ν˜•μš©μ‚¬', 'JJR': 'ν˜•μš©μ‚¬', 'JJS': 'ν˜•μš©μ‚¬',
681
- 'RB': '뢀사', 'RBR': '뢀사', 'RBS': '뢀사'
682
- }
683
-
684
- pos_counts = {'λͺ…사': 0, '동사': 0, 'ν˜•μš©μ‚¬': 0, '뢀사': 0, '기타': 0}
685
-
686
- for _, pos in tagged:
687
- if pos in pos_dict:
688
- pos_counts[pos_dict[pos]] += 1
689
- else:
690
- pos_counts['기타'] += 1
691
-
692
- # κ²°κ³Ό μ‹œκ°ν™”
693
- pos_df = pd.DataFrame({
694
- 'ν’ˆμ‚¬': list(pos_counts.keys()),
695
- 'λΉˆλ„': list(pos_counts.values())
696
- })
697
-
698
- st.bar_chart(pos_df.set_index('ν’ˆμ‚¬'))
699
-
700
- if is_korean:
701
- st.info("ν•œκ΅­μ–΄ ν…μŠ€νŠΈκ°€ κ°μ§€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
702
- else:
703
- st.info("μ˜μ–΄ ν…μŠ€νŠΈκ°€ κ°μ§€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
704
- except Exception as e:
705
- st.error(f"ν’ˆμ‚¬ 뢄석 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
706
- st.error(traceback.format_exc())
707
-
708
- elif analysis_type == "감정 뢄석":
709
- if st.button("감정 λΆ„μ„ν•˜κΈ°"):
710
- if st.session_state.openai_api_key:
711
- with st.spinner("κΈ°μ‚¬μ˜ 감정을 뢄석 μ€‘μž…λ‹ˆλ‹€..."):
712
- try:
713
- # 감정 뢄석 ν”„λ‘¬ν”„νŠΈ μ„€μ • (ꡬ 버전 방식)
714
- response = openai.ChatCompletion.create(
715
- model="gpt-4.1-mini",
716
- messages=[
717
- {"role": "system", "content": "당신은 ν…μŠ€νŠΈμ˜ 감정과 λ…Όμ‘°λ₯Ό λΆ„μ„ν•˜λŠ” μ „λ¬Έκ°€μž…λ‹ˆλ‹€. λ‹€μŒ λ‰΄μŠ€ κΈ°μ‚¬μ˜ 감정과 λ…Όμ‘°λ₯Ό λΆ„μ„ν•˜κ³ , '긍정적', '뢀정적', '쀑립적' 쀑 ν•˜λ‚˜λ‘œ λΆ„λ₯˜ν•΄ μ£Όμ„Έμš”. λ˜ν•œ κΈ°μ‚¬μ—μ„œ λ“œλŸ¬λ‚˜λŠ” 핡심 감정 ν‚€μ›Œλ“œλ₯Ό 5개 μΆ”μΆœν•˜κ³ , 각 ν‚€μ›Œλ“œλ³„λ‘œ 1-10 μ‚¬μ΄μ˜ 강도 점수λ₯Ό λ§€κ²¨μ£Όμ„Έμš”. JSON ν˜•μ‹μœΌλ‘œ λ‹€μŒκ³Ό 같이 μ‘λ‹΅ν•΄μ£Όμ„Έμš”: {'sentiment': '긍정적/뢀정적/쀑립적', 'reason': '이유 μ„€λͺ…...', 'keywords': [{'word': 'ν‚€μ›Œλ“œ1', 'score': 8}, {'word': 'ν‚€μ›Œλ“œ2', 'score': 7}, ...]}"},
718
- {"role": "user", "content": f"λ‹€μŒ λ‰΄μŠ€ 기사λ₯Ό 뢄석해 μ£Όμ„Έμš”:\n\n제λͺ©: {selected_article['title']}\n\nλ‚΄μš©: {selected_article['content'][:1500]}"}
719
- ],
720
- max_tokens=800
721
- )
722
-
723
- # JSON νŒŒμ‹± (ꡬ 버전 방식)
724
- analysis_result = json.loads(response.choices[0].message['content'])
725
-
726
- # κ²°κ³Ό μ‹œκ°ν™”
727
- st.subheader("감정 뢄석 κ²°κ³Ό")
728
-
729
- # 1. 감정 νƒ€μž…μ— λ”°λ₯Έ μ‹œκ°μ  ν‘œν˜„
730
- sentiment_type = analysis_result.get('sentiment', '쀑립적')
731
- col1, col2, col3 = st.columns([1, 3, 1])
732
-
733
- with col2:
734
- if sentiment_type == "긍정적":
735
- st.markdown(f"""
736
- <div style="background-color:#DCEDC8; padding:20px; border-radius:10px; text-align:center;">
737
- <h1 style="color:#388E3C; font-size:28px;">πŸ˜€ 긍정적 λ…Όμ‘° πŸ˜€</h1>
738
- <p style="font-size:16px;">감정 강도: λ†’μŒ</p>
739
- </div>
740
- """, unsafe_allow_html=True)
741
- elif sentiment_type == "뢀정적":
742
- st.markdown(f"""
743
- <div style="background-color:#FFCDD2; padding:20px; border-radius:10px; text-align:center;">
744
- <h1 style="color:#D32F2F; font-size:28px;">😞 뢀정적 λ…Όμ‘° 😞</h1>
745
- <p style="font-size:16px;">감정 강도: λ†’μŒ</p>
746
- </div>
747
- """, unsafe_allow_html=True)
748
- else:
749
- st.markdown(f"""
750
- <div style="background-color:#E0E0E0; padding:20px; border-radius:10px; text-align:center;">
751
- <h1 style="color:#616161; font-size:28px;">😐 쀑립적 λ…Όμ‘° 😐</h1>
752
- <p style="font-size:16px;">감정 강도: 쀑간</p>
753
- </div>
754
- """, unsafe_allow_html=True)
755
-
756
- # 2. 이유 μ„€λͺ…
757
- st.markdown("### 뢄석 κ·Όκ±°")
758
- st.markdown(f"<div style='background-color:#F5F5F5; padding:15px; border-radius:5px;'>{analysis_result.get('reason', '')}</div>", unsafe_allow_html=True)
759
-
760
- # 3. 감정 ν‚€μ›Œλ“œ μ‹œκ°ν™”
761
- st.markdown("### 핡심 감정 ν‚€μ›Œλ“œ")
762
-
763
- # ν‚€μ›Œλ“œ 데이터 μ€€λΉ„
764
- keywords = analysis_result.get('keywords', [])
765
- if keywords:
766
- # λ§‰λŒ€ 차트용 데이터
767
- keyword_names = [item.get('word', '') for item in keywords]
768
- keyword_scores = [item.get('score', 0) for item in keywords]
769
-
770
- # λ ˆμ΄λ” 차트 생성
771
- fig = go.Figure()
772
-
773
- # 색상 μ„€μ •
774
- if sentiment_type == "긍정적":
775
- fill_color = 'rgba(76, 175, 80, 0.3)' # μ—°ν•œ μ΄ˆλ‘μƒ‰
776
- line_color = 'rgba(76, 175, 80, 1)' # μ§„ν•œ μ΄ˆλ‘μƒ‰
777
- elif sentiment_type == "뢀정적":
778
- fill_color = 'rgba(244, 67, 54, 0.3)' # μ—°ν•œ 빨간색
779
- line_color = 'rgba(244, 67, 54, 1)' # μ§„ν•œ 빨간색
780
- else:
781
- fill_color = 'rgba(158, 158, 158, 0.3)' # μ—°ν•œ νšŒμƒ‰
782
- line_color = 'rgba(158, 158, 158, 1)' # μ§„ν•œ νšŒμƒ‰
783
-
784
- # λ ˆμ΄λ” 차트 데이터 μ€€λΉ„ - λ§ˆμ§€λ§‰ 점이 첫 점과 μ—°κ²°λ˜λ„λ‘ 데이터 μΆ”κ°€
785
- radar_keywords = keyword_names.copy()
786
- radar_scores = keyword_scores.copy()
787
-
788
- # λ ˆμ΄λ” 차트 생성
789
- fig.add_trace(go.Scatterpolar(
790
- r=radar_scores,
791
- theta=radar_keywords,
792
- fill='toself',
793
- fillcolor=fill_color,
794
- line=dict(color=line_color, width=2),
795
- name='감정 ν‚€μ›Œλ“œ'
796
- ))
797
-
798
- # λ ˆμ΄λ” 차트 λ ˆμ΄μ•„μ›ƒ μ„€μ •
799
- fig.update_layout(
800
- polar=dict(
801
- radialaxis=dict(
802
- visible=True,
803
- range=[0, 10],
804
- tickmode='linear',
805
- tick0=0,
806
- dtick=2
807
- )
808
- ),
809
- showlegend=False,
810
- title={
811
- 'text': '감정 ν‚€μ›Œλ“œ λ ˆμ΄λ” 뢄석',
812
- 'y':0.95,
813
- 'x':0.5,
814
- 'xanchor': 'center',
815
- 'yanchor': 'top'
816
- },
817
- height=500,
818
- width=500,
819
- margin=dict(l=80, r=80, t=80, b=80)
820
- )
821
-
822
- # 차트 쀑앙에 ν‘œμ‹œ
823
- col1, col2, col3 = st.columns([1, 2, 1])
824
- with col2:
825
- st.plotly_chart(fig)
826
-
827
- # ν‚€μ›Œλ“œ μΉ΄λ“œλ‘œ ν‘œμ‹œ
828
- st.markdown("#### ν‚€μ›Œλ“œ μ„ΈλΆ€ μ„€λͺ…")
829
- cols = st.columns(min(len(keywords), 5))
830
- for i, keyword in enumerate(keywords):
831
- with cols[i % len(cols)]:
832
- word = keyword.get('word', '')
833
- score = keyword.get('score', 0)
834
-
835
- # μ μˆ˜μ— λ”°λ₯Έ 색상 계산
836
- r, g, b = 0, 0, 0
837
- if sentiment_type == "긍정적":
838
- g = min(200 + score * 5, 255)
839
- r = max(255 - score * 20, 100)
840
- elif sentiment_type == "뢀정적":
841
- r = min(200 + score * 5, 255)
842
- g = max(255 - score * 20, 100)
843
- else:
844
- r = g = b = 128
845
-
846
- # μΉ΄λ“œ 생성
847
- st.markdown(f"""
848
- <div style="background-color:rgba({r},{g},{b},0.2); padding:10px; border-radius:5px; text-align:center; margin:5px;">
849
- <h3 style="margin:0;">{word}</h3>
850
- <div style="background-color:#E0E0E0; border-radius:3px; margin-top:5px;">
851
- <div style="width:{score*10}%; background-color:rgba({r},{g},{b},0.8); height:10px; border-radius:3px;"></div>
852
- </div>
853
- <p style="margin:2px; font-size:12px;">강도: {score}/10</p>
854
- </div>
855
- """, unsafe_allow_html=True)
856
-
857
- else:
858
- st.info("ν‚€μ›Œλ“œλ₯Ό μΆ”μΆœν•˜μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.")
859
-
860
- # 4. μš”μ•½ 톡계
861
- st.markdown("### μ£Όμš” 톡계")
862
- col1, col2, col3 = st.columns(3)
863
- with col1:
864
- st.metric(label="긍정/λΆ€μ • 점수", value=f"{7 if sentiment_type == '긍정적' else 3 if sentiment_type == '뢀정적' else 5}/10")
865
- with col2:
866
- st.metric(label="ν‚€μ›Œλ“œ 수", value=len(keywords))
867
- with col3:
868
- avg_score = sum(keyword_scores) / len(keyword_scores) if keyword_scores else 0
869
- st.metric(label="평균 강도", value=f"{avg_score:.1f}/10")
870
-
871
- except Exception as e:
872
- st.error(f"감정 뢄석 였λ₯˜: {str(e)}")
873
- st.code(traceback.format_exc())
874
- else:
875
- st.warning("OpenAI API ν‚€κ°€ μ„€μ •λ˜μ–΄ μžˆμ§€ μ•ŠμŠ΅λ‹ˆλ‹€. μ‚¬μ΄λ“œλ°”μ—μ„œ API ν‚€λ₯Ό μ„€μ •ν•΄μ£Όμ„Έμš”.")
876
 
877
  elif menu == "μƒˆ 기사 μƒμ„±ν•˜κΈ°":
878
  st.header("μƒˆ 기사 μƒμ„±ν•˜κΈ°")
879
-
880
- articles = load_saved_articles()
881
- if not articles:
882
- st.warning("μ €μž₯된 기사가 μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 'λ‰΄μŠ€ 기사 크둀링' λ©”λ‰΄μ—μ„œ 기사λ₯Ό μˆ˜μ§‘ν•΄μ£Όμ„Έμš”.")
883
  else:
884
- # 기사 선택
885
- titles = [article['title'] for article in articles]
886
- selected_title = st.selectbox("원본 기사 선택", titles)
887
-
888
- selected_article = next((a for a in articles if a['title'] == selected_title), None)
889
-
890
- if selected_article:
891
- st.write(f"**원본 제λͺ©:** {selected_article['title']}")
892
-
893
- with st.expander("원본 기사 λ‚΄μš©"):
894
- st.write(selected_article['content'])
895
-
896
- prompt_text ="""λ‹€μŒ 기사 양식을 λ”°λΌμ„œ λ‹€μ‹œ μž‘μ„±ν•΄μ€˜.
897
- μ—­ν• : 당신은 μ‹ λ¬Έμ‚¬μ˜ κΈ°μžμž…λ‹ˆλ‹€.
898
- μž‘μ—…: 졜근 μΌμ–΄λ‚œ 사건에 λŒ€ν•œ λ³΄λ„μžλ£Œλ₯Ό μž‘μ„±ν•΄μ•Ό ν•©λ‹ˆλ‹€. μžλ£ŒλŠ” 사싀을 기반으둜 ν•˜λ©°, 객관적이고 μ •ν™•ν•΄μ•Ό ν•©λ‹ˆλ‹€.
899
- μ§€μΉ¨:
900
- 제곡된 정보λ₯Ό λ°”νƒ•μœΌλ‘œ μ‹ λ¬Έ λ³΄λ„μžλ£Œ ν˜•μ‹μ— 맞좰 기사λ₯Ό μž‘μ„±ν•˜μ„Έμš”.
901
- 기사 제λͺ©μ€ 주제λ₯Ό λͺ…ν™•νžˆ λ°˜μ˜ν•˜κ³  λ…μžμ˜ 관심을 끌 수 μžˆλ„λ‘ μž‘μ„±ν•©λ‹ˆλ‹€.
902
- 기사 λ‚΄μš©μ€ μ •ν™•ν•˜κ³  κ°„κ²°ν•˜λ©° 섀득λ ₯ μžˆλŠ” λ¬Έμž₯으둜 κ΅¬μ„±ν•©λ‹ˆλ‹€.
903
- κ΄€λ ¨μžμ˜ 인터뷰λ₯Ό 인용 ν˜•νƒœλ‘œ λ„£μ–΄μ£Όμ„Έμš”.
904
- μœ„μ˜ 정보와 지침을 μ°Έκ³ ν•˜μ—¬ μ‹ λ¬Έ λ³΄λ„μžλ£Œ ν˜•μ‹μ˜ 기사λ₯Ό μž‘μ„±ν•΄ μ£Όμ„Έμš”"""
905
-
906
- # 이미지 생성 μ—¬λΆ€ 선택 μ˜΅μ…˜ μΆ”κ°€
907
- generate_image_too = st.checkbox("기사 생성 ν›„ 이미지도 ν•¨κ»˜ μƒμ„±ν•˜κΈ°", value=True)
908
-
909
- if st.button("μƒˆ 기사 μƒμ„±ν•˜κΈ°"):
910
- if st.session_state.openai_api_key:
911
- with st.spinner("기사λ₯Ό 생성 μ€‘μž…λ‹ˆλ‹€..."):
912
- new_article = generate_article(selected_article['content'], prompt_text)
913
-
914
- st.write("**μƒμ„±λœ 기사:**")
915
- st.write(new_article)
916
-
917
- # 이미지 μƒμ„±ν•˜κΈ° (μ˜΅μ…˜μ΄ μ„ νƒλœ 경우)
918
- if generate_image_too:
919
- with st.spinner("기사 κ΄€λ ¨ 이미지λ₯Ό 생성 μ€‘μž…λ‹ˆλ‹€..."):
920
- # 이미지 생성 ν”„λ‘¬ν”„νŠΈ μ€€λΉ„
921
- image_prompt = f"""신문기사 제λͺ© "{selected_article['title']}" 을 보고 이미지λ₯Ό λ§Œλ“€μ–΄μ€˜
922
- μ΄λ―Έμ§€μ—λŠ” λ‹€μŒ μš”μ†Œκ°€ ν¬ν•¨λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€:
923
- - 기사λ₯Ό 이해할 수 μžˆλŠ” 도식
924
- - 기사 λ‚΄μš©κ³Ό κ΄€λ ¨λœ ν…μŠ€νŠΈ
925
- - μ‹¬ν”Œν•˜κ²Œ 처리
926
- """
927
-
928
- # 이미지 생성
929
- image_url = generate_image(image_prompt)
930
-
931
- if image_url and not image_url.startswith("이미지 생성 였λ₯˜"):
932
- st.subheader("μƒμ„±λœ 이미지:")
933
- st.image(image_url)
934
- else:
935
- st.error(image_url)
936
-
937
- # μƒμ„±λœ 기사 μ €μž₯ μ˜΅μ…˜
938
- if st.button("μƒμ„±λœ 기사 μ €μž₯"):
939
- new_article_data = {
940
- 'title': f"[생성됨] {selected_article['title']}",
941
- 'source': f"AI 생성 (원본: {selected_article['source']})",
942
- 'date': datetime.now().strftime("%Y-%m-%d %H:%M"),
943
- 'description': new_article[:100] + "...",
944
- 'link': "",
945
- 'content': new_article
946
- }
947
- articles.append(new_article_data)
948
- save_articles(articles)
949
- st.success("μƒμ„±λœ 기사가 μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
950
- else:
951
- st.warning("OpenAI API ν‚€λ₯Ό μ‚¬μ΄λ“œλ°”μ—μ„œ μ„€μ •ν•΄μ£Όμ„Έμš”.")
952
-
953
-
954
 
955
  elif menu == "λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°":
956
  st.header("λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°")
957
-
958
- # νƒ­ 생성
959
- tab1, tab2, tab3 = st.tabs(["일별 μ˜ˆμ•½", "μ‹œκ°„ 간격 μ˜ˆμ•½", "μŠ€μΌ€μ€„λŸ¬ μƒνƒœ"])
960
-
961
- # 일별 μ˜ˆμ•½ νƒ­
962
  with tab1:
963
- st.subheader("맀일 μ •ν•΄μ§„ μ‹œκ°„μ— 기사 μˆ˜μ§‘ν•˜κΈ°")
964
-
965
- # ν‚€μ›Œλ“œ μž…λ ₯
966
- daily_keyword = st.text_input("검색 ν‚€μ›Œλ“œ", value="인곡지λŠ₯", key="daily_keyword")
967
- daily_num_articles = st.slider("μˆ˜μ§‘ν•  기사 수", min_value=1, max_value=20, value=5, key="daily_num_articles")
968
-
969
- # μ‹œκ°„ μ„€μ •
970
- daily_col1, daily_col2 = st.columns(2)
971
- with daily_col1:
972
- daily_hour = st.selectbox("μ‹œ", range(24), format_func=lambda x: f"{x:02d}μ‹œ", key="daily_hour")
973
- with daily_col2:
974
- daily_minute = st.selectbox("λΆ„", range(0, 60, 5), format_func=lambda x: f"{x:02d}λΆ„", key="daily_minute")
975
-
976
- # 일별 μ˜ˆμ•½ 리슀트
977
- if 'daily_tasks' not in st.session_state:
978
- st.session_state.daily_tasks = []
979
-
980
- if st.button("일별 μ˜ˆμ•½ μΆ”κ°€"):
981
- st.session_state.daily_tasks.append({
982
- 'hour': daily_hour,
983
- 'minute': daily_minute,
984
- 'keyword': daily_keyword,
985
- 'num_articles': daily_num_articles
986
  })
987
- st.success(f"일별 μ˜ˆμ•½μ΄ μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€: 맀일 {daily_hour:02d}:{daily_minute:02d} - '{daily_keyword}'")
988
-
989
- # μ˜ˆμ•½ λͺ©λ‘ ν‘œμ‹œ
990
- if st.session_state.daily_tasks:
991
- st.subheader("일별 μ˜ˆμ•½ λͺ©λ‘")
992
- for i, task in enumerate(st.session_state.daily_tasks):
993
- st.write(f"{i+1}. 맀일 {task['hour']:02d}:{task['minute']:02d} - '{task['keyword']}' ({task['num_articles']}개)")
994
-
995
- if st.button("일별 μ˜ˆμ•½ μ΄ˆκΈ°ν™”"):
996
- st.session_state.daily_tasks = []
997
- st.warning("일별 μ˜ˆμ•½μ΄ λͺ¨λ‘ μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
998
-
999
- # μ‹œκ°„ 간격 μ˜ˆμ•½ νƒ­
1000
  with tab2:
1001
- st.subheader("μ‹œκ°„ κ°„κ²©μœΌλ‘œ 기사 μˆ˜μ§‘ν•˜κΈ°")
1002
-
1003
- # ν‚€μ›Œλ“œ μž…λ ₯
1004
- interval_keyword = st.text_input("검색 ν‚€μ›Œλ“œ", value="빅데이터", key="interval_keyword")
1005
- interval_num_articles = st.slider("μˆ˜μ§‘ν•  기사 수", min_value=1, max_value=20, value=5, key="interval_num_articles")
1006
-
1007
- # μ‹œκ°„ 간격 μ„€μ •
1008
- interval_minutes = st.number_input("μ‹€ν–‰ 간격(λΆ„)", min_value=1, max_value=60*24, value=30, key="interval_minutes")
1009
-
1010
- # μ¦‰μ‹œ μ‹€ν–‰ μ—¬λΆ€
1011
- run_immediately = st.checkbox("μ¦‰μ‹œ μ‹€ν–‰", value=True, help="μ²΄ν¬ν•˜λ©΄ μŠ€μΌ€μ€„λŸ¬ μ‹œμž‘ μ‹œ μ¦‰μ‹œ μ‹€ν–‰ν•©λ‹ˆλ‹€.")
1012
-
1013
- # μ‹œκ°„ 간격 μ˜ˆμ•½ 리슀트
1014
- if 'interval_tasks' not in st.session_state:
1015
- st.session_state.interval_tasks = []
1016
-
1017
- if st.button("μ‹œκ°„ 간격 μ˜ˆμ•½ μΆ”κ°€"):
1018
- st.session_state.interval_tasks.append({
1019
- 'interval_minutes': interval_minutes,
1020
- 'keyword': interval_keyword,
1021
- 'num_articles': interval_num_articles,
1022
- 'run_immediately': run_immediately
1023
  })
1024
- st.success(f"μ‹œκ°„ 간격 μ˜ˆμ•½μ΄ μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€: {interval_minutes}λΆ„λ§ˆλ‹€ - '{interval_keyword}'")
1025
-
1026
- # μ˜ˆμ•½ λͺ©λ‘ ν‘œμ‹œ
1027
- if st.session_state.interval_tasks:
1028
- st.subheader("μ‹œκ°„ 간격 μ˜ˆμ•½ λͺ©λ‘")
1029
- for i, task in enumerate(st.session_state.interval_tasks):
1030
- immediate_text = "μ¦‰μ‹œ μ‹€ν–‰ ν›„ " if task['run_immediately'] else ""
1031
- st.write(f"{i+1}. {immediate_text}{task['interval_minutes']}λΆ„λ§ˆλ‹€ - '{task['keyword']}' ({task['num_articles']}개)")
1032
-
1033
- if st.button("μ‹œκ°„ 간격 μ˜ˆμ•½ μ΄ˆκΈ°ν™”"):
1034
- st.session_state.interval_tasks = []
1035
- st.warning("μ‹œκ°„ 간격 μ˜ˆμ•½μ΄ λͺ¨λ‘ μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
1036
-
1037
- # μŠ€μΌ€μ€„λŸ¬ μƒνƒœ νƒ­
1038
  with tab3:
1039
- st.subheader("μŠ€μΌ€μ€„λŸ¬ μ œμ–΄ 및 μƒνƒœ")
1040
-
1041
- col1, col2 = st.columns(2)
1042
-
1043
- with col1:
1044
- # μŠ€μΌ€μ€„λŸ¬ μ‹œμž‘/쀑지 λ²„νŠΌ
1045
- if not global_scheduler_state.is_running:
1046
- if st.button("μŠ€μΌ€μ€„λŸ¬ μ‹œμž‘"):
1047
- if not st.session_state.daily_tasks and not st.session_state.interval_tasks:
1048
- st.error("μ˜ˆμ•½λœ μž‘μ—…μ΄ μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 일별 μ˜ˆμ•½ λ˜λŠ” μ‹œκ°„ 간격 μ˜ˆμ•½μ„ μΆ”κ°€ν•΄μ£Όμ„Έμš”.")
1049
- else:
1050
- start_scheduler(st.session_state.daily_tasks, st.session_state.interval_tasks)
1051
- st.success("μŠ€μΌ€μ€„λŸ¬κ°€ μ‹œμž‘λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
1052
- else:
1053
- if st.button("μŠ€μΌ€μ€„λŸ¬ 쀑지"):
1054
- stop_scheduler()
1055
- st.warning("μŠ€μΌ€μ€„λŸ¬κ°€ μ€‘μ§€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
1056
-
1057
- with col2:
1058
- # μŠ€μΌ€μ€„λŸ¬ μƒνƒœ ν‘œμ‹œ
1059
- if 'scheduler_status' in st.session_state:
1060
- st.write(f"μƒνƒœ: {'싀행쀑' if global_scheduler_state.is_running else '쀑지'}")
1061
- if global_scheduler_state.last_run:
1062
- st.write(f"λ§ˆμ§€λ§‰ μ‹€ν–‰: {global_scheduler_state.last_run.strftime('%Y-%m-%d %H:%M:%S')}")
1063
- if global_scheduler_state.next_run and global_scheduler_state.is_running:
1064
- st.write(f"λ‹€μŒ μ‹€ν–‰: {global_scheduler_state.next_run.strftime('%Y-%m-%d %H:%M:%S')}")
1065
- else:
1066
- st.write("μƒνƒœ: 쀑지")
1067
-
1068
- # μ˜ˆμ•½λœ μž‘μ—… λͺ©οΏ½οΏ½οΏ½
1069
- if global_scheduler_state.scheduled_jobs:
1070
- st.subheader("ν˜„μž¬ μ‹€ν–‰ 쀑인 μ˜ˆμ•½ μž‘μ—…")
1071
- for i, job in enumerate(global_scheduler_state.scheduled_jobs):
1072
- if job['type'] == 'daily':
1073
- st.write(f"{i+1}. [일별] 맀일 {job['time']} - '{job['keyword']}' ({job['num_articles']}개)")
1074
- else:
1075
- immediate_text = "[μ¦‰μ‹œ μ‹€ν–‰ ν›„] " if job.get('run_immediately', False) else ""
1076
- st.write(f"{i+1}. [간격] {immediate_text}{job['interval']} - '{job['keyword']}' ({job['num_articles']}개)")
1077
-
1078
- # μŠ€μΌ€μ€„λŸ¬ μ‹€ν–‰ κ²°κ³Ό
1079
- if global_scheduler_state.scheduled_results:
1080
- st.subheader("μŠ€μΌ€μ€„λŸ¬ μ‹€ν–‰ κ²°κ³Ό")
1081
-
1082
- # κ²°κ³Όλ₯Ό UI에 ν‘œμ‹œν•˜κΈ° 전에 볡사
1083
- results_for_display = global_scheduler_state.scheduled_results.copy()
1084
-
1085
- if results_for_display:
1086
- result_df = pd.DataFrame(results_for_display)
1087
- result_df['μ‹€ν–‰μ‹œκ°„'] = result_df['timestamp'].apply(lambda x: datetime.strptime(x, "%Y%m%d_%H%M%S").strftime("%Y-%m-%d %H:%M:%S"))
1088
- result_df = result_df.rename(columns={
1089
- 'task_type': 'μž‘μ—…μœ ν˜•',
1090
- 'keyword': 'ν‚€μ›Œλ“œ',
1091
- 'num_articles': 'κΈ°μ‚¬μˆ˜',
1092
- 'filename': '파일λͺ…'
1093
- })
1094
- result_df['μž‘μ—…μœ ν˜•'] = result_df['μž‘μ—…μœ ν˜•'].apply(lambda x: '일별' if x == 'daily' else 'μ‹œκ°„κ°„κ²©')
1095
-
1096
- st.dataframe(
1097
- result_df[['μž‘μ—…μœ ν˜•', 'ν‚€μ›Œλ“œ', 'κΈ°μ‚¬μˆ˜', 'μ‹€ν–‰μ‹œκ°„', '파일λͺ…']],
1098
- hide_index=True
1099
- )
1100
-
1101
- # μˆ˜μ§‘λœ 파일 보기
1102
- if os.path.exists('/tmp/scheduled_news'):
1103
- files = [f for f in os.listdir('/tmp/scheduled_news') if f.endswith('.json')]
1104
- if files:
1105
- st.subheader("μˆ˜μ§‘λœ 파일 μ—΄κΈ°")
1106
- selected_file = st.selectbox("파일 선택", files, index=len(files)-1)
1107
- if selected_file and st.button("파일 λ‚΄μš© 보기"):
1108
- with open(os.path.join('/tmp/scheduled_news', selected_file), 'r', encoding='utf-8') as f:
1109
- articles = json.load(f)
1110
-
1111
- st.write(f"**파일λͺ…:** {selected_file}")
1112
- st.write(f"**μˆ˜μ§‘ 기사 수:** {len(articles)}개")
1113
-
1114
- for article in articles:
1115
- with st.expander(f"{article['title']} - {article['source']}"):
1116
- st.write(f"**좜처:** {article['source']}")
1117
- st.write(f"**λ‚ μ§œ:** {article['date']}")
1118
- st.write(f"**링크:** {article['link']}")
1119
- st.write("**λ³Έλ¬Έ:**")
1120
- st.write(article['content'][:500] + "..." if len(article['content']) > 500 else article['content'])
1121
-
1122
- # ν‘Έν„°
1123
  st.markdown("---")
1124
- st.markdown("Β© λ‰΄μŠ€ 기사 도ꡬ @conanssam")
 
+ # app.py
+ import os
  import streamlit as st
  import pandas as pd
  import requests
  from nltk.corpus import stopwords
  from collections import Counter
  import json
  from datetime import datetime, timedelta
+ import openai
  import schedule
  import threading
  import matplotlib.pyplot as plt
+ from wordcloud import WordCloud

+ # ─── Setup: temp directory, NLTK data ────────────────────────────────────────
+ # Create the temp directory
+ TMP = "/tmp"
+ NLP_DATA = os.path.join(TMP, "nltk_data")
+ os.makedirs(NLP_DATA, exist_ok=True)

+ # Add it to the NLTK data search path
+ nltk.data.path.insert(0, NLP_DATA)

+ # Download the required NLTK resources (note: stopwords lives under corpora/,
+ # not tokenizers/, so each package carries its own lookup path)
+ for pkg, res in [("punkt", "tokenizers/punkt"), ("stopwords", "corpora/stopwords")]:
      try:
+         nltk.data.find(res)
+     except LookupError:
+         nltk.download(pkg, download_dir=NLP_DATA)
+
+ # ─── Load the OpenAI API key ─────────────────────────────────────────────────
+ # Environment variable first, then st.secrets, then sidebar input
+ OPENAI_KEY = os.getenv("OPENAI_API_KEY") or st.secrets.get("OPENAI_API_KEY")
+ if not OPENAI_KEY:
+     # Fall back to a sidebar prompt while the app is running
+     with st.sidebar:
+         st.markdown("### πŸ”‘ OpenAI API Key")
+         key_input = st.text_input("Enter your OpenAI API Key:", type="password")
+         if key_input:
+             OPENAI_KEY = key_input
+
+ if OPENAI_KEY:
+     openai.api_key = OPENAI_KEY
+ else:
+     st.sidebar.error("OpenAI API Keyκ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
+
+ # ─── Streamlit page & menu ───────────────────────────────────────────────────
+ st.set_page_config(page_title="πŸ“° News Tool", layout="wide")

  with st.sidebar:
      st.title("λ‰΄μŠ€ 기사 도ꡬ")
+     menu = st.radio("메뉴 선택", [
+         "λ‰΄μŠ€ 기사 크둀링", "기사 λΆ„μ„ν•˜κΈ°", "μƒˆ 기사 μƒμ„±ν•˜κΈ°", "λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°"
+     ])
+
+ # ─── File path helper ────────────────────────────────────────────────────────
+ def _tmp_path(*paths):
+     """Join a path under /tmp, creating parent directories."""
+     full = os.path.join(TMP, *paths)
+     os.makedirs(os.path.dirname(full), exist_ok=True)
+     return full
+
+ # ─── Load/save collected articles ────────────────────────────────────────────
  def load_saved_articles():
+     path = _tmp_path("saved_articles", "articles.json")
+     if os.path.exists(path):
+         with open(path, "r", encoding="utf-8") as f:
              return json.load(f)
      return []

  def save_articles(articles):
+     path = _tmp_path("saved_articles", "articles.json")
+     with open(path, "w", encoding="utf-8") as f:
          json.dump(articles, f, ensure_ascii=False, indent=2)

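crawl_naver_news below is wrapped in @st.cache_data, so reruns with the same keyword and article count return the cached result instead of re-hitting Naver. A minimal standalone sketch of that caching behavior (the slow function is illustrative, not part of the commit):

    import time
    import streamlit as st

    @st.cache_data
    def slow_fetch(keyword, n):
        time.sleep(2)  # stands in for the network round-trip
        return [f"{keyword}-{i}" for i in range(n)]

    slow_fetch("인곡지λŠ₯", 5)  # first call: ~2 s, result cached by arguments
    slow_fetch("인곡지λŠ₯", 5)  # same arguments: served from cache instantly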
+ # ─── Naver News crawler ──────────────────────────────────────────────────────
  @st.cache_data
  def crawl_naver_news(keyword, num_articles=5):
      url = f"https://search.naver.com/search.naver?where=news&query={keyword}"
      results = []
      try:
+         resp = requests.get(url, timeout=5)
+         soup = BeautifulSoup(resp.text, "html.parser")
+         items = soup.select("div.sds-comps-base-layout.sds-comps-full-layout")
+         for i, it in enumerate(items):
+             if i >= num_articles:
+                 break
+             title_el = it.select_one("a.X0fMYp2dHd0TCUS2hjww span")
+             link_el = it.select_one("a.X0fMYp2dHd0TCUS2hjww")
+             src_el = it.select_one("div.sds-comps-profile-info-title span")
+             date_el = it.select_one("span.r0VOr")
+             desc_el = it.select_one("a.X0fMYp2dHd0TCUS2hjww.IaKmSOGPdofdPwPE6cyU > span")
+             if not title_el or not link_el:
+                 continue
+             results.append({
+                 "title": title_el.text.strip(),
+                 "link": link_el["href"],
+                 "source": src_el.text.strip() if src_el else "μ•Œ 수 μ—†μŒ",
+                 "date": date_el.text.strip() if date_el else "μ•Œ 수 μ—†μŒ",
+                 "description": desc_el.text.strip() if desc_el else "",
+                 "content": ""
+             })
      except Exception as e:
+         st.error(f"크둀링 였λ₯˜: {e}")
      return results

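The selectors above target Naver's obfuscated class names (e.g. X0fMYp2dHd0TCUS2hjww), which tend to change without notice; when they rot, the crawler quietly returns an empty list. A self-contained illustration of the same select / select_one pattern against fixed markup (the sample HTML is illustrative):

    from bs4 import BeautifulSoup

    html = '''
    <div class="sds-comps-base-layout sds-comps-full-layout">
      <a href="https://news.example/1"><span>Sample headline</span></a>
    </div>
    '''
    soup = BeautifulSoup(html, "html.parser")
    for item in soup.select("div.sds-comps-base-layout.sds-comps-full-layout"):
        a = item.select_one("a")                  # first anchor inside the item
        print(a["href"], a.get_text(strip=True))  # https://news.example/1 Sample headline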
+ # ─── Fetch the article body ──────────────────────────────────────────────────
  def get_article_content(url):
      try:
+         resp = requests.get(url, timeout=5)
+         soup = BeautifulSoup(resp.text, "html.parser")
+         cont = soup.select_one("#dic_area") or soup.select_one(".article_body, .news-content-inner")
+         if cont:
+             text = re.sub(r"\s+", " ", cont.text.strip())
              return text
+     except Exception:
+         pass
+     return "본문을 κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€."

+ # ─── Keyword analysis & word cloud ───────────────────────────────────────────
  def analyze_keywords(text, top_n=10):
+     stop_kr = ["이", "κ·Έ", "μ €", "것", "및", "λ“±", "λ₯Ό", "을", "에", "μ—μ„œ", "의", "으둜", "둜"]
+     tokens = [w for w in word_tokenize(text) if w.isalnum() and len(w) > 1 and w not in stop_kr]
+     freq = Counter(tokens)
+     return freq.most_common(top_n)
+
+ def extract_for_wordcloud(text, top_n=50):
+     tokens = [w for w in word_tokenize(text.lower()) if w.isalnum()]
+     stop_en = set(stopwords.words("english"))
+     korea_sw = {"및", "λ“±", "λ₯Ό", "이", "의", "κ°€", "에", "λŠ”"}
+     sw = stop_en.union(korea_sw)
+     filtered = [w for w in tokens if w not in sw and len(w) > 1]
+     freq = Counter(filtered)
+     return dict(freq.most_common(top_n))
+
+ def generate_wordcloud(freq_dict):
      try:
+         wc = WordCloud(width=800, height=400, background_color="white")\
+             .generate_from_frequencies(freq_dict)
+         return wc
      except Exception as e:
+         st.error(f"μ›Œλ“œν΄λΌμš°λ“œ 생성 였λ₯˜: {e}")
+         return None

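Both keyword functions reduce to the same tokenize β†’ filter β†’ count pipeline. A standalone sketch of that pipeline (sample text and stopword set are illustrative; assumes NLTK's punkt data is available):

    from collections import Counter
    from nltk.tokenize import word_tokenize

    def top_terms(text, stop_words, n=5):
        # keep alphanumeric tokens longer than one character, drop stopwords, count
        tokens = [w for w in word_tokenize(text.lower()) if w.isalnum()]
        return Counter(w for w in tokens if len(w) > 1 and w not in stop_words).most_common(n)

    print(top_terms("AI models analyze news. More news about AI models.", {"about", "more"}))
    # e.g. [('ai', 2), ('models', 2), ('news', 2), ('analyze', 1)]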
+ # ─── OpenAI-based article & image generation ─────────────────────────────────
+ def generate_article(orig, prompt_text):
+     if not openai.api_key:
+         return "API Keyκ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
      try:
+         resp = openai.ChatCompletion.create(
+             model="gpt-3.5-turbo",
              messages=[
+                 {"role": "system", "content": "당신은 μ „λ¬Έ λ‰΄μŠ€ κΈ°μžμž…λ‹ˆλ‹€."},
+                 {"role": "user", "content": f"{prompt_text}\n\n{orig[:1000]}"}
              ],
+             max_tokens=1500
          )
+         return resp.choices[0].message["content"]
      except Exception as e:
+         return f"기사 생성 였λ₯˜: {e}"

  def generate_image(prompt):
+     if not openai.api_key:
+         return None
      try:
+         resp = openai.Image.create(prompt=prompt, n=1, size="512x512")
+         return resp["data"][0]["url"]
      except Exception as e:
+         st.error(f"이미지 생성 였λ₯˜: {e}")
+         return None

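generate_article and generate_image use the pre-1.0 openai SDK (openai.ChatCompletion.create / openai.Image.create), which was removed in openai>=1.0. A hedged migration sketch for the 1.x client, not part of this commit:

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment by default
    resp = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "당신은 μ „λ¬Έ λ‰΄μŠ€ κΈ°μžμž…λ‹ˆλ‹€."},
            {"role": "user", "content": "..."},
        ],
        max_tokens=1500,
    )
    print(resp.choices[0].message.content)  # attribute access instead of dict-style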
+ # ─── Scheduler state class ───────────────────────────────────────────────────
+ class SchedulerState:
+     def __init__(self):
+         self.is_running = False
+         self.thread = None
+         self.last_run = None
+         self.next_run = None
+         self.jobs = []
+         self.results = []
+
+ global_scheduler = SchedulerState()
+
+ def perform_news_task(task_type, kw, n, prefix):
+     arts = crawl_naver_news(kw, n)
+     for a in arts:
+         a["content"] = get_article_content(a["link"])
+         time.sleep(0.5)
+     fname = _tmp_path("scheduled_news", f"{prefix}_{task_type}_{datetime.now():%Y%m%d_%H%M%S}.json")
+     with open(fname, "w", encoding="utf-8") as f:
+         json.dump(arts, f, ensure_ascii=False, indent=2)
+     global_scheduler.last_run = datetime.now()
+     global_scheduler.results.append({
+         "type": task_type, "keyword": kw,
+         "count": len(arts), "file": fname,
+         "timestamp": global_scheduler.last_run
+     })
+
+ def run_scheduler():
+     while global_scheduler.is_running:
+         schedule.run_pending()
+         time.sleep(1)
+
+ def start_scheduler(daily, interval):
+     if global_scheduler.is_running:
+         return
+     schedule.clear()
+     global_scheduler.jobs = []
+     # Daily jobs
+     for t in daily:
+         hh, mm = t["hour"], t["minute"]
+         tag = f"d_{t['keyword']}_{hh}{mm}"
+         schedule.every().day.at(f"{hh:02d}:{mm:02d}")\
+             .do(perform_news_task, "daily", t["keyword"], t["num_articles"], tag).tag(tag)
+         global_scheduler.jobs.append(tag)
+     # Interval jobs
+     for t in interval:
+         tag = f"i_{t['keyword']}_{t['interval']}"
+         if t["immediate"]:
+             perform_news_task("interval", t["keyword"], t["num_articles"], tag)
+         schedule.every(t["interval"]).minutes\
+             .do(perform_news_task, "interval", t["keyword"], t["num_articles"], tag).tag(tag)
+         global_scheduler.jobs.append(tag)
+
+     global_scheduler.next_run = schedule.next_run()
+     global_scheduler.is_running = True
+     th = threading.Thread(target=run_scheduler, daemon=True)
+     th.start()
+     global_scheduler.thread = th

  def stop_scheduler():
+     global_scheduler.is_running = False
+     schedule.clear()
+     global_scheduler.jobs = []

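The scheduler pairs the schedule library with a daemon thread: jobs are registered once, and the thread polls run_pending() every second until the is_running flag drops. The same pattern in isolation (job body and interval are illustrative):

    import threading
    import time

    import schedule

    def job():
        print("tick", time.strftime("%H:%M:%S"))

    schedule.every(1).minutes.do(job)   # registration mirrors start_scheduler above
    stop = threading.Event()

    def loop():
        while not stop.is_set():        # stands in for the is_running flag
            schedule.run_pending()      # fires any jobs that are due
            time.sleep(1)

    threading.Thread(target=loop, daemon=True).start()
    # later: stop.set() plus schedule.clear() mirrors stop_scheduler()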
+ # ─── Screens for each menu entry ─────────────────────────────────────────────
  if menu == "λ‰΄μŠ€ 기사 크둀링":
      st.header("λ‰΄μŠ€ 기사 크둀링")
+     kw = st.text_input("πŸ” 검색어", "인곡지λŠ₯")
+     num = st.slider("κ°€μ Έμ˜¬ 기사 수", 1, 20, 5)
      if st.button("기사 κ°€μ Έμ˜€κΈ°"):
+         arts = crawl_naver_news(kw, num)
+         for i, a in enumerate(arts):
+             st.progress((i + 1) / len(arts))
+             a["content"] = get_article_content(a["link"])
+             time.sleep(0.3)
+         save_articles(arts)
+         st.success(f"{len(arts)}개 기사 μ €μž₯됨")
+         for a in arts:
+             with st.expander(a["title"]):
+                 st.write(f"좜처: {a['source']} | λ‚ μ§œ: {a['date']}")
+                 st.write(a["description"])
+                 st.write(a["content"][:300] + "…")

  elif menu == "기사 뢄석���기":
256
  st.header("기사 λΆ„μ„ν•˜κΈ°")
257
+ arts = load_saved_articles()
258
+ if not arts:
259
+ st.warning("λ¨Όμ € β€˜λ‰΄μŠ€ 기사 크둀링’ λ©”λ‰΄μ—μ„œ 기사λ₯Ό μˆ˜μ§‘ν•˜μ„Έμš”.")
 
260
  else:
261
+ titles = [a["title"] for a in arts]
262
+ sel = st.selectbox("뢄석할 기사 선택", titles)
263
+ art = next(a for a in arts if a["title"]==sel)
264
+ st.subheader(art["title"])
265
+ with st.expander("본문 보기"):
266
+ st.write(art["content"])
267
+ mode = st.radio("뢄석 방식", ["ν‚€μ›Œλ“œ 뢄석", "ν…μŠ€νŠΈ 톡계"])
268
+ if mode=="ν‚€μ›Œλ“œ 뢄석" and st.button("μ‹€ν–‰"):
269
+ kw_list = analyze_keywords(art["content"])
270
+ df = pd.DataFrame(kw_list, columns=["단어","λΉˆλ„"])
271
+ st.bar_chart(df.set_index("단어"))
272
+ st.write("μƒμœ„ ν‚€μ›Œλ“œ:")
273
+ for w,c in kw_list: st.write(f"- {w}: {c}")
274
+ # μ›Œλ“œν΄λΌμš°λ“œ
275
+ wc_data = extract_for_wordcloud(art["content"])
276
+ wc = generate_wordcloud(wc_data)
277
+ if wc:
278
+ fig,ax = plt.subplots(figsize=(8,4))
279
+ ax.imshow(wc,interp="bilinear"); ax.axis("off")
280
+ st.pyplot(fig)
281
+ if mode=="ν…μŠ€νŠΈ 톡계" and st.button("μ‹€ν–‰"):
282
+ txt=art["content"]
283
+ wcnt=len(re.findall(r"\\w+",txt))
284
+ scnt=len(re.split(r"[.!?]+",txt))
285
+ st.metric("단어 수",wcnt); st.metric("λ¬Έμž₯ 수",scnt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  elif menu == "μƒˆ 기사 μƒμ„±ν•˜κΈ°":
288
  st.header("μƒˆ 기사 μƒμ„±ν•˜κΈ°")
289
+ arts = load_saved_articles()
290
+ if not arts:
291
+ st.warning("λ¨Όμ € 기사λ₯Ό μˆ˜μ§‘ν•΄μ£Όμ„Έμš”.")
 
292
  else:
293
+ sel = st.selectbox("원본 기사 선택", [a["title"] for a in arts])
294
+ art = next(a for a in arts if a["title"]==sel)
295
+ st.write(art["content"][:200]+"…")
296
+ prompt = st.text_area("기사 μž‘μ„± μ§€μΉ¨", "기사 ν˜•μ‹μ— 맞좰 μƒˆλ‘œ μž‘μ„±ν•΄ μ£Όμ„Έμš”.")
297
+ gen_img = st.checkbox("이미지도 생성", value=True)
298
+ if st.button("생성"):
299
+ new = generate_article(art["content"], prompt)
300
+ st.subheader("μƒμ„±λœ 기사")
301
+ st.write(new)
302
+ if gen_img:
303
+ url = generate_image(f"기사 제λͺ©: {art['title']}\n\n{prompt}")
304
+ if url: st.image(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
  elif menu == "λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°":
307
  st.header("λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°")
308
+ tab1,tab2,tab3 = st.tabs(["일별 μ˜ˆμ•½","간격 μ˜ˆμ•½","μƒνƒœ"])
309
+ # 일별
 
 
 
310
  with tab1:
311
+ dkw = st.text_input("ν‚€μ›Œλ“œ(일별)", "인곡지λŠ₯", key="dk")
312
+ dnum = st.number_input("기사 수",1,20,5,key="dn")
313
+ dhh = st.number_input("μ‹œ",0,23,9,key="dh")
314
+ dmm = st.number_input("λΆ„",0,59,0,key="dm")
315
+ if st.button("μΆ”κ°€",key="addd"):
316
+ st.session_state.setdefault("daily",[]).append({
317
+ "keyword":dkw,"num_articles":dnum,
318
+ "hour":dhh,"minute":dmm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  })
320
+ if st.session_state.get("daily"):
321
+ st.write(st.session_state["daily"])
322
+ # 간격
 
 
 
 
 
 
 
 
 
 
323
  with tab2:
324
+ ikw = st.text_input("ν‚€μ›Œλ“œ(간격)", "빅데이터", key="ik")
325
+ inum = st.number_input("기사 수",1,20,5,key="in")
326
+ inter= st.number_input("간격(λΆ„)",1,1440,60,key="ii")
327
+ imm = st.checkbox("μ¦‰μ‹œ μ‹€ν–‰",True,key="im")
328
+ if st.button("μΆ”κ°€",key="addi"):
329
+ st.session_state.setdefault("interval",[]).append({
330
+ "keyword":ikw,"num_articles":inum,
331
+ "interval":inter,"immediate":imm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  })
333
+ if st.session_state.get("interval"):
334
+ st.write(st.session_state["interval"])
335
+ # μƒνƒœ
 
 
 
 
 
 
 
 
 
 
 
336
  with tab3:
337
+ if not global_scheduler.is_running and st.button("μ‹œμž‘"):
338
+ start_scheduler(st.session_state.get("daily",[]),
339
+ st.session_state.get("interval",[]))
340
+ if global_scheduler.is_running and st.button("쀑지"):
341
+ stop_scheduler()
342
+ st.write("싀행쀑:", global_scheduler.is_running)
343
+ st.write("λ§ˆμ§€λ§‰ μ‹€ν–‰:", global_scheduler.last_run)
344
+ st.write("λ‹€μŒ μ‹€ν–‰:", global_scheduler.next_run)
345
+ st.write("작 수:", global_scheduler.jobs)
346
+ st.dataframe(pd.DataFrame(global_scheduler.results))
347
+
348
+ # ─── ν‘Έν„° ────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  st.markdown("---")
350
+ st.markdown("Β© 2025 News Tool @conanssam")