Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  (+239 -258)
@@ -4,73 +4,33 @@ import requests
from bs4 import BeautifulSoup
import re
import time
import json
import os
from datetime import datetime, timedelta
import traceback
import plotly.graph_objects as go
import schedule
import threading
import matplotlib.pyplot as plt
-from pathlib import Path
-import openai
-from dotenv import load_dotenv
-
-# Set the working directory for the Hugging Face Spaces environment
-# The /tmp folder may exist but can have permission issues, so use the current working directory instead
-CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
-DATA_DIR = os.path.join(CURRENT_DIR, "data")
-SAVED_ARTICLES_PATH = os.path.join(DATA_DIR, "saved_articles.json")
-SCHEDULED_NEWS_DIR = os.path.join(DATA_DIR, "scheduled_news")
-
-# Directory creation function
-def ensure_directory(directory):
-    try:
-        os.makedirs(directory, exist_ok=True)
-        return True
-    except Exception as e:
-        st.error(f"Error while creating directory: {str(e)}")
-        return False

-#
-
-

-#
-try:
-    import kss
-    kss_available = True
-except ImportError:
-    st.warning("The KSS library is not installed. Install it with 'pip install kss'.")
-    kss_available = False
-
-# Korean tokenization function (uses KSS)
-def tokenize_korean(text):
-    try:
-        if kss_available:
-            tokens = []
-            # Split into sentences, then extract words from each sentence
-            for sentence in kss.split_sentences(text):
-                # Refine the basic whitespace tokenization with a regex pattern
-                raw_tokens = sentence.split()
-                for token in raw_tokens:
-                    # Split off particles, special characters, etc.
-                    sub_tokens = re.findall(r'[가-힣]+|[a-zA-Z]+|[0-9]+|[^\s가-힣a-zA-Z0-9]+', token)
-                    tokens.extend(sub_tokens)
-            return tokens
-    except Exception as e:
-        st.debug(f"KSS tokenization failed: {str(e)}")
-
-    # Fall back to the plain regex tokenizer when KSS is unavailable or fails
-    return re.findall(r'[가-힣]+|[a-zA-Z]+|[0-9]+|[^\s가-힣a-zA-Z0-9]+', text)
-
-# Word cloud (optional)
try:
    from wordcloud import WordCloud
-    wordcloud_available = True
except ImportError:
-
-
# Scheduler state class
class SchedulerState:
    def __init__(self):
@@ -101,6 +61,31 @@ if st.session_state.openai_api_key is None:
    load_dotenv()  # local .env file
    st.session_state.openai_api_key = os.getenv('OPENAI_API_KEY')

# Page settings
st.set_page_config(page_title="News Article Tool", page_icon="📰", layout="wide")

@@ -111,17 +96,21 @@ menu = st.sidebar.radio(
    ["News Article Crawling", "Analyze Articles", "Generate New Article", "News Article Scheduling"]
)

-#
-…
# Function to load saved articles
def load_saved_articles():
    try:
        if os.path.exists(SAVED_ARTICLES_PATH):
            with open(SAVED_ARTICLES_PATH, 'r', encoding='utf-8') as f:
                return json.load(f)
@@ -133,12 +122,15 @@ def load_saved_articles():
# Function to save articles
def save_articles(articles):
    try:
        with open(SAVED_ARTICLES_PATH, 'w', encoding='utf-8') as f:
            json.dump(articles, f, ensure_ascii=False, indent=2)
-
    except Exception as e:
        st.error(f"Error while saving articles: {str(e)}")
        return False

@st.cache_data
def crawl_naver_news(keyword, num_articles=5):
@@ -225,89 +217,49 @@ def get_article_content(url):
    except Exception as e:
        return f"Error occurred: {str(e)}"

-#
def analyze_keywords(text, top_n=10):
-    # Korean stopword list
-    korean_stopwords = [
-        '이', '그', '저', '것', '및', '등', '를', '을', '은', '에서', '의', '으로', '로',
-        '에게', '뿐', '다', '는', '가', '이다', '에게서', '께', '께서', '부터', '까지',
-        '이런', '저런', '그런', '어떤', '무슨', '이것', '저것', '그것', '이번', '저번', '그번',
-        '이거', '저거', '그거', '하다', '되다', '있다', '없다', '같다', '보다', '이렇다', '그렇다',
-        '하는', '되는', '있는', '없는', '같은', '보는', '이런', '그런', '저런', '한다', '된다',
-        '있었다', '없었다', '같았다', '봤다', '도', '또한', '그리고', '하지만', '그러나', '그래서',
-        '때문에', '따라서', '하며', '되며', '있으며', '없으며', '같으며', '보며', '하고', '되고',
-        '있고', '없고', '같고', '보고', '통해', '위해', '또', '중', '한'
-    ]

-
-    english_stopwords = [
-        'a', 'an', 'the', 'and', 'or', 'but', 'if', 'because', 'as', 'what',
-        'when', 'where', 'how', 'who', 'which', 'this', 'that', 'these', 'those',
-        'then', 'just', 'so', 'than', 'such', 'both', 'through', 'about', 'for',
-        'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
-        'having', 'do', 'does', 'did', 'doing', 'would', 'should', 'could', 'might',
-        'will', 'shall', 'can', 'may', 'must', 'ought'
-    ]

-    # Language detection (simply check whether the text contains Hangul)
-    is_korean = bool(re.search(r'[가-힣]', text))
-
-    if is_korean:
-        # For Korean text, use the KSS-based tokenizer
-        tokens = tokenize_korean(text)
-    else:
-        # For English or other languages, simple regex tokenization
-        tokens = re.findall(r'\b\w+\b', text.lower())
-
-    # Stopword filtering (apply language-specific stopwords)
-    stopwords = korean_stopwords if is_korean else english_stopwords
-    tokens = [word for word in tokens if len(word) > 1 and word.lower() not in stopwords]
-
-    # Count frequencies
-    from collections import Counter
    word_count = Counter(tokens)
    top_keywords = word_count.most_common(top_n)

    return top_keywords

-
def extract_keywords_for_wordcloud(text, top_n=50):
    if not text or len(text.strip()) < 10:
        return {}

    try:
-…

-…
-        'then', 'just', 'so', 'than', 'such', 'both', 'through', 'about', 'for',
-        'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
-        'having', 'do', 'does', 'did', 'doing', 'would', 'should', 'could', 'might',
-        'will', 'shall', 'can', 'may', 'must', 'ought'
-        }

-
-        korean_stopwords = {
            '및', '등', '를', '이', '의', '가', '에', '는', '으로', '에서', '그', '도', '또는', '하는', '할', '하고',
-…
        }
-
-        # Choose stopwords according to the language
-        stop_words = korean_stopwords if is_korean else english_stopwords

        # Keep only tokens longer than one character that are not stopwords
        filtered_tokens = [word for word in tokens if len(word) > 1 and word not in stop_words]
@@ -327,45 +279,51 @@ def extract_keywords_for_wordcloud(text, top_n=50):
        return dict(sorted_words[:top_n])

    except Exception as e:
-        st.error(f"…
        return {"data": 1, "analysis": 1, "news": 1}

# Word cloud generation function
def generate_wordcloud(keywords_dict):
-…

-

-…

# News analysis function
def analyze_news_content(news_df):
@@ -373,37 +331,32 @@ def analyze_news_content(news_df):
        return "No data available"

    results = {}
-
-    # Analysis by category
    if 'source' in news_df.columns:
-
    if 'date' in news_df.columns:
-

-
    all_text = " ".join(news_df['title'].fillna('') + " " + news_df['content'].fillna(''))

    if len(all_text.strip()) > 0:
-        results['top_keywords_for_wordcloud']
        results['top_keywords'] = analyze_keywords(all_text)
    else:
-        results['top_keywords_for_wordcloud']
        results['top_keywords'] = []
-
    return results

# Generate a new article using the OpenAI API
def generate_article(original_content, prompt_text):
    if not st.session_state.openai_api_key:
        return "Error: The OpenAI API key is not set. Enter the key in the sidebar or set it as an environment variable."
-
    try:
-        # Set the API key
-        openai.api_key = st.session_state.openai_api_key
-
-        # Call the API
        response = openai.chat.completions.create(
-            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "You are a professional news reporter. Please write a new article based on the given content."},
                {"role": "user", "content": f"Based on the following content, {prompt_text}\n\n{original_content[:1000]}"}
@@ -418,17 +371,13 @@ def generate_article(original_content, prompt_text):
def generate_image(prompt):
    if not st.session_state.openai_api_key:
        return "Error: The OpenAI API key is not set. Enter the key in the sidebar or set it as an environment variable."
-
    try:
-        # Set the API key
-        openai.api_key = st.session_state.openai_api_key
-
-        # Call the API
        response = openai.images.generate(
            model="gpt-image-1",
            prompt=prompt
        )
-        image_base64
        return f"data:image/png;base64,{image_base64}"
    except Exception as e:
        return f"Image generation error: {str(e)}"
@@ -460,12 +409,18 @@ def perform_news_task(task_type, keyword, num_articles, file_prefix):
            time.sleep(0.5)  # avoid overloading the server

    # Save the results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(SCHEDULED_NEWS_DIR, f"{file_prefix}_{task_type}_{timestamp}.json")

    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(articles, f, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"Error while saving file: {e}")
        return
@@ -587,9 +542,8 @@ if menu == "News Article Crawling":
            articles = crawl_naver_news(keyword, num_articles)

            # Fetch the article contents
-            progress_bar = st.progress(0)
            for i, article in enumerate(articles):
-
                article['content'] = get_article_content(article['link'])
                time.sleep(0.5)  # avoid overloading the server

@@ -605,7 +559,7 @@ if menu == "News Article Crawling":
                    st.write(f"**Summary:** {article['description']}")
                    st.write(f"**Link:** {article['link']}")
                    st.write("**Content preview:**")
-                    st.write(article['content'][:300] + "..."

elif menu == "Analyze Articles":
    st.header("Analyze Articles")
@@ -640,6 +594,7 @@ elif menu == "Analyze Articles":
            keyword_tab1, keyword_tab2 = st.tabs(["Keyword Frequency", "Word Cloud"])

            with keyword_tab1:
                keywords = analyze_keywords(selected_article['content'])

                # Visualization
@@ -649,38 +604,23 @@ elif menu == "Analyze Articles":
                st.write("**Top keywords:**")
                for word, count in keywords:
                    st.write(f"- {word}: {count} times")
-
            with keyword_tab2:
                keyword_dict = extract_keywords_for_wordcloud(selected_article['content'])

-                if
-

-…
-                        ax.axis('off')
-                        st.pyplot(fig)
-
-                        # Show the top 20 keywords
-                        st.write("**Top 20 keywords:**")
-                        top_keywords = sorted(keyword_dict.items(), key=lambda x: x[1], reverse=True)[:20]
-                        keyword_df = pd.DataFrame(top_keywords, columns=['Keyword', 'Frequency'])
-                        st.dataframe(keyword_df)
-                    else:
-                        st.error("Could not generate the word cloud.")
-                else:
-                    # Fallback display when the word cloud is unavailable
-                    st.warning("The word cloud feature is not available. The required package is not installed.")
-
-                    # Show only the keywords instead
-                    st.write("**Top keywords:**")
-                    top_keywords = sorted(keyword_dict.items(), key=lambda x: x[1], reverse=True)[:30]
                    keyword_df = pd.DataFrame(top_keywords, columns=['Keyword', 'Frequency'])
                    st.dataframe(keyword_df)
-
-
-                    st.bar_chart(keyword_df.set_index('Keyword').head(15))

        elif analysis_type == "Text Statistics":
            if st.button("Analyze Text Statistics"):
@@ -689,18 +629,7 @@ elif menu == "Analyze Articles":
                # Compute text statistics
                word_count = len(re.findall(r'\b\w+\b', content))
                char_count = len(content)
-
-                # Split sentences using KSS
-                if kss_available:
-                    try:
-                        sentences = kss.split_sentences(content)
-                        sentence_count = len(sentences)
-                    except Exception:
-                        # Fall back to a simple sentence split if KSS fails
-                        sentence_count = len(re.split(r'[.!?]+', content))
-                else:
-                    sentence_count = len(re.split(r'[.!?]+', content))
-
                avg_word_length = sum(len(word) for word in re.findall(r'\b\w+\b', content)) / word_count if word_count > 0 else 0
                avg_sentence_length = word_count / sentence_count if sentence_count > 0 else 0

@@ -726,38 +655,79 @@ elif menu == "Analyze Articles":
                st.write(f"Text complexity score: {complexity_score:.1f}/10")

                # Word frequency bar chart
-                st.subheader("Part-of-speech distribution")
-
-                # Language detection (simply check whether the text contains Hangul)
-                is_korean = bool(re.search(r'[가-힣]', content))
-
                try:
-                    #
-

-…

-…
-                                pos_counts[
-                            elif
-                                pos_counts['
-                            elif
-                                pos_counts['
                            else:
                                pos_counts['Other'] += 1
                    else:
-                        # English
-…
                        }

                    # Visualize the results
                    pos_df = pd.DataFrame({
@@ -780,10 +750,14 @@ elif menu == "Analyze Articles":
            if st.session_state.openai_api_key:
                with st.spinner("Analyzing the sentiment of the article..."):
                    try:
-                        # Set the API key
-                        openai.api_key
-
-
                        response = openai.chat.completions.create(
                            model="gpt-4.1-mini",
                            messages=[
@@ -855,7 +829,7 @@ elif menu == "Analyze Articles":
                    fill_color = 'rgba(158, 158, 158, 0.3)'  # light gray
                    line_color = 'rgba(158, 158, 158, 1)'  # dark gray

-                # Prepare the radar chart data
                radar_keywords = keyword_names.copy()
                radar_scores = keyword_scores.copy()

@@ -967,8 +941,7 @@ elif menu == "Generate New Article":
        with st.expander("Original article content"):
            st.write(selected_article['content'])

-        prompt_text =
-        """Rewrite this following the article format below.
Role: You are a reporter at a newspaper.
Task: You need to write a press-release-style report on a recent event. The material must be fact-based, objective, and accurate.
Guidelines:
@@ -976,13 +949,14 @@ elif menu == "Generate New Article":
Write the article title so that it clearly reflects the topic and draws readers' interest.
Compose the article body with accurate, concise, and persuasive sentences.
Include interviews with the people involved as quotations.
-Referring to the information and guidelines above, please write an article in the format of a professional press release."""

        # Option to also generate an image
        generate_image_too = st.checkbox("Also generate an image after the article", value=True)

        if st.button("Generate New Article"):
            if st.session_state.openai_api_key:
                with st.spinner("Generating the article..."):
                    new_article = generate_article(selected_article['content'], prompt_text)

@@ -1001,6 +975,13 @@ elif menu == "Generate New Article":
                    """

                    # Generate an image
                    image_url = generate_image(image_prompt)

                    if image_url and not image_url.startswith("Image generation error") and not image_url.startswith("Error: The OpenAI API key is not set."):
@@ -1176,7 +1157,7 @@ elif menu == "News Article Scheduling":
        files = [f for f in os.listdir(SCHEDULED_NEWS_DIR) if f.endswith('.json')]
        if files:
            st.subheader("Open collected files")
-            selected_file = st.selectbox("Select a file", files, index=len(files)-1 if files else 0)
            if selected_file and st.button("View file contents"):
                with open(os.path.join(SCHEDULED_NEWS_DIR, selected_file), 'r', encoding='utf-8') as f:
                    articles = json.load(f)
@@ -1194,4 +1175,4 @@ elif menu == "News Article Scheduling":

# Footer
st.markdown("---")
-st.markdown("© News Article Tool @conanssam")
@@ -4,73 +4,33 @@ import requests
from bs4 import BeautifulSoup
import re
import time
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+from collections import Counter
import json
import os
from datetime import datetime, timedelta
+import openai
+from dotenv import load_dotenv
import traceback
import plotly.graph_objects as go
import schedule
import threading
import matplotlib.pyplot as plt

+# /tmp path settings
+TMP_DIR = "/tmp"
+SAVED_ARTICLES_PATH = os.path.join(TMP_DIR, "saved_articles.json")
+SCHEDULED_NEWS_DIR = os.path.join(TMP_DIR, "scheduled_news")

+# Word cloud
try:
    from wordcloud import WordCloud
except ImportError:
+    st.error("Please install the wordcloud package: pip install wordcloud")
+    WordCloud = None
+
# Scheduler state class
class SchedulerState:
    def __init__(self):
@@ -101,6 +61,31 @@ if st.session_state.openai_api_key is None:
    load_dotenv()  # local .env file
    st.session_state.openai_api_key = os.getenv('OPENAI_API_KEY')

+# Download the required NLTK data
+try:
+    nltk.data.find('tokenizers/punkt')
+except LookupError:
+    nltk.download('punkt')
+
+try:
+    nltk.data.find('tokenizers/punkt_tab')
+except LookupError:
+    nltk.download('punkt_tab')
+
+try:
+    nltk.data.find('corpora/stopwords')
+except LookupError:
+    nltk.download('stopwords')
+
+# Set the OpenAI API key
+# Either set openai.api_key right before each API call from st.session_state.openai_api_key,
+# or set it once at app startup; the latter is chosen here.
+if st.session_state.openai_api_key:
+    openai.api_key = st.session_state.openai_api_key
+else:
+    # The key may not be available when the UI first loads, so it is set later once a key is entered
+    pass
+
# Page settings
st.set_page_config(page_title="News Article Tool", page_icon="📰", layout="wide")

@@ -111,17 +96,21 @@ menu = st.sidebar.radio(
    ["News Article Crawling", "Analyze Articles", "Generate New Article", "News Article Scheduling"]
)

+# Directory creation function
+def ensure_directory(directory):
+    try:
+        os.makedirs(directory, mode=0o777, exist_ok=True)
+        # Set directory permissions
+        os.chmod(directory, 0o777)
+    except Exception as e:
+        st.error(f"Error while creating directory: {str(e)}")
+        return False
+    return True
+
# Function to load saved articles
def load_saved_articles():
    try:
+        ensure_directory(TMP_DIR)
        if os.path.exists(SAVED_ARTICLES_PATH):
            with open(SAVED_ARTICLES_PATH, 'r', encoding='utf-8') as f:
                return json.load(f)
@@ -133,12 +122,15 @@ def load_saved_articles():
# Function to save articles
def save_articles(articles):
    try:
+        ensure_directory(TMP_DIR)
        with open(SAVED_ARTICLES_PATH, 'w', encoding='utf-8') as f:
            json.dump(articles, f, ensure_ascii=False, indent=2)
+        # Set file permissions
+        os.chmod(SAVED_ARTICLES_PATH, 0o666)
    except Exception as e:
        st.error(f"Error while saving articles: {str(e)}")
        return False
+    return True

@st.cache_data
def crawl_naver_news(keyword, num_articles=5):
@@ -225,89 +217,49 @@ def get_article_content(url):
    except Exception as e:
        return f"Error occurred: {str(e)}"

+# Keyword analysis using NLTK
def analyze_keywords(text, top_n=10):
+    # Korean stopword list (must be defined manually)
+    korean_stopwords = ['이', '그', '저', '것', '및', '등', '를', '을', '은', '에서', '의', '으로', '로']

+    tokens = word_tokenize(text)
+    tokens = [word for word in tokens if word.isalnum() and len(word) > 1 and word not in korean_stopwords]

    word_count = Counter(tokens)
    top_keywords = word_count.most_common(top_n)

    return top_keywords

+# Analysis for the word cloud
def extract_keywords_for_wordcloud(text, top_n=50):
    if not text or len(text.strip()) < 10:
        return {}

    try:
+        try:
+            tokens = word_tokenize(text.lower())
+        except Exception as e:
+            st.warning(f"Error occurred: {str(e)}")
+            tokens = text.lower().split()

+        stop_words = set()
+        try:
+            stop_words = set(stopwords.words('english'))
+        except Exception:
+            pass

+        korea_stop_words = {
            '및', '등', '를', '이', '의', '가', '에', '는', '으로', '에서', '그', '도', '또는', '하는', '할', '하고',
+            '있다', '이다', '위해', '것이다', '것은', '대한', '때문', '그리고', '하지만', '그러나', '그래서',
+            '입니다', '합니다', '습니다', '요', '죠', '고', '과', '와', '도', '은', '의', '것', '들', '저', '제',
+            '년', '월', '일', '시', '분', '초', '지난', '올해', '내년', '최근', '현재', '오늘', '내일', '어제',
+            '오전', '오후', '부터', '까지', '에게', '께서', '이라고', '라고', '하며', '하면서', '따라', '통해',
+            '관련', '한편', '특히', '가장', '매우', '더', '덜', '많이', '조금', '항상', '자주', '가끔', '거의',
+            '전혀', '바로', '정말', '만약', '비롯한', '등을', '등이', '등의', '등과', '등도', '등에', '등에서',
+            '기자', '뉴스', '사진', '연합뉴스', '뉴시스', '제공', '무단', '전재', '재배포', '금지', '앵커', '멘트',
+            '일보', '데일리', '경제', '사회', '정치', '세계', '과학', '아이티', '닷컴', '씨넷', '블로터', '전자신문'
        }
+        stop_words.update(korea_stop_words)

        # Keep only tokens longer than one character that are not stopwords
        filtered_tokens = [word for word in tokens if len(word) > 1 and word not in stop_words]
@@ -327,45 +279,51 @@ def extract_keywords_for_wordcloud(text, top_n=50):
        return dict(sorted_words[:top_n])

    except Exception as e:
+        st.error(f"Error occurred: {str(e)}")
        return {"data": 1, "analysis": 1, "news": 1}
+

# Word cloud generation function
+
def generate_wordcloud(keywords_dict):
+    if not WordCloud:
+        st.warning("The wordcloud package is not installed.")
+        return None
+    try:
+        # Assume NanumGothic.ttf is in the project root
+        font_path = "NanumGothic.ttf"
+
+        # Check whether the font file exists locally; if not, fall back to the default font
+        if not os.path.exists(font_path):
+            st.warning(f"Could not find the font file ({font_path}). Generating the word cloud with the default font; Hangul may not render correctly.")
+            # font_path = None  # or point to a system default font path (differs per platform)
+            # Leaving font_path out of the WordCloud constructor falls back to the system default
+            wc = WordCloud(
+                width=800,
+                height=400,
+                background_color='white',
+                colormap='viridis',
+                max_font_size=150,
+                random_state=42
+            ).generate_from_frequencies(keywords_dict)
+        else:
+            wc = WordCloud(
+                font_path=font_path,
+                width=800,
+                height=400,
+                background_color='white',
+                colormap='viridis',
+                max_font_size=150,
+                random_state=42
+            ).generate_from_frequencies(keywords_dict)

+        return wc

+    except Exception as e:
+        st.error(f"Error while generating the word cloud: {str(e)}")
+        # traceback.print_exc()  # use for debugging
+        st.warning("Failed to generate the word cloud. It may be a font problem; check that NanumGothic.ttf exists in the project root.")
+        return None

# News analysis function
def analyze_news_content(news_df):
@@ -373,37 +331,32 @@ def analyze_news_content(news_df):
        return "No data available"

    results = {}
+    # By source
    if 'source' in news_df.columns:
+        results['source_counts'] = news_df['source'].value_counts().to_dict()
+    # By date
    if 'date' in news_df.columns:
+        results['date_counts'] = news_df['date'].value_counts().to_dict()

+    # Keyword analysis
    all_text = " ".join(news_df['title'].fillna('') + " " + news_df['content'].fillna(''))

    if len(all_text.strip()) > 0:
+        results['top_keywords_for_wordcloud'] = extract_keywords_for_wordcloud(all_text, top_n=50)
        results['top_keywords'] = analyze_keywords(all_text)
    else:
+        results['top_keywords_for_wordcloud'] = {}
        results['top_keywords'] = []
    return results

# Generate a new article using the OpenAI API
def generate_article(original_content, prompt_text):
    if not st.session_state.openai_api_key:
        return "Error: The OpenAI API key is not set. Enter the key in the sidebar or set it as an environment variable."
+    openai.api_key = st.session_state.openai_api_key
    try:
        response = openai.chat.completions.create(
+            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "You are a professional news reporter. Please write a new article based on the given content."},
                {"role": "user", "content": f"Based on the following content, {prompt_text}\n\n{original_content[:1000]}"}
@@ -418,17 +371,13 @@ def generate_article(original_content, prompt_text):
def generate_image(prompt):
    if not st.session_state.openai_api_key:
        return "Error: The OpenAI API key is not set. Enter the key in the sidebar or set it as an environment variable."
+    openai.api_key = st.session_state.openai_api_key
    try:
        response = openai.images.generate(
            model="gpt-image-1",
            prompt=prompt
        )
+        image_base64 = response.data[0].b64_json
        return f"data:image/png;base64,{image_base64}"
    except Exception as e:
        return f"Image generation error: {str(e)}"
@@ -460,12 +409,18 @@ def perform_news_task(task_type, keyword, num_articles, file_prefix):
            time.sleep(0.5)  # avoid overloading the server

    # Save the results
+    if not ensure_directory(SCHEDULED_NEWS_DIR):
+        print(f"Failed to create the scheduled news directory")
+        return
+
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(SCHEDULED_NEWS_DIR, f"{file_prefix}_{task_type}_{timestamp}.json")

    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(articles, f, ensure_ascii=False, indent=2)
+        # Set file permissions
+        os.chmod(filename, 0o666)
    except Exception as e:
        print(f"Error while saving file: {e}")
        return
@@ -587,9 +542,8 @@ if menu == "News Article Crawling":
            articles = crawl_naver_news(keyword, num_articles)

            # Fetch the article contents
            for i, article in enumerate(articles):
+                st.progress((i + 1) / len(articles))
                article['content'] = get_article_content(article['link'])
                time.sleep(0.5)  # avoid overloading the server

@@ -605,7 +559,7 @@ if menu == "News Article Crawling":
                    st.write(f"**Summary:** {article['description']}")
                    st.write(f"**Link:** {article['link']}")
                    st.write("**Content preview:**")
+                    st.write(article['content'][:300] + "...")

elif menu == "Analyze Articles":
    st.header("Analyze Articles")
@@ -640,6 +594,7 @@ elif menu == "Analyze Articles":
            keyword_tab1, keyword_tab2 = st.tabs(["Keyword Frequency", "Word Cloud"])

            with keyword_tab1:
+
                keywords = analyze_keywords(selected_article['content'])

                # Visualization
@@ -649,38 +604,23 @@ elif menu == "Analyze Articles":
                st.write("**Top keywords:**")
                for word, count in keywords:
                    st.write(f"- {word}: {count} times")
            with keyword_tab2:
                keyword_dict = extract_keywords_for_wordcloud(selected_article['content'])
+                wc = generate_wordcloud(keyword_dict)

+                if wc:
+                    fig, ax = plt.subplots(figsize=(10, 5))
+                    ax.imshow(wc, interpolation='bilinear')
+                    ax.axis('off')
+                    st.pyplot(fig)

+                    # Show the top 20 keywords
+                    st.write("**Top 20 keywords:**")
+                    top_keywords = sorted(keyword_dict.items(), key=lambda x: x[1], reverse=True)[:20]
                    keyword_df = pd.DataFrame(top_keywords, columns=['Keyword', 'Frequency'])
                    st.dataframe(keyword_df)
+                else:
+                    st.error("Could not generate the word cloud.")

        elif analysis_type == "Text Statistics":
            if st.button("Analyze Text Statistics"):
@@ -689,18 +629,7 @@ elif menu == "Analyze Articles":
                # Compute text statistics
                word_count = len(re.findall(r'\b\w+\b', content))
                char_count = len(content)
+                sentence_count = len(re.split(r'[.!?]+', content))
                avg_word_length = sum(len(word) for word in re.findall(r'\b\w+\b', content)) / word_count if word_count > 0 else 0
                avg_sentence_length = word_count / sentence_count if sentence_count > 0 else 0

@@ -726,38 +655,79 @@ elif menu == "Analyze Articles":
                st.write(f"Text complexity score: {complexity_score:.1f}/10")

                # Word frequency bar chart
+                st.subheader("Part-of-speech distribution (Korean/English supported)")
                try:
+                    # Check whether KoNLPy is installed
+                    try:
+                        from konlpy.tag import Okt
+                        konlpy_installed = True
+                    except ImportError:
+                        konlpy_installed = False
+                        st.warning("Please install KoNLPy for Korean morphological analysis: pip install konlpy")

+                    # Prepare the English POS tagger
+                    from nltk import pos_tag
+                    try:
+                        nltk.data.find('taggers/averaged_perceptron_tagger')
+                    except LookupError:
+                        nltk.download('averaged_perceptron_tagger')
+
+                    # Try using the correct resource name as shown in the error message
+                    try:
+                        nltk.data.find('averaged_perceptron_tagger_eng')
+                    except LookupError:
+                        nltk.download('averaged_perceptron_tagger_eng')
+
+                    # Language detection (simple check)
+                    is_korean = bool(re.search(r'[가-힣]', content))
+
+                    if is_korean and konlpy_installed:
+                        # Korean morphological analysis
+                        okt = Okt()
+                        tagged = okt.pos(content)
+
+                        # Korean POS mapping
+                        pos_dict = {
+                            'Noun': 'Noun', 'NNG': 'Noun', 'NNP': 'Proper noun',
+                            'Verb': 'Verb', 'VV': 'Verb', 'VA': 'Adjective',
+                            'Adjective': 'Adjective',
+                            'Adverb': 'Adverb',
+                            'Josa': 'Particle', 'Punctuation': 'Punctuation',
+                            'Determiner': 'Determiner', 'Exclamation': 'Exclamation'
+                        }

+                        pos_counts = {'Noun': 0, 'Verb': 0, 'Adjective': 0, 'Adverb': 0, 'Particle': 0, 'Punctuation': 0, 'Determiner': 0, 'Exclamation': 0, 'Other': 0}
+
+                        for _, pos in tagged:
+                            if pos in pos_dict:
+                                pos_counts[pos_dict[pos]] += 1
+                            elif pos.startswith('N'):  # other noun-like tags
+                                pos_counts['Noun'] += 1
+                            elif pos.startswith('V'):  # other verb-like tags
+                                pos_counts['Verb'] += 1
                            else:
                                pos_counts['Other'] += 1
+
                    else:
+                        # English POS tagging
+                        tokens = word_tokenize(content.lower())
+                        tagged = pos_tag(tokens)
+
+                        # English POS mapping
+                        pos_dict = {
+                            'NN': 'Noun', 'NNS': 'Noun', 'NNP': 'Proper noun', 'NNPS': 'Proper noun',
+                            'VB': 'Verb', 'VBD': 'Verb', 'VBG': 'Verb', 'VBN': 'Verb', 'VBP': 'Verb', 'VBZ': 'Verb',
+                            'JJ': 'Adjective', 'JJR': 'Adjective', 'JJS': 'Adjective',
+                            'RB': 'Adverb', 'RBR': 'Adverb', 'RBS': 'Adverb'
                        }
+
+                        pos_counts = {'Noun': 0, 'Verb': 0, 'Adjective': 0, 'Adverb': 0, 'Other': 0}
+
+                        for _, pos in tagged:
+                            if pos in pos_dict:
+                                pos_counts[pos_dict[pos]] += 1
+                            else:
+                                pos_counts['Other'] += 1

                    # Visualize the results
                    pos_df = pd.DataFrame({
@@ -780,10 +750,14 @@ elif menu == "Analyze Articles":
            if st.session_state.openai_api_key:
                with st.spinner("Analyzing the sentiment of the article..."):
                    try:
+                        # Check and set the key before the sentiment-analysis API call
+                        if not openai.api_key:
+                            if st.session_state.openai_api_key:
+                                openai.api_key = st.session_state.openai_api_key
+                            else:
+                                st.error("The OpenAI API key is not set.")
+                                st.stop()
+
                        response = openai.chat.completions.create(
                            model="gpt-4.1-mini",
                            messages=[
@@ -855,7 +829,7 @@ elif menu == "Analyze Articles":
                    fill_color = 'rgba(158, 158, 158, 0.3)'  # light gray
                    line_color = 'rgba(158, 158, 158, 1)'  # dark gray

+                # Prepare the radar chart data - append data so the last point connects back to the first
                radar_keywords = keyword_names.copy()
                radar_scores = keyword_scores.copy()

@@ -967,8 +941,7 @@ elif menu == "Generate New Article":
        with st.expander("Original article content"):
            st.write(selected_article['content'])

+        prompt_text ="""Rewrite this following the article format below.
Role: You are a reporter at a newspaper.
Task: You need to write a press-release-style report on a recent event. The material must be fact-based, objective, and accurate.
Guidelines:
@@ -976,13 +949,14 @@ elif menu == "Generate New Article":
Write the article title so that it clearly reflects the topic and draws readers' interest.
Compose the article body with accurate, concise, and persuasive sentences.
Include interviews with the people involved as quotations.
+Referring to the information and guidelines above, please write an article in the format of a professional press release."""

        # Option to also generate an image
        generate_image_too = st.checkbox("Also generate an image after the article", value=True)

        if st.button("Generate New Article"):
            if st.session_state.openai_api_key:
+                # openai.api_key = st.session_state.openai_api_key  # already set at startup, or set inside each function call
                with st.spinner("Generating the article..."):
                    new_article = generate_article(selected_article['content'], prompt_text)

@@ -1001,6 +975,13 @@ elif menu == "Generate New Article":
                    """

                    # Generate an image
+                    # Check and set the key before the image-generation API call
+                    if not openai.api_key:
+                        if st.session_state.openai_api_key:
+                            openai.api_key = st.session_state.openai_api_key
+                        else:
+                            st.error("The OpenAI API key is not set.")
+                            st.stop()
                    image_url = generate_image(image_prompt)

                    if image_url and not image_url.startswith("Image generation error") and not image_url.startswith("Error: The OpenAI API key is not set."):
@@ -1176,7 +1157,7 @@ elif menu == "News Article Scheduling":
        files = [f for f in os.listdir(SCHEDULED_NEWS_DIR) if f.endswith('.json')]
        if files:
            st.subheader("Open collected files")
+            selected_file = st.selectbox("Select a file", files, index=len(files)-1 if files else 0)  # guard against files being empty
            if selected_file and st.button("View file contents"):
                with open(os.path.join(SCHEDULED_NEWS_DIR, selected_file), 'r', encoding='utf-8') as f:
                    articles = json.load(f)
@@ -1194,4 +1175,4 @@ elif menu == "News Article Scheduling":

# Footer
st.markdown("---")
+st.markdown("© News Article Tool @conanssam")
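
For reference, the keyword analysis this revision switches to follows one simple pattern: tokenize with NLTK's word_tokenize, drop stopwords and one-character tokens, then count frequencies with collections.Counter. Below is a minimal standalone sketch of that pattern, not the app's exact code; it assumes only that NLTK is installed and can download its punkt and stopwords data (recent NLTK releases may additionally want punkt_tab, which the diff above also downloads). The function name, sample text, and extra_stopwords parameter are illustrative.

# Illustrative sketch only -- mirrors the analyze_keywords() idea above.
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter

# Fetch the NLTK data the tokenizer and stopword list need, if it is missing.
for resource, path in [("punkt", "tokenizers/punkt"), ("stopwords", "corpora/stopwords")]:
    try:
        nltk.data.find(path)
    except LookupError:
        nltk.download(resource)

def top_keywords(text, top_n=10, extra_stopwords=()):
    """Return the top_n most frequent alphanumeric tokens, excluding stopwords."""
    stop_words = set(stopwords.words("english")) | set(extra_stopwords)
    tokens = [w for w in word_tokenize(text.lower())
              if w.isalnum() and len(w) > 1 and w not in stop_words]
    return Counter(tokens).most_common(top_n)

print(top_keywords("The quick brown fox jumps over the lazy dog. The dog barks again."))
# e.g. [('dog', 2), ('quick', 1), ('brown', 1), ...]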