# example-ai-crawler / src/streamlit_app.py
import streamlit as st
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import time
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
import json
import os
from datetime import datetime, timedelta
from openai import OpenAI # new-style OpenAI client import
from dotenv import load_dotenv
import traceback
import plotly.graph_objects as go
import schedule
import threading
import matplotlib.pyplot as plt
import kss # KSS for Korean sentence splitting (instead of KoNLPy)
from PIL import Image
import base64
from io import BytesIO
import logging
# Logging configuration
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler(),
logging.FileHandler('/tmp/crawler.log')
]
)
# μ›Œλ“œν΄λΌμš°λ“œ μΆ”κ°€
try:
from wordcloud import WordCloud
except ImportError:
st.error("wordcloud νŒ¨ν‚€μ§€λ₯Ό μ„€μΉ˜ν•΄μ£Όμ„Έμš”: pip install wordcloud")
WordCloud = None
# μŠ€μΌ€μ€„λŸ¬ μƒνƒœ 클래슀 μΆ”κ°€
class SchedulerState:
def __init__(self):
self.is_running = False
self.thread = None
self.last_run = None
self.next_run = None
self.scheduled_jobs = []
self.scheduled_results = []
# Global scheduler state object (used from the worker thread)
global_scheduler_state = SchedulerState()
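# Note: a module-level object is used here instead of st.session_state because
# session state belongs to a single script run and is not visible from the
# background scheduler thread started further below.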
# Initialize session state for API key management
if 'openai_client' not in st.session_state:
st.session_state.openai_client = None
# Try to load the API key from several sources
load_dotenv() # try a .env file first
# OpenAI ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™”λ₯Ό μœ„ν•œ ν•¨μˆ˜
def init_openai_client(api_key=None):
try:
if api_key:
client = OpenAI(api_key=api_key)
# Quick validity check for the API key
client.models.list() # raises if the key is invalid
return client
return None
except Exception as e:
st.error(f"API ν‚€ μ΄ˆκΈ°ν™” 였λ₯˜: {str(e)}")
return None
# 1. Check the OPENAI_API_KEY environment variable
api_key = os.environ.get('OPENAI_API_KEY')
if api_key:
st.session_state.openai_client = init_openai_client(api_key)
# 2. Check Streamlit secrets
if not st.session_state.openai_client:
try:
if 'OPENAI_API_KEY' in st.secrets:
st.session_state.openai_client = init_openai_client(st.secrets['OPENAI_API_KEY'])
except Exception:
pass # a missing secrets file is not an error
# NLTK data path - use nltk_data inside the current workspace
nltk_data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'nltk_data')
nltk.data.path.insert(0, nltk_data_path)
# Make sure the required NLTK data is present
try:
nltk.data.find('tokenizers/punkt')
except LookupError:
nltk.download('punkt', download_dir=nltk_data_path)
try:
nltk.data.find('corpora/stopwords')
except LookupError:
nltk.download('stopwords', download_dir=nltk_data_path)
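# NLTK 3.9+ resolves word_tokenize through 'punkt_tab' rather than 'punkt';
# fetching it as well should be a harmless no-op on older releases (an
# assumption worth keeping while the NLTK version is unpinned).
try:
nltk.data.find('tokenizers/punkt_tab')
except LookupError:
nltk.download('punkt_tab', download_dir=nltk_data_path)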
# νŽ˜μ΄μ§€ μ„€μ •
st.set_page_config(page_title="λ‰΄μŠ€ 기사 도ꡬ", page_icon="πŸ“°", layout="wide")
# μ‚¬μ΄λ“œλ°”μ— API ν‚€ μž…λ ₯ ν•„λ“œ μΆ”κ°€
with st.sidebar:
st.title("λ‰΄μŠ€ 기사 도ꡬ")
menu = st.radio(
"메뉴 선택",
["λ‰΄μŠ€ 기사 크둀링", "기사 λΆ„μ„ν•˜κΈ°", "μƒˆ 기사 μƒμ„±ν•˜κΈ°", "λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°"]
)
st.divider()
api_key = st.text_input("OpenAI API ν‚€ μž…λ ₯", type="password")
if api_key:
client = init_openai_client(api_key)
if client:
st.session_state.openai_client = client
st.success("API ν‚€κ°€ μ„±κ³΅μ μœΌλ‘œ μ„€μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
else:
st.error("μœ νš¨ν•˜μ§€ μ•Šμ€ API ν‚€μž…λ‹ˆλ‹€.")
# Load previously saved articles
def load_saved_articles():
if os.path.exists('/tmp/saved_articles/articles.json'):
with open('/tmp/saved_articles/articles.json', 'r', encoding='utf-8') as f:
return json.load(f)
return []
# Save articles to disk
def save_articles(articles):
os.makedirs('/tmp/saved_articles', exist_ok=True)
with open('/tmp/saved_articles/articles.json', 'w', encoding='utf-8') as f:
json.dump(articles, f, ensure_ascii=False, indent=2)
@st.cache_data
def crawl_naver_news(keyword, num_articles=5):
"""
넀이버 λ‰΄μŠ€ 기사λ₯Ό μˆ˜μ§‘ν•˜λŠ” ν•¨μˆ˜
"""
logging.info(f"크둀링 μ‹œμž‘: ν‚€μ›Œλ“œ={keyword}, 기사 수={num_articles}")
url = f"https://search.naver.com/search.naver?where=news&query={keyword}"
results = []
try:
# νŽ˜μ΄μ§€ μš”μ²­
logging.info(f"μš”μ²­ URL: {url}")
response = requests.get(url)
logging.info(f"응닡 μƒνƒœ μ½”λ“œ: {response.status_code}")
soup = BeautifulSoup(response.text, 'html.parser')
# λ‰΄μŠ€ μ•„μ΄ν…œ μ°ΎκΈ°
news_items = soup.select('div.sds-comps-base-layout.sds-comps-full-layout')
logging.info(f"찾은 λ‰΄μŠ€ μ•„μ΄ν…œ 수: {len(news_items)}")
# Extract fields from each news item
for i, item in enumerate(news_items):
if i >= num_articles:
break
try:
# Title and link
title_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww span')
if not title_element:
continue
title = title_element.text.strip()
link_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww')
link = link_element['href'] if link_element else ""
# Press/source name
press_element = item.select_one('div.sds-comps-profile-info-title span.sds-comps-text-type-body2')
source = press_element.text.strip() if press_element else "μ•Œ 수 μ—†μŒ"
# Date
date_element = item.select_one('span.r0VOr')
date = date_element.text.strip() if date_element else "μ•Œ 수 μ—†μŒ"
# Preview text
desc_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww.IaKmSOGPdofdPwPE6cyU > span')
description = desc_element.text.strip() if desc_element else "λ‚΄μš© μ—†μŒ"
results.append({
'title': title,
'link': link,
'description': description,
'source': source,
'date': date,
'content': ""
})
logging.info(f"기사 μΆ”μΆœ 성곡: {title}")
except Exception as e:
logging.error(f"기사 정보 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ: {str(e)}", exc_info=True)
continue
except Exception as e:
logging.error(f"νŽ˜μ΄μ§€ μš”μ²­ 쀑 였λ₯˜ λ°œμƒ: {str(e)}", exc_info=True)
logging.info(f"크둀링 μ™„λ£Œ: {len(results)}개 기사 μˆ˜μ§‘")
return results
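# Note: the class names targeted above (e.g. 'X0fMYp2dHd0TCUS2hjww') are
# build-generated CSS classes from Naver's search markup; they change whenever
# Naver redeploys, so the selectors must be re-checked when no items are found.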
# Fetch the full article body
def get_article_content(url):
logging.info(f"기사 원문 κ°€μ Έμ˜€κΈ° μ‹œμž‘: {url}")
try:
response = requests.get(url, timeout=5)
logging.info(f"원문 μš”μ²­ μƒνƒœ μ½”λ“œ: {response.status_code}")
soup = BeautifulSoup(response.text, 'html.parser')
# Naver news article body
content = soup.select_one('#dic_area')
if content:
text = content.text.strip()
text = re.sub(r'\s+', ' ', text)
logging.info("넀이버 λ‰΄μŠ€ λ³Έλ¬Έ μΆ”μΆœ 성곡")
return text
# Fallback selectors for other news sites
content = soup.select_one('.article_body, .article-body, .article-content, .news-content-inner')
if content:
text = content.text.strip()
text = re.sub(r'\s+', ' ', text)
logging.info("일반 λ‰΄μŠ€ λ³Έλ¬Έ μΆ”μΆœ 성곡")
return text
logging.warning("본문을 찾을 수 μ—†μŒ")
return "본문을 κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€."
except Exception as e:
logging.error(f"원문 κ°€μ Έμ˜€κΈ° 였λ₯˜: {str(e)}", exc_info=True)
return f"였λ₯˜ λ°œμƒ: {str(e)}"
# Keyword frequency analysis (KSS for sentence splitting, whitespace tokenization)
def analyze_keywords(text, top_n=10):
# Korean stopword list
korean_stopwords = ['이', 'κ·Έ', 'μ €', '것', '및', 'λ“±', 'λ₯Ό', '을', '에', 'μ—μ„œ', '의', '으둜', '둜']
# Split into sentences with KSS, then tokenize
try:
sentences = kss.split_sentences(text)
tokens = []
for sentence in sentences:
# Simple whitespace tokenization
tokens.extend(sentence.split())
except Exception:
# Fall back to whitespace tokenization if KSS fails
tokens = text.split()
tokens = [word for word in tokens if word.isalnum() and len(word) > 1 and word not in korean_stopwords]
word_count = Counter(tokens)
top_keywords = word_count.most_common(top_n)
return top_keywords
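# Minimal usage sketch (assuming `text` holds an article body):
#   analyze_keywords(text, top_n=3) -> e.g. [('인곡지λŠ₯', 12), ('개발', 7), ('κΈ°μ—…', 5)]
# Whitespace tokenization leaves Korean particles attached to words, so the
# counts are approximate; a morphological analyzer would be more precise.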
#μ›Œλ“œ ν΄λΌμš°λ“œμš© 뢄석
def extract_keywords_for_wordcloud(text, top_n=50):
if not text or len(text.strip()) < 10:
return {}
try:
try:
tokens = word_tokenize(text.lower())
except Exception as e:
st.warning(f"{str(e)} 였λ₯˜λ°œμƒ")
tokens = text.lower().split()
stop_words = set()
try:
stop_words = set(stopwords.words('english'))
except Exception:
pass
korea_stop_words = {
'및', 'λ“±', 'λ₯Ό', '이', '의', 'κ°€', '에', 'λŠ”', '으둜', 'μ—μ„œ', 'κ·Έ', '또', 'λ˜λŠ”', 'ν•˜λŠ”', 'ν• ', 'ν•˜κ³ ',
'μžˆλ‹€', '이닀', 'μœ„ν•΄', '것이닀', '것은', 'λŒ€ν•œ', 'λ•Œλ¬Έ', '그리고', 'ν•˜μ§€λ§Œ', 'κ·ΈλŸ¬λ‚˜', 'κ·Έλž˜μ„œ',
'μž…λ‹ˆλ‹€', 'ν•©λ‹ˆλ‹€', 'μŠ΅λ‹ˆλ‹€', 'μš”', 'μ£ ', 'κ³ ', 'κ³Ό', '와', '도', '은', '수', '것', 'λ“€', '제', 'μ €',
'λ…„', 'μ›”', '일', 'μ‹œ', 'λΆ„', '초', 'μ§€λ‚œ', 'μ˜¬ν•΄', 'λ‚΄λ…„', '졜근', 'ν˜„μž¬', '였늘', '내일', 'μ–΄μ œ',
'μ˜€μ „', 'μ˜€ν›„', 'λΆ€ν„°', 'κΉŒμ§€', 'μ—κ²Œ', 'κ»˜μ„œ', '이라고', '라고', 'ν•˜λ©°', 'ν•˜λ©΄μ„œ', '따라', '톡해',
'κ΄€λ ¨', 'ν•œνŽΈ', '특히', 'κ°€μž₯', '맀우', '더', '덜', '많이', '쑰금', '항상', '자주', '가끔', '거의',
'μ „ν˜€', 'λ°”λ‘œ', '정말', 'λ§Œμ•½', 'λΉ„λ‘―ν•œ', '등을', '등이', 'λ“±μ˜', 'λ“±κ³Ό', '등도', '등에', 'λ“±μ—μ„œ',
'기자', 'λ‰΄μŠ€', '사진', 'μ—°ν•©λ‰΄μŠ€', 'λ‰΄μ‹œμŠ€', '제곡', '무단', 'μ „μž¬', '재배포', 'κΈˆμ§€', '액컀', '멘트',
'일보', '데일리', '경제', 'μ‚¬νšŒ', 'μ •μΉ˜', '세계', 'κ³Όν•™', '아이티', 'λ‹·μ»΄', '씨넷', 'λΈ”λ‘œν„°', 'μ „μžμ‹ λ¬Έ'
}
stop_words.update(korea_stop_words)
# Keep tokens longer than one character that are not stopwords
filtered_tokens = [word for word in tokens if len(word) > 1 and word not in stop_words]
# 단어 λΉˆλ„ 계산
word_freq = {}
for word in filtered_tokens:
if word.isalnum(): # allow only alphanumeric tokens
word_freq[word] = word_freq.get(word, 0) + 1
# λΉˆλ„μˆœμœΌλ‘œ μ •λ ¬ν•˜μ—¬ μƒμœ„ n개 λ°˜ν™˜
sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
if not sorted_words:
return {"data": 1, "analysis": 1, "news": 1}
return dict(sorted_words[:top_n])
except Exception as e:
st.error(f"였λ₯˜λ°œμƒ {str(e)}")
return {"data": 1, "analysis": 1, "news": 1}
# μ›Œλ“œ ν΄λΌμš°λ“œ 생성 ν•¨μˆ˜
def generate_wordcloud(keywords_dict):
if not WordCloud:
st.warning("μ›Œλ“œν΄λΌμš°λ“œ μ„€μΉ˜μ•ˆλ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.")
return None
try:
# Base WordCloud object (no font path)
wc = WordCloud(
width=800,
height=400,
background_color='white',
colormap='viridis',
max_font_size=150,
random_state=42
)
try:
script_dir = os.path.dirname(os.path.abspath(__file__))
# Name of the font file placed in the project root.
# If you use a differently named font, change this (e.g. "YourFontName.ttf").
possible_font_paths = ["NanumGothic.ttf"]
font_path = None
for path_segment in possible_font_paths:
candidate = os.path.join(script_dir, path_segment)
if os.path.exists(candidate):
font_path = candidate
break
# Rebuild the WordCloud with the font path only if one was found
if font_path:
wc = WordCloud(
font_path=font_path,
width=800,
height=400,
background_color='white',
colormap='viridis',
max_font_size=150,
random_state=42
).generate_from_frequencies(keywords_dict)
else:
st.warning(f"μ§€μ •λœ ν•œκ΅­μ–΄ κΈ€κΌ΄ 파일({', '.join(possible_font_paths)})을 슀크립트 λ””λ ‰ν„°λ¦¬μ—μ„œ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. μ›Œλ“œν΄λΌμš°λ“œκ°€ 깨질 수 μžˆμŠ΅λ‹ˆλ‹€.")
except Exception as e:
print(f"κΈ€κΌ΄ λ‘œλ”© 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
st.warning(f"κΈ€κΌ΄ λ‘œλ”© 쀑 μ˜ˆμƒμΉ˜ λͺ»ν•œ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}") # μ‚¬μš©μžμ—κ²Œλ„ κ²½κ³  ν‘œμ‹œ
# μ΅œμ’…μ μœΌλ‘œ wc 객체 λ°˜ν™˜ (ν°νŠΈκ°€ μ μš©λ˜μ—ˆκ±°λ‚˜, κΈ°λ³Έ κ°μ²΄μ΄κ±°λ‚˜)
return wc.generate_from_frequencies(keywords_dict) if isinstance(wc, WordCloud) else None
except Exception as e:
st.error(f"μ›Œλ“œν΄λΌμš°λ“œ 생성 쀑 였λ₯˜λ°œμƒ: {str(e)}")
return None
# Analysis over the whole article set
def analyze_news_content(news_df):
if news_df.empty:
return "데이터가 μ—†μŠ΅λ‹ˆλ‹€"
results = {}
# Count articles by source
if 'source' in news_df.columns:
results['source_counts'] = news_df['source'].value_counts().to_dict()
# Count articles by date
if 'date' in news_df.columns:
results['date_counts'] = news_df['date'].value_counts().to_dict()
# Keyword analysis
all_text = " ".join(news_df['title'].fillna('') + " " + news_df['content'].fillna(''))
if len(all_text.strip()) > 0:
results['top_keywords_for_wordcloud']= extract_keywords_for_wordcloud(all_text, top_n=50)
results['top_keywords'] = analyze_keywords(all_text)
else:
results['top_keywords_for_wordcloud']={}
results['top_keywords'] = []
return results
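# Expects a DataFrame with 'title' and 'content' columns (plus optional
# 'source' and 'date'); returns per-source counts, per-date counts, and the
# two keyword rankings computed above.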
# Generate a new article with the OpenAI API (new client style)
def generate_article(original_content, prompt_text):
try:
if not st.session_state.openai_client:
return "OpenAI API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
response = st.session_state.openai_client.chat.completions.create(
model="gpt-4.1-nano", # λ˜λŠ” μ‚¬μš© κ°€λŠ₯ν•œ μ μ ˆν•œ λͺ¨λΈ
messages=[
{"role": "system", "content": "당신은 전문적인 λ‰΄μŠ€ κΈ°μžμž…λ‹ˆλ‹€. μ£Όμ–΄μ§„ λ‚΄μš©μ„ λ°”νƒ•μœΌλ‘œ μƒˆλ‘œμš΄ 기사λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”."},
{"role": "user", "content": f"λ‹€μŒ λ‚΄μš©μ„ λ°”νƒ•μœΌλ‘œ {prompt_text}\n\n{original_content[:1000]}"}
],
max_tokens=2000
)
return response.choices[0].message.content
except Exception as e:
return f"기사 생성 였λ₯˜: {str(e)}"
# Generate a single article from several selected titles
def generate_article_from_titles(titles, prompt_text):
try:
if not st.session_state.openai_client:
return "OpenAI API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
titles_text = "\n".join([f"- {title}" for title in titles])
response = st.session_state.openai_client.chat.completions.create(
model="gpt-4.1-nano", # λ˜λŠ” μ‚¬μš© κ°€λŠ₯ν•œ μ μ ˆν•œ λͺ¨λΈ
messages=[
{"role": "system", "content": "당신은 전문적인 λ‰΄μŠ€ κΈ°μžμž…λ‹ˆλ‹€. μ£Όμ–΄μ§„ μ—¬λŸ¬ λ‰΄μŠ€ 제λͺ©μ„ λ°”νƒ•μœΌλ‘œ μƒˆλ‘œμš΄ 톡합 기사λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”."},
{"role": "user", "content": f"λ‹€μŒ λ‰΄μŠ€ 제λͺ©λ“€μ„ λ°”νƒ•μœΌλ‘œ {prompt_text}\n\n{titles_text}"}
],
max_tokens=2000
)
return response.choices[0].message.content
except Exception as e:
return f"기사 생성 였λ₯˜: {str(e)}"
# Generate an image with the OpenAI API (new client style)
def generate_image(prompt):
try:
if not st.session_state.openai_client:
return "OpenAI API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
# Generate the image with the GPT Image 1 model
result = st.session_state.openai_client.images.generate(
model="gpt-image-1", # μƒˆλ‘œμš΄ λͺ¨λΈλͺ… μ‚¬μš©
prompt=prompt,
size="1024x1024"
)
# Decode the base64 image data
image_base64 = result.data[0].b64_json
image_bytes = base64.b64decode(image_base64)
# Wrap the bytes in a BytesIO object
image = BytesIO(image_bytes)
# Convert to a PIL Image and resize (optional)
pil_image = Image.open(image)
pil_image = pil_image.resize((800, 800), Image.LANCZOS) # resize
# Convert back to BytesIO
output = BytesIO()
pil_image.save(output, format="JPEG", quality=80, optimize=True)
output.seek(0)
return output
except Exception as e:
return f"이미지 생성 였λ₯˜: {str(e)}"
# Scheduler helpers
def get_next_run_time(hour, minute):
now = datetime.now()
next_run = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
if next_run <= now:
next_run += timedelta(days=1)
return next_run
def run_scheduled_task():
try:
while global_scheduler_state.is_running:
schedule.run_pending()
time.sleep(1)
except Exception as e:
print(f"μŠ€μΌ€μ€„λŸ¬ μ—λŸ¬ λ°œμƒ: {e}")
traceback.print_exc()
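# The worker thread polls the schedule once per second and exits as soon as
# stop_scheduler() clears the is_running flag.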
def perform_news_task(task_type, keyword, num_articles, file_prefix):
logging.info(f"μŠ€μΌ€μ€„λŸ¬ μž‘μ—… μ‹œμž‘: {task_type}, ν‚€μ›Œλ“œ={keyword}")
try:
articles = crawl_naver_news(keyword, num_articles)
logging.info(f"μˆ˜μ§‘λœ 기사 수: {len(articles)}")
# Fetch the full article bodies
for i, article in enumerate(articles):
logging.info(f"기사 {i+1}/{len(articles)} 원문 κ°€μ Έμ˜€κΈ°: {article['title']}")
article['content'] = get_article_content(article['link'])
time.sleep(0.5) # avoid hammering the server
# Save the results
os.makedirs('/tmp/scheduled_news', exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"/tmp/scheduled_news/{file_prefix}_{task_type}_{timestamp}.json"
with open(filename, 'w', encoding='utf-8') as f:
json.dump(articles, f, ensure_ascii=False, indent=2)
logging.info(f"κ²°κ³Ό μ €μž₯ μ™„λ£Œ: {filename}")
global_scheduler_state.last_run = datetime.now()
print(f"{datetime.now()} - {task_type} λ‰΄μŠ€ 기사 μˆ˜μ§‘ μ™„λ£Œ: {keyword}")
# μ „μ—­ μƒνƒœμ— μˆ˜μ§‘ κ²°κ³Όλ₯Ό μ €μž₯
result_item = {
'task_type': task_type,
'keyword': keyword,
'timestamp': timestamp,
'num_articles': len(articles),
'filename': filename
}
global_scheduler_state.scheduled_results.append(result_item)
except Exception as e:
logging.error(f"μž‘μ—… μ‹€ν–‰ 쀑 였λ₯˜ λ°œμƒ: {str(e)}", exc_info=True)
traceback.print_exc()
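# Appends to scheduled_results come from the scheduler thread (and once from
# the UI thread when "run immediately" is set); the UI reads a copy before
# rendering. A lock would make this airtight, but contention is minimal here.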
def start_scheduler(daily_tasks, interval_tasks):
if not global_scheduler_state.is_running:
schedule.clear()
global_scheduler_state.scheduled_jobs = []
# 일별 νƒœμŠ€ν¬ 등둝
for task in daily_tasks:
hour = task['hour']
minute = task['minute']
keyword = task['keyword']
num_articles = task['num_articles']
job_id = f"daily_{keyword}_{hour}_{minute}"
schedule.every().day.at(f"{hour:02d}:{minute:02d}").do(
perform_news_task, "daily", keyword, num_articles, job_id
).tag(job_id)
global_scheduler_state.scheduled_jobs.append({
'id': job_id,
'type': 'daily',
'time': f"{hour:02d}:{minute:02d}",
'keyword': keyword,
'num_articles': num_articles
})
# μ‹œκ°„ 간격 νƒœμŠ€ν¬ 등둝
for task in interval_tasks:
interval_minutes = task['interval_minutes']
keyword = task['keyword']
num_articles = task['num_articles']
run_immediately = task['run_immediately']
job_id = f"interval_{keyword}_{interval_minutes}"
if run_immediately:
# Run once immediately
perform_news_task("interval", keyword, num_articles, job_id)
# Then run every N minutes
schedule.every(interval_minutes).minutes.do(
perform_news_task, "interval", keyword, num_articles, job_id
).tag(job_id)
global_scheduler_state.scheduled_jobs.append({
'id': job_id,
'type': 'interval',
'interval': f"{interval_minutes}λΆ„λ§ˆλ‹€",
'keyword': keyword,
'num_articles': num_articles,
'run_immediately': run_immediately
})
# λ‹€μŒ μ‹€ν–‰ μ‹œκ°„ 계산
next_run = schedule.next_run()
if next_run:
global_scheduler_state.next_run = next_run
# Start the scheduler thread
global_scheduler_state.is_running = True
global_scheduler_state.thread = threading.Thread(
target=run_scheduled_task, daemon=True
)
global_scheduler_state.thread.start()
# μƒνƒœλ₯Ό μ„Έμ…˜ μƒνƒœλ‘œλ„ 볡사 (UI ν‘œμ‹œμš©)
if 'scheduler_status' not in st.session_state:
st.session_state.scheduler_status = {}
st.session_state.scheduler_status = {
'is_running': global_scheduler_state.is_running,
'last_run': global_scheduler_state.last_run,
'next_run': global_scheduler_state.next_run,
'jobs_count': len(global_scheduler_state.scheduled_jobs)
}
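# Usage sketch (task shapes as built by the scheduling UI below):
#   daily_tasks = [{'hour': 9, 'minute': 0, 'keyword': '인곡지λŠ₯', 'num_articles': 5}]
#   interval_tasks = [{'interval_minutes': 30, 'keyword': '빅데이터',
#                      'num_articles': 5, 'run_immediately': True}]
#   start_scheduler(daily_tasks, interval_tasks)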
def stop_scheduler():
if global_scheduler_state.is_running:
global_scheduler_state.is_running = False
schedule.clear()
if global_scheduler_state.thread:
global_scheduler_state.thread.join(timeout=1)
global_scheduler_state.next_run = None
global_scheduler_state.scheduled_jobs = []
# UI μƒνƒœ μ—…λ°μ΄νŠΈ
if 'scheduler_status' in st.session_state:
st.session_state.scheduler_status['is_running'] = False
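# join(timeout=1) waits only briefly; the daemon thread may still finish its
# current one-second sleep before exiting, which is harmless because the
# schedule has already been cleared.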
# Render the screen for the selected menu
if menu == "λ‰΄μŠ€ 기사 크둀링":
st.header("λ‰΄μŠ€ 기사 크둀링")
keyword = st.text_input("검색어 μž…λ ₯", "인곡지λŠ₯")
num_articles = st.slider("κ°€μ Έμ˜¬ 기사 수", min_value=1, max_value=20, value=5)
if st.button("기사 κ°€μ Έμ˜€κΈ°"):
with st.spinner("기사λ₯Ό μˆ˜μ§‘ μ€‘μž…λ‹ˆλ‹€..."):
articles = crawl_naver_news(keyword, num_articles)
# Fetch the article bodies
for i, article in enumerate(articles):
st.progress((i + 1) / len(articles))
article['content'] = get_article_content(article['link'])
time.sleep(0.5) # avoid hammering the server
# Save and display the results
save_articles(articles)
st.success(f"{len(articles)}개의 기사λ₯Ό μˆ˜μ§‘ν–ˆμŠ΅λ‹ˆλ‹€!")
# Show the collected articles
for article in articles:
with st.expander(f"{article['title']} - {article['source']}"):
st.write(f"**좜처:** {article['source']}")
st.write(f"**λ‚ μ§œ:** {article['date']}")
st.write(f"**μš”μ•½:** {article['description']}")
st.write(f"**링크:** {article['link']}")
st.write("**본문 미리보기:**")
st.write(article['content'][:300] + "..." if len(article['content']) > 300 else article['content'])
elif menu == "기사 λΆ„μ„ν•˜κΈ°":
st.header("기사 λΆ„μ„ν•˜κΈ°")
articles = load_saved_articles()
if not articles:
st.warning("μ €μž₯된 기사가 μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 'λ‰΄μŠ€ 기사 크둀링' λ©”λ‰΄μ—μ„œ 기사λ₯Ό μˆ˜μ§‘ν•΄μ£Όμ„Έμš”.")
else:
# Article selection
titles = [article['title'] for article in articles]
selected_title = st.selectbox("뢄석할 기사 선택", titles)
selected_article = next((a for a in articles if a['title'] == selected_title), None)
if selected_article:
st.write(f"**제λͺ©:** {selected_article['title']}")
st.write(f"**좜처:** {selected_article['source']}")
# Show the body
with st.expander("기사 λ³Έλ¬Έ 보기"):
st.write(selected_article['content'])
# Choose the analysis method
analysis_type = st.radio(
"뢄석 방법",
["ν‚€μ›Œλ“œ 뢄석", "감정 뢄석", "ν…μŠ€νŠΈ 톡계"]
)
if analysis_type == "ν‚€μ›Œλ“œ 뢄석":
if st.button("ν‚€μ›Œλ“œ λΆ„μ„ν•˜κΈ°"):
with st.spinner("ν‚€μ›Œλ“œλ₯Ό 뢄석 μ€‘μž…λ‹ˆλ‹€..."):
keyword_tab1, keyword_tab2 = st.tabs(["ν‚€μ›Œλ“œ λΉˆλ„", "μ›Œλ“œν΄λΌμš°λ“œ"])
with keyword_tab1:
keywords = analyze_keywords(selected_article['content'])
# Visualization with Plotly
df = pd.DataFrame(keywords, columns=['단어', 'λΉˆλ„μˆ˜'])
fig = go.Figure(data=[
go.Bar(
x=df['단어'],
y=df['λΉˆλ„μˆ˜'],
marker_color='rgb(55, 83, 109)'
)
])
fig.update_layout(
title='ν‚€μ›Œλ“œ λΉˆλ„ 뢄석',
xaxis_title='ν‚€μ›Œλ“œ',
yaxis_title='λΉˆλ„μˆ˜',
height=500,
margin=dict(l=50, r=50, t=80, b=50)
)
st.plotly_chart(fig, use_container_width=True)
st.write("**μ£Όμš” ν‚€μ›Œλ“œ:**")
for word, count in keywords:
st.write(f"- {word}: {count}회")
with keyword_tab2:
keyword_dict = extract_keywords_for_wordcloud(selected_article['content'])
wc = generate_wordcloud(keyword_dict)
if wc:
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(wc, interpolation='bilinear')
ax.axis('off')
st.pyplot(fig)
# ν‚€μ›Œλ“œ μƒμœ„ 20개 ν‘œμ‹œ
st.write("**μƒμœ„ 20개 ν‚€μ›Œλ“œ:**")
top_keywords = sorted(keyword_dict.items(), key=lambda x: x[1], reverse=True)[:20]
keyword_df = pd.DataFrame(top_keywords, columns=['ν‚€μ›Œλ“œ', 'λΉˆλ„'])
st.dataframe(keyword_df)
else:
st.error("μ›Œλ“œν΄λΌμš°λ“œλ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.")
elif analysis_type == "ν…μŠ€νŠΈ 톡계":
if st.button("ν…μŠ€νŠΈ 톡계 뢄석"):
content = selected_article['content']
# ν…μŠ€νŠΈ 톡계 계산
word_count = len(re.findall(r'\b\w+\b', content))
char_count = len(content)
try:
# Sentence splitting with KSS
sentences = kss.split_sentences(content)
sentence_count = len(sentences)
except Exception:
# Fall back to a simple regex split if KSS fails
sentence_count = len(re.split(r'[.!?]+', content))
avg_word_length = sum(len(word) for word in re.findall(r'\b\w+\b', content)) / word_count if word_count > 0 else 0
avg_sentence_length = word_count / sentence_count if sentence_count > 0 else 0
# Display the statistics
st.subheader("ν…μŠ€νŠΈ 톡계")
col1, col2, col3 = st.columns(3)
with col1:
st.metric("단어 수", f"{word_count:,}")
with col2:
st.metric("문자 수", f"{char_count:,}")
with col3:
st.metric("λ¬Έμž₯ 수", f"{sentence_count:,}")
col1, col2 = st.columns(2)
with col1:
st.metric("평균 단어 길이", f"{avg_word_length:.1f}자")
with col2:
st.metric("평균 λ¬Έμž₯ 길이", f"{avg_sentence_length:.1f}단어")
# ν…μŠ€νŠΈ λ³΅μž‘μ„± 점수
complexity_score = min(10, (avg_sentence_length / 10) * 5 + (avg_word_length / 5) * 5)
st.progress(complexity_score / 10)
st.write(f"ν…μŠ€νŠΈ λ³΅μž‘μ„± 점수: {complexity_score:.1f}/10")
# Part-of-speech analysis removed (drops the KoNLPy dependency)
st.info("상세 ν’ˆμ‚¬ 뢄석은 ν˜„μž¬ μ§€μ›λ˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
elif analysis_type == "감정 뢄석":
if st.button("감정 λΆ„μ„ν•˜κΈ°"):
if st.session_state.openai_client:
with st.spinner("κΈ°μ‚¬μ˜ 감정을 뢄석 μ€‘μž…λ‹ˆλ‹€..."):
try:
response = st.session_state.openai_client.chat.completions.create(
model="gpt-4.1-mini",
messages=[
{"role": "system", "content": """당신은 ν…μŠ€νŠΈμ˜ 감정과 λ…Όμ‘°λ₯Ό λΆ„μ„ν•˜λŠ” μ „λ¬Έκ°€μž…λ‹ˆλ‹€.
λ‹€μŒ λ‰΄μŠ€ κΈ°μ‚¬μ˜ 감정과 λ…Όμ‘°λ₯Ό λΆ„μ„ν•˜κ³ , λ°˜λ“œμ‹œ μ•„λž˜ ν˜•μ‹μ˜ JSON으둜 μ‘λ‹΅ν•΄μ£Όμ„Έμš”:
{
"sentiment": "긍정적/뢀정적/쀑립적",
"reason": "이유 μ„€λͺ…...",
"keywords": [
{"word": "ν‚€μ›Œλ“œ1", "score": 8},
{"word": "ν‚€μ›Œλ“œ2", "score": 7}
]
}"""},
{"role": "user", "content": f"λ‹€μŒ λ‰΄μŠ€ 기사λ₯Ό 뢄석해 μ£Όμ„Έμš”:\n\n제λͺ©: {selected_article['title']}\n\nλ‚΄μš©: {selected_article['content'][:1500]}"}
],
max_tokens=800,
response_format={ "type": "json_object" } # force a JSON-formatted response
)
# Log the raw response for debugging
content = response.choices[0].message.content
logging.info(f"API 응닡: {content}")
# Parse the JSON
try:
analysis_result = json.loads(content)
except json.JSONDecodeError as e:
logging.error(f"JSON νŒŒμ‹± 였λ₯˜: {str(e)}")
logging.error(f"νŒŒμ‹± μ‹œλ„ν•œ λ‚΄μš©: {content}")
st.error("API 응닡을 νŒŒμ‹±ν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. 응닡 ν˜•μ‹μ΄ μ˜¬λ°”λ₯΄μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
st.stop() # st.stop() instead of return, since this is not inside a function
# κ²°κ³Ό μ‹œκ°ν™”
st.subheader("감정 뢄석 κ²°κ³Ό")
# 1. Visual banner for the sentiment type
sentiment_type = analysis_result.get('sentiment', '쀑립적')
col1, col2, col3 = st.columns([1, 3, 1])
with col2:
if sentiment_type == "긍정적":
st.markdown(f"""
<div style="background-color:#DCEDC8; padding:20px; border-radius:10px; text-align:center;">
<h1 style="color:#388E3C; font-size:28px;">πŸ˜€ 긍정적 λ…Όμ‘° πŸ˜€</h1>
<p style="font-size:16px;">감정 강도: λ†’μŒ</p>
</div>
""", unsafe_allow_html=True)
elif sentiment_type == "뢀정적":
st.markdown(f"""
<div style="background-color:#FFCDD2; padding:20px; border-radius:10px; text-align:center;">
<h1 style="color:#D32F2F; font-size:28px;">😞 뢀정적 λ…Όμ‘° 😞</h1>
<p style="font-size:16px;">감정 강도: λ†’μŒ</p>
</div>
""", unsafe_allow_html=True)
else:
st.markdown(f"""
<div style="background-color:#E0E0E0; padding:20px; border-radius:10px; text-align:center;">
<h1 style="color:#616161; font-size:28px;">😐 쀑립적 λ…Όμ‘° 😐</h1>
<p style="font-size:16px;">감정 강도: 쀑간</p>
</div>
""", unsafe_allow_html=True)
# 2. Reasoning
st.markdown("### 뢄석 κ·Όκ±°")
st.markdown(f"<div style='background-color:#F5F5F5; padding:15px; border-radius:5px;'>{analysis_result.get('reason', '')}</div>", unsafe_allow_html=True)
# 3. Sentiment keyword visualization
st.markdown("### 핡심 감정 ν‚€μ›Œλ“œ")
# Prepare the keyword data
keywords = analysis_result.get('keywords', [])
if keywords:
# Data for the chart
keyword_names = [item.get('word', '') for item in keywords]
keyword_scores = [item.get('score', 0) for item in keywords]
# λ ˆμ΄λ” 차트 생성
fig = go.Figure()
# Colors by sentiment
if sentiment_type == "긍정적":
fill_color = 'rgba(76, 175, 80, 0.3)' # light green
line_color = 'rgba(76, 175, 80, 1)' # solid green
elif sentiment_type == "뢀정적":
fill_color = 'rgba(244, 67, 54, 0.3)' # light red
line_color = 'rgba(244, 67, 54, 1)' # solid red
else:
fill_color = 'rgba(158, 158, 158, 0.3)' # light grey
line_color = 'rgba(158, 158, 158, 1)' # solid grey
# λ ˆμ΄λ” 차트 데이터 μ€€λΉ„ - λ§ˆμ§€λ§‰ 점이 첫 점과 μ—°κ²°λ˜λ„λ‘ 데이터 μΆ”κ°€
radar_keywords = keyword_names.copy()
radar_scores = keyword_scores.copy()
# λ ˆμ΄λ” 차트 생성
fig.add_trace(go.Scatterpolar(
r=radar_scores,
theta=radar_keywords,
fill='toself',
fillcolor=fill_color,
line=dict(color=line_color, width=2),
name='감정 ν‚€μ›Œλ“œ'
))
# λ ˆμ΄λ” 차트 λ ˆμ΄μ•„μ›ƒ μ„€μ •
fig.update_layout(
polar=dict(
radialaxis=dict(
visible=True,
range=[0, 10],
tickmode='linear',
tick0=0,
dtick=2
)
),
showlegend=False,
title={
'text': '감정 ν‚€μ›Œλ“œ λ ˆμ΄λ” 뢄석',
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'
},
height=500,
width=500,
margin=dict(l=80, r=80, t=80, b=80)
)
# Center the chart
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.plotly_chart(fig)
# Show the keywords as cards
st.markdown("#### ν‚€μ›Œλ“œ μ„ΈλΆ€ μ„€λͺ…")
cols = st.columns(min(len(keywords), 5))
for i, keyword in enumerate(keywords):
with cols[i % len(cols)]:
word = keyword.get('word', '')
score = keyword.get('score', 0)
# μ μˆ˜μ— λ”°λ₯Έ 색상 계산
r, g, b = 0, 0, 0
if sentiment_type == "긍정적":
g = min(200 + score * 5, 255)
r = max(255 - score * 20, 100)
elif sentiment_type == "뢀정적":
r = min(200 + score * 5, 255)
g = max(255 - score * 20, 100)
else:
r = g = b = 128
# Render the card
st.markdown(f"""
<div style="background-color:rgba({r},{g},{b},0.2); padding:10px; border-radius:5px; text-align:center; margin:5px;">
<h3 style="margin:0;">{word}</h3>
<div style="background-color:#E0E0E0; border-radius:3px; margin-top:5px;">
<div style="width:{score*10}%; background-color:rgba({r},{g},{b},0.8); height:10px; border-radius:3px;"></div>
</div>
<p style="margin:2px; font-size:12px;">강도: {score}/10</p>
</div>
""", unsafe_allow_html=True)
else:
st.info("ν‚€μ›Œλ“œλ₯Ό μΆ”μΆœν•˜μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.")
# 4. Summary statistics
st.markdown("### μ£Όμš” 톡계")
col1, col2, col3 = st.columns(3)
with col1:
st.metric(label="긍정/λΆ€μ • 점수", value=f"{7 if sentiment_type == '긍정적' else 3 if sentiment_type == '뢀정적' else 5}/10")
with col2:
st.metric(label="ν‚€μ›Œλ“œ 수", value=len(keywords))
with col3:
avg_score = sum(keyword_scores) / len(keyword_scores) if keyword_scores else 0
st.metric(label="평균 강도", value=f"{avg_score:.1f}/10")
except Exception as e:
st.error(f"감정 뢄석 였λ₯˜: {str(e)}")
st.error(traceback.format_exc())
else:
st.warning("OpenAI API ν‚€λ₯Ό μ‚¬μ΄λ“œλ°”μ—μ„œ μ„€μ •ν•΄μ£Όμ„Έμš”.")
elif menu == "μƒˆ 기사 μƒμ„±ν•˜κΈ°":
st.header("μƒˆ 기사 μƒμ„±ν•˜κΈ°")
articles = load_saved_articles()
if not articles:
st.warning("μ €μž₯된 기사가 μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 'λ‰΄μŠ€ 기사 크둀링' λ©”λ‰΄μ—μ„œ 기사λ₯Ό μˆ˜μ§‘ν•΄μ£Όμ„Έμš”.")
else:
# Tabs: generate from a single article vs. from multiple titles
tab1, tab2 = st.tabs(["단일 κΈ°μ‚¬λ‘œ 생성", "μ—¬λŸ¬ 제λͺ©μœΌλ‘œ 생성"])
with tab1:
# Existing flow: pick a single article
titles = [article['title'] for article in articles]
selected_title = st.selectbox("원본 기사 선택", titles, key="single_article")
selected_article = next((a for a in articles if a['title'] == selected_title), None)
if selected_article:
st.write(f"**원본 제λͺ©:** {selected_article['title']}")
with st.expander("원본 기사 λ‚΄μš©"):
st.write(selected_article['content'])
prompt_text ="""λ‹€μŒ 기사 양식을 λ”°λΌμ„œ λ‹€μ‹œ μž‘μ„±ν•΄μ€˜.
μ—­ν• : 당신은 μ‹ λ¬Έμ‚¬μ˜ κΈ°μžμž…λ‹ˆλ‹€.
μž‘μ—…: 졜근 μΌμ–΄λ‚œ 사건에 λŒ€ν•œ λ³΄λ„μžλ£Œλ₯Ό μž‘μ„±ν•΄μ•Ό ν•©λ‹ˆλ‹€. μžλ£ŒλŠ” 사싀을 기반으둜 ν•˜λ©°, 객관적이고 μ •ν™•ν•΄μ•Ό ν•©λ‹ˆλ‹€.
μ§€μΉ¨:
제곡된 정보λ₯Ό λ°”νƒ•μœΌλ‘œ μ‹ λ¬Έ λ³΄λ„μžλ£Œ ν˜•μ‹μ— 맞좰 기사λ₯Ό μž‘μ„±ν•˜μ„Έμš”.
기사 제λͺ©μ€ 주제λ₯Ό λͺ…ν™•νžˆ λ°˜μ˜ν•˜κ³  λ…μžμ˜ 관심을 끌 수 μžˆλ„λ‘ μž‘μ„±ν•©λ‹ˆλ‹€.
기사 λ‚΄μš©μ€ μ •ν™•ν•˜κ³  κ°„κ²°ν•˜λ©° 섀득λ ₯ μžˆλŠ” λ¬Έμž₯으둜 κ΅¬μ„±ν•©λ‹ˆλ‹€.
κ΄€λ ¨μžμ˜ 인터뷰λ₯Ό 인용 ν˜•νƒœλ‘œ λ„£μ–΄μ£Όμ„Έμš”.
μœ„μ˜ 정보와 지침을 μ°Έκ³ ν•˜μ—¬ μ‹ λ¬Έ λ³΄λ„μžλ£Œ ν˜•μ‹μ˜ 기사λ₯Ό μž‘μ„±ν•΄ μ£Όμ„Έμš”"""
# Option to also generate an image
generate_image_too = st.checkbox("기사 생성 ν›„ 이미지도 ν•¨κ»˜ μƒμ„±ν•˜κΈ°", value=True, key="single_image")
if st.button("μƒˆ 기사 μƒμ„±ν•˜κΈ°", key="generate_single"):
if st.session_state.openai_client:
with st.spinner("기사λ₯Ό 생성 μ€‘μž…λ‹ˆλ‹€..."):
new_article = generate_article(selected_article['content'], prompt_text)
st.write("**μƒμ„±λœ 기사:**")
st.write(new_article)
# 이미지 μƒμ„±ν•˜κΈ° (μ˜΅μ…˜μ΄ μ„ νƒλœ 경우)
if generate_image_too:
with st.spinner("기사 κ΄€λ ¨ 이미지λ₯Ό 생성 μ€‘μž…λ‹ˆλ‹€..."):
image_prompt = f"""신문기사 제λͺ© "{selected_article['title']}" 을 보고 이미지λ₯Ό λ§Œλ“€μ–΄μ€˜
μ΄λ―Έμ§€μ—λŠ” λ‹€μŒ μš”μ†Œκ°€ ν¬ν•¨λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€:
- 기사λ₯Ό 이해할 수 μžˆλŠ” 도식
- 기사 λ‚΄μš©κ³Ό κ΄€λ ¨λœ ν…μŠ€νŠΈ
- μ‹¬ν”Œν•˜κ²Œ 처리
"""
# Generate the image
image = generate_image(image_prompt)
if isinstance(image, BytesIO):
st.subheader("μƒμ„±λœ 이미지:")
st.image(image, use_column_width=True)
else:
st.error(image)
# μƒμ„±λœ 기사 μ €μž₯ μ˜΅μ…˜
if st.button("μƒμ„±λœ 기사 μ €μž₯", key="save_single"):
new_article_data = {
'title': f"[생성됨] {selected_article['title']}",
'source': f"AI 생성 (원본: {selected_article['source']})",
'date': datetime.now().strftime("%Y-%m-%d %H:%M"),
'description': new_article[:100] + "...",
'link': "",
'content': new_article
}
articles.append(new_article_data)
save_articles(articles)
st.success("μƒμ„±λœ 기사가 μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
else:
st.warning("OpenAI API ν‚€λ₯Ό μ‚¬μ΄λ“œλ°”μ—μ„œ μ„€μ •ν•΄μ£Όμ„Έμš”.")
with tab2:
# New feature: generate one article from several titles
st.subheader("μ—¬λŸ¬ 제λͺ©μ„ 기반으둜 ν•˜λ‚˜μ˜ 기사 μƒμ„±ν•˜κΈ°")
# Multiselect widget for choosing several titles
titles = [article['title'] for article in articles]
selected_titles = st.multiselect("μ—¬λŸ¬ 기사 제λͺ© 선택 (2개 이상 ꢌμž₯)", titles)
if selected_titles:
st.write(f"**μ„ νƒλœ 제λͺ© 수:** {len(selected_titles)}개")
with st.expander("μ„ νƒλœ 제λͺ© λͺ©λ‘"):
for i, title in enumerate(selected_titles):
st.write(f"{i+1}. {title}")
multi_prompt_text = """λ‹€μŒ λ‰΄μŠ€ 제λͺ©λ“€μ„ μ’…ν•©ν•˜μ—¬ ν•˜λ‚˜μ˜ ν†΅ν•©λœ κΈ°μ‚¬λ‘œ μž‘μ„±ν•΄μ€˜.
μ—­ν• : 당신은 μ‹ λ¬Έμ‚¬μ˜ κΈ°μžμž…λ‹ˆλ‹€.
μž‘μ—…: μ—¬λŸ¬ λ‰΄μŠ€ 제λͺ©μ—μ„œ 곡톡 주제λ₯Ό νŒŒμ•…ν•˜κ³ , 이λ₯Ό μ’…ν•©ν•œ λ³΄λ„μžλ£Œλ₯Ό μž‘μ„±ν•΄μ•Ό ν•©λ‹ˆλ‹€.
μ§€μΉ¨:
- 제곡된 μ—¬λŸ¬ 제λͺ©μ„ μ’…ν•©μ μœΌλ‘œ λΆ„μ„ν•˜μ—¬ ν•˜λ‚˜μ˜ μΌκ΄€λœ 기사λ₯Ό μž‘μ„±ν•˜μ„Έμš”.
- 기사 제λͺ©μ€ 제곡된 λͺ¨λ“  제λͺ©μ˜ 핡심 주제λ₯Ό λ‹΄μ•„μ•Ό ν•©λ‹ˆλ‹€.
- 기사 λ‚΄μš©μ€ 제λͺ©λ“€μ΄ λ‹€λ£¨λŠ” λͺ¨λ“  μ£Όμš” 주제λ₯Ό 포함해야 ν•©λ‹ˆλ‹€.
- κ΄€λ ¨μžμ˜ 가상 인터뷰λ₯Ό 인용 ν˜•νƒœλ‘œ λ„£μ–΄μ£Όμ„Έμš”.
- 제곡된 제λͺ©λ“€μ˜ λ§₯락을 μœ μ§€ν•˜λ©΄μ„œ 일관성 μžˆλŠ” λ‚΄λŸ¬ν‹°λΈŒλ₯Ό κ΅¬μ„±ν•˜μ„Έμš”."""
# Option to edit the prompt directly
custom_prompt = st.checkbox("직접 ν”„λ‘¬ν”„νŠΈ μž‘μ„±ν•˜κΈ°")
if custom_prompt:
multi_prompt_text = st.text_area("ν”„λ‘¬ν”„νŠΈ 직접 μž…λ ₯", multi_prompt_text, height=250)
# Image generation option
generate_multi_image = st.checkbox("기사 생성 ν›„ 이미지도 ν•¨κ»˜ μƒμ„±ν•˜κΈ°", value=True, key="multi_image")
if st.button("μƒˆ 기사 μƒμ„±ν•˜κΈ°", key="generate_multi"):
if st.session_state.openai_client:
if len(selected_titles) < 1:
st.error("μ΅œμ†Œ 1개 μ΄μƒμ˜ 제λͺ©μ„ μ„ νƒν•΄μ£Όμ„Έμš”.")
else:
with st.spinner("μ—¬λŸ¬ 제λͺ©μœΌλ‘œλΆ€ν„° 기사λ₯Ό 생성 μ€‘μž…λ‹ˆλ‹€..."):
# μ„ νƒλœ 제λͺ©λ“€μ„ μ΄μš©ν•˜μ—¬ μƒˆ 기사 생성
new_article = generate_article_from_titles(selected_titles, multi_prompt_text)
st.write("**μƒμ„±λœ 기사:**")
st.write(new_article)
# Generate an image (if the option is checked)
if generate_multi_image:
with st.spinner("기사 κ΄€λ ¨ 이미지λ₯Ό 생성 μ€‘μž…λ‹ˆλ‹€..."):
combined_titles = " / ".join(selected_titles[:3]) # use only the first three titles
image_prompt = f"""μ—¬λŸ¬ λ‰΄μŠ€λ₯Ό μ’…ν•©ν•œ 기사 "{combined_titles}" κ΄€λ ¨ 이미지λ₯Ό λ§Œλ“€μ–΄μ€˜.
μ΄λ―Έμ§€μ—λŠ” λ‹€μŒ μš”μ†Œκ°€ ν¬ν•¨λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€:
- μ—¬λŸ¬ λ‰΄μŠ€μ˜ 곡톡 주제λ₯Ό μ‹œκ°ν™”ν•œ 도식
- 핡심 ν‚€μ›Œλ“œλ‚˜ κ°œλ…
- μ‹¬ν”Œν•˜κ³  ν†΅ν•©λœ λ””μžμΈ
"""
# Generate the image
image = generate_image(image_prompt)
if isinstance(image, BytesIO):
st.subheader("μƒμ„±λœ 이미지:")
st.image(image, use_column_width=True)
else:
st.error(image)
# μƒμ„±λœ 기사 μ €μž₯ μ˜΅μ…˜
if st.button("μƒμ„±λœ 기사 μ €μž₯", key="save_multi"):
# Combined title (first title plus the count of the rest)
if len(selected_titles) > 1:
combined_title = f"{selected_titles[0]} μ™Έ {len(selected_titles)-1}건 κ΄€λ ¨ μ†Œμ‹"
else:
combined_title = selected_titles[0]
new_article_data = {
'title': f"[μ—¬λŸ¬ 제λͺ© 톡합] {combined_title}",
'source': "AI 생성 (μ—¬λŸ¬ 제λͺ© 톡합)",
'date': datetime.now().strftime("%Y-%m-%d %H:%M"),
'description': new_article[:100] + "...",
'link': "",
'content': new_article
}
articles.append(new_article_data)
save_articles(articles)
st.success("μƒμ„±λœ 기사가 μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
else:
st.warning("OpenAI API ν‚€λ₯Ό μ‚¬μ΄λ“œλ°”μ—μ„œ μ„€μ •ν•΄μ£Όμ„Έμš”.")
elif menu == "λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°":
st.header("λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°")
# Create the tabs
tab1, tab2, tab3 = st.tabs(["일별 μ˜ˆμ•½", "μ‹œκ°„ 간격 μ˜ˆμ•½", "μŠ€μΌ€μ€„λŸ¬ μƒνƒœ"])
# Daily schedule tab
with tab1:
st.subheader("맀일 μ •ν•΄μ§„ μ‹œκ°„μ— 기사 μˆ˜μ§‘ν•˜κΈ°")
# Keyword input
daily_keyword = st.text_input("검색 ν‚€μ›Œλ“œ", value="인곡지λŠ₯", key="daily_keyword")
daily_num_articles = st.slider("μˆ˜μ§‘ν•  기사 수", min_value=1, max_value=20, value=5, key="daily_num_articles")
# Time selection
daily_col1, daily_col2 = st.columns(2)
with daily_col1:
daily_hour = st.selectbox("μ‹œ", range(24), format_func=lambda x: f"{x:02d}μ‹œ", key="daily_hour")
with daily_col2:
daily_minute = st.selectbox("λΆ„", range(0, 60, 5), format_func=lambda x: f"{x:02d}λΆ„", key="daily_minute")
# Daily schedule list
if 'daily_tasks' not in st.session_state:
st.session_state.daily_tasks = []
if st.button("일별 μ˜ˆμ•½ μΆ”κ°€"):
st.session_state.daily_tasks.append({
'hour': daily_hour,
'minute': daily_minute,
'keyword': daily_keyword,
'num_articles': daily_num_articles
})
st.success(f"일별 μ˜ˆμ•½μ΄ μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€: 맀일 {daily_hour:02d}:{daily_minute:02d} - '{daily_keyword}'")
# μ˜ˆμ•½ λͺ©λ‘ ν‘œμ‹œ
if st.session_state.daily_tasks:
st.subheader("일별 μ˜ˆμ•½ λͺ©λ‘")
for i, task in enumerate(st.session_state.daily_tasks):
st.write(f"{i+1}. 맀일 {task['hour']:02d}:{task['minute']:02d} - '{task['keyword']}' ({task['num_articles']}개)")
if st.button("일별 μ˜ˆμ•½ μ΄ˆκΈ°ν™”"):
st.session_state.daily_tasks = []
st.warning("일별 μ˜ˆμ•½μ΄ λͺ¨λ‘ μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
# Interval schedule tab
with tab2:
st.subheader("μ‹œκ°„ κ°„κ²©μœΌλ‘œ 기사 μˆ˜μ§‘ν•˜κΈ°")
# Keyword input
interval_keyword = st.text_input("검색 ν‚€μ›Œλ“œ", value="빅데이터", key="interval_keyword")
interval_num_articles = st.slider("μˆ˜μ§‘ν•  기사 수", min_value=1, max_value=20, value=5, key="interval_num_articles")
# Interval setting
interval_minutes = st.number_input("μ‹€ν–‰ 간격(λΆ„)", min_value=1, max_value=60*24, value=30, key="interval_minutes")
# Whether to run immediately
run_immediately = st.checkbox("μ¦‰μ‹œ μ‹€ν–‰", value=True, help="μ²΄ν¬ν•˜λ©΄ μŠ€μΌ€μ€„λŸ¬ μ‹œμž‘ μ‹œ μ¦‰μ‹œ μ‹€ν–‰ν•©λ‹ˆλ‹€.")
# Interval schedule list
if 'interval_tasks' not in st.session_state:
st.session_state.interval_tasks = []
if st.button("μ‹œκ°„ 간격 μ˜ˆμ•½ μΆ”κ°€"):
st.session_state.interval_tasks.append({
'interval_minutes': interval_minutes,
'keyword': interval_keyword,
'num_articles': interval_num_articles,
'run_immediately': run_immediately
})
st.success(f"μ‹œκ°„ 간격 μ˜ˆμ•½μ΄ μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€: {interval_minutes}λΆ„λ§ˆλ‹€ - '{interval_keyword}'")
# μ˜ˆμ•½ λͺ©λ‘ ν‘œμ‹œ
if st.session_state.interval_tasks:
st.subheader("μ‹œκ°„ 간격 μ˜ˆμ•½ λͺ©λ‘")
for i, task in enumerate(st.session_state.interval_tasks):
immediate_text = "μ¦‰μ‹œ μ‹€ν–‰ ν›„ " if task['run_immediately'] else ""
st.write(f"{i+1}. {immediate_text}{task['interval_minutes']}λΆ„λ§ˆλ‹€ - '{task['keyword']}' ({task['num_articles']}개)")
if st.button("μ‹œκ°„ 간격 μ˜ˆμ•½ μ΄ˆκΈ°ν™”"):
st.session_state.interval_tasks = []
st.warning("μ‹œκ°„ 간격 μ˜ˆμ•½μ΄ λͺ¨λ‘ μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
# μŠ€μΌ€μ€„λŸ¬ μƒνƒœ νƒ­
with tab3:
st.subheader("μŠ€μΌ€μ€„λŸ¬ μ œμ–΄ 및 μƒνƒœ")
# Log viewer at the top
st.subheader("μ‹€μ‹œκ°„ 둜그")
log_container = st.empty()
def update_logs():
try:
with open('/tmp/crawler.log', 'r') as f:
logs = f.readlines()
return ''.join(logs[-100:]) # show only the last 100 lines
except Exception as e:
return f"둜그 νŒŒμΌμ„ 읽을 수 μ—†μŠ΅λ‹ˆλ‹€: {str(e)}"
# 둜그 μžλ™ μ—…λ°μ΄νŠΈ
if st.checkbox("둜그 μžλ™ μ—…λ°μ΄νŠΈ", value=True):
log_content = update_logs()
log_container.text_area("졜근 둜그", value=log_content, height=400)
else:
if st.button("둜그 μƒˆλ‘œκ³ μΉ¨"):
log_content = update_logs()
log_container.text_area("졜근 둜그", value=log_content, height=400)
st.divider()
# Scheduler controls
col1, col2 = st.columns(2)
with col1:
# Start/stop button
if not global_scheduler_state.is_running:
if st.button("μŠ€μΌ€μ€„λŸ¬ μ‹œμž‘"):
if not st.session_state.daily_tasks and not st.session_state.interval_tasks:
st.error("μ˜ˆμ•½λœ μž‘μ—…μ΄ μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 일별 μ˜ˆμ•½ λ˜λŠ” μ‹œκ°„ 간격 μ˜ˆμ•½μ„ μΆ”κ°€ν•΄μ£Όμ„Έμš”.")
else:
start_scheduler(st.session_state.daily_tasks, st.session_state.interval_tasks)
st.success("μŠ€μΌ€μ€„λŸ¬κ°€ μ‹œμž‘λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
else:
if st.button("μŠ€μΌ€μ€„λŸ¬ 쀑지"):
stop_scheduler()
st.warning("μŠ€μΌ€μ€„λŸ¬κ°€ μ€‘μ§€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
with col2:
# μŠ€μΌ€μ€„λŸ¬ μƒνƒœ ν‘œμ‹œ
if 'scheduler_status' in st.session_state:
st.write(f"μƒνƒœ: {'싀행쀑' if global_scheduler_state.is_running else '쀑지'}")
if global_scheduler_state.last_run:
st.write(f"λ§ˆμ§€λ§‰ μ‹€ν–‰: {global_scheduler_state.last_run.strftime('%Y-%m-%d %H:%M:%S')}")
if global_scheduler_state.next_run and global_scheduler_state.is_running:
st.write(f"λ‹€μŒ μ‹€ν–‰: {global_scheduler_state.next_run.strftime('%Y-%m-%d %H:%M:%S')}")
else:
st.write("μƒνƒœ: 쀑지")
# μ˜ˆμ•½λœ μž‘μ—… λͺ©λ‘
if global_scheduler_state.scheduled_jobs:
st.subheader("ν˜„μž¬ μ‹€ν–‰ 쀑인 μ˜ˆμ•½ μž‘μ—…")
for i, job in enumerate(global_scheduler_state.scheduled_jobs):
if job['type'] == 'daily':
st.write(f"{i+1}. [일별] 맀일 {job['time']} - '{job['keyword']}' ({job['num_articles']}개)")
else:
immediate_text = "[μ¦‰μ‹œ μ‹€ν–‰ ν›„] " if job.get('run_immediately', False) else ""
st.write(f"{i+1}. [간격] {immediate_text}{job['interval']} - '{job['keyword']}' ({job['num_articles']}개)")
# Scheduler run results
if global_scheduler_state.scheduled_results:
st.subheader("μŠ€μΌ€μ€„λŸ¬ μ‹€ν–‰ κ²°κ³Ό")
# Copy the results before rendering them in the UI
results_for_display = global_scheduler_state.scheduled_results.copy()
if results_for_display:
result_df = pd.DataFrame(results_for_display)
result_df['μ‹€ν–‰μ‹œκ°„'] = result_df['timestamp'].apply(lambda x: datetime.strptime(x, "%Y%m%d_%H%M%S").strftime("%Y-%m-%d %H:%M:%S"))
result_df = result_df.rename(columns={
'task_type': 'μž‘μ—…μœ ν˜•',
'keyword': 'ν‚€μ›Œλ“œ',
'num_articles': 'κΈ°μ‚¬μˆ˜',
'filename': '파일λͺ…'
})
result_df['μž‘μ—…μœ ν˜•'] = result_df['μž‘μ—…μœ ν˜•'].apply(lambda x: '일별' if x == 'daily' else 'μ‹œκ°„κ°„κ²©')
st.dataframe(
result_df[['μž‘μ—…μœ ν˜•', 'ν‚€μ›Œλ“œ', 'κΈ°μ‚¬μˆ˜', 'μ‹€ν–‰μ‹œκ°„', '파일λͺ…']],
hide_index=True
)
# μˆ˜μ§‘λœ 파일 보기
if os.path.exists('/tmp/scheduled_news'):
files = [f for f in os.listdir('/tmp/scheduled_news') if f.endswith('.json')]
if files:
st.subheader("μˆ˜μ§‘λœ 파일 μ—΄κΈ°")
selected_file = st.selectbox("파일 선택", files, index=len(files)-1)
if selected_file and st.button("파일 λ‚΄μš© 보기"):
with open(os.path.join('/tmp/scheduled_news', selected_file), 'r', encoding='utf-8') as f:
articles = json.load(f)
st.write(f"**파일λͺ…:** {selected_file}")
st.write(f"**μˆ˜μ§‘ 기사 수:** {len(articles)}개")
for article in articles:
with st.expander(f"{article['title']} - {article['source']}"):
st.write(f"**좜처:** {article['source']}")
st.write(f"**λ‚ μ§œ:** {article['date']}")
st.write(f"**링크:** {article['link']}")
st.write("**λ³Έλ¬Έ:**")
st.write(article['content'][:500] + "..." if len(article['content']) > 500 else article['content'])
# Footer
st.markdown("---")
st.markdown("Β© λ‰΄μŠ€ 기사 도ꡬ @conanssam")