JUNGU committed
Commit 4413498 · verified · 1 Parent(s): dad4fc1

Update src/streamlit_app.py

script_dir = os.path.dirname(os.path.abspath(__file__))
candidate = os.path.join(script_dir, fname)
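
The snippet above is the heart of the word-cloud font fix: candidate font files are now resolved against the script's own directory rather than the process working directory, so a bundled NanumGothic.ttf is found no matter where Streamlit is launched from. A minimal sketch of that lookup, assuming the font sits next to streamlit_app.py (the find_font wrapper itself is illustrative, not part of the commit):

import os

def find_font(candidates=("NanumGothic.ttf",)):
    # Resolve each candidate relative to this file, not the CWD (illustrative helper)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    for fname in candidates:
        candidate = os.path.join(script_dir, fname)
        if os.path.exists(candidate):
            return candidate
    return None  # callers fall back to the default font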

Files changed (1)
  1. src/streamlit_app.py +214 -191
src/streamlit_app.py CHANGED
@@ -11,13 +11,28 @@ from collections import Counter
 import json
 import os
 from datetime import datetime, timedelta
-import openai  # legacy API style
+from openai import OpenAI  # new import style
 from dotenv import load_dotenv
 import traceback
 import plotly.graph_objects as go
 import schedule
 import threading
 import matplotlib.pyplot as plt
+import kss  # use KSS instead of KoNLPy
+from PIL import Image
+import base64
+from io import BytesIO
+import logging
+
+# Logging setup
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler('/tmp/crawler.log')
+    ]
+)
 
 # Word cloud support
 try:
@@ -40,41 +55,52 @@ class SchedulerState:
 global_scheduler_state = SchedulerState()
 
 # Initialize session state for API key management
-if 'openai_api_key' not in st.session_state:
-    st.session_state.openai_api_key = None
+if 'openai_client' not in st.session_state:
+    st.session_state.openai_client = None
 
 # Try several ways to load the API key
 load_dotenv()  # try a .env file first
 
+# Helper that initializes the OpenAI client
+def init_openai_client(api_key=None):
+    try:
+        if api_key:
+            client = OpenAI(api_key=api_key)
+            # Quick API-key validity check
+            client.models.list()  # test whether the key works
+            return client
+        return None
+    except Exception as e:
+        st.error(f"API 키 초기화 오류: {str(e)}")
+        return None
+
 # 1. Check the environment variables for an API key
-if os.environ.get('OPENAI_API_KEY'):
-    st.session_state.openai_api_key = os.environ.get('OPENAI_API_KEY')
-    openai.api_key = st.session_state.openai_api_key
+api_key = os.environ.get('OPENAI_API_KEY')
+if api_key:
+    st.session_state.openai_client = init_openai_client(api_key)
 
-# 2. Check Streamlit secrets for an API key (try-except guards against errors)
-if not st.session_state.openai_api_key:
+# 2. Check Streamlit secrets for an API key
+if not st.session_state.openai_client:
     try:
         if 'OPENAI_API_KEY' in st.secrets:
-            st.session_state.openai_api_key = st.secrets['OPENAI_API_KEY']
-            openai.api_key = st.session_state.openai_api_key
+            st.session_state.openai_client = init_openai_client(st.secrets['OPENAI_API_KEY'])
     except Exception as e:
         pass  # no error even when there is no secrets file
 
-# Point the NLTK data path at a temporary directory
-nltk_data_dir = '/tmp/nltk_data'
-os.makedirs(nltk_data_dir, exist_ok=True)
-nltk.data.path.insert(0, nltk_data_dir)  # search this path first
+# NLTK data path - use nltk_data in the current workspace
+nltk_data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'nltk_data')
+nltk.data.path.insert(0, nltk_data_path)
 
-# Download the required NLTK data
+# Make sure the required NLTK data is present
 try:
     nltk.data.find('tokenizers/punkt')
 except LookupError:
-    nltk.download('punkt', download_dir=nltk_data_dir)
+    nltk.download('punkt', download_dir=nltk_data_path)
 
 try:
     nltk.data.find('corpora/stopwords')
 except LookupError:
-    nltk.download('stopwords', download_dir=nltk_data_dir)
+    nltk.download('stopwords', download_dir=nltk_data_path)
 
 # Page setup
 st.set_page_config(page_title="뉴스 기사 도구", page_icon="📰", layout="wide")
@@ -90,9 +116,12 @@ with st.sidebar:
    st.divider()
    api_key = st.text_input("OpenAI API 키 입력", type="password")
    if api_key:
-       st.session_state.openai_api_key = api_key
-       openai.api_key = api_key
-       st.success("API 키가 설정되었습니다!")
+       client = init_openai_client(api_key)
+       if client:
+           st.session_state.openai_client = client
+           st.success("API 키가 성공적으로 설정되었습니다!")
+       else:
+           st.error("유효하지 않은 API 키입니다.")
 
# Function that loads saved articles
def load_saved_articles():
@@ -112,16 +141,21 @@ def crawl_naver_news(keyword, num_articles=5):
    """
    Collects Naver news articles for a keyword.
    """
+   logging.info(f"크롤링 시작: 키워드={keyword}, 기사 수={num_articles}")
    url = f"https://search.naver.com/search.naver?where=news&query={keyword}"
    results = []
 
    try:
        # Request the page
+       logging.info(f"요청 URL: {url}")
        response = requests.get(url)
+       logging.info(f"응답 상태 코드: {response.status_code}")
+
        soup = BeautifulSoup(response.text, 'html.parser')
 
        # Find the news items
        news_items = soup.select('div.sds-comps-base-layout.sds-comps-full-layout')
+       logging.info(f"찾은 뉴스 아이템 수: {len(news_items)}")
 
        # Extract information from each news item
        for i, item in enumerate(news_items):
@@ -156,48 +190,68 @@ def crawl_naver_news(keyword, num_articles=5):
                    'description': description,
                    'source': source,
                    'date': date,
-                   'content': ""  # field for the full text, filled in later
+                   'content': ""
                })
 
+               logging.info(f"기사 추출 성공: {title}")
+
            except Exception as e:
-               st.error(f"기사 정보 추출 중 오류 발생: {str(e)}")
+               logging.error(f"기사 정보 추출 중 오류 발생: {str(e)}", exc_info=True)
                continue
 
    except Exception as e:
-       st.error(f"페이지 요청 중 오류 발생: {str(e)}")
+       logging.error(f"페이지 요청 중 오류 발생: {str(e)}", exc_info=True)
 
+   logging.info(f"크롤링 완료: {len(results)}개 기사 수집")
    return results
 
# Fetch the full text of an article
def get_article_content(url):
+   logging.info(f"기사 원문 가져오기 시작: {url}")
    try:
        response = requests.get(url, timeout=5)
+       logging.info(f"원문 요청 상태 코드: {response.status_code}")
+
        soup = BeautifulSoup(response.text, 'html.parser')
 
        # Find the Naver News article body
        content = soup.select_one('#dic_area')
        if content:
            text = content.text.strip()
-           text = re.sub(r'\s+', ' ', text)  # collapse repeated whitespace
+           text = re.sub(r'\s+', ' ', text)
+           logging.info("네이버 뉴스 본문 추출 성공")
            return text
 
-       # Find the body on other news sites (more sites need handling)
+       # Find the body on other news sites
        content = soup.select_one('.article_body, .article-body, .article-content, .news-content-inner')
        if content:
            text = content.text.strip()
            text = re.sub(r'\s+', ' ', text)
+           logging.info("일반 뉴스 본문 추출 성공")
            return text
 
+       logging.warning("본문을 찾을 수 없음")
        return "본문을 가져올 수 없습니다."
    except Exception as e:
+       logging.error(f"원문 가져오기 오류: {str(e)}", exc_info=True)
        return f"오류 발생: {str(e)}"
 
-# Keyword analysis with NLTK
+# Keyword analysis with NLTK (using KSS)
def analyze_keywords(text, top_n=10):
-   # Korean stopword list (must be defined manually)
+   # Korean stopword list
    korean_stopwords = ['이', '그', '저', '것', '및', '등', '를', '을', '에', '에서', '의', '으로', '로']
 
-   tokens = word_tokenize(text)
+   # Sentence splitting and tokenization with KSS
+   try:
+       sentences = kss.split_sentences(text)
+       tokens = []
+       for sentence in sentences:
+           # Simple whitespace tokenization
+           tokens.extend(sentence.split())
+   except:
+       # Fall back to basic tokenization if KSS fails
+       tokens = text.split()
+
    tokens = [word for word in tokens if word.isalnum() and len(word) > 1 and word not in korean_stopwords]
 
    word_count = Counter(tokens)
@@ -274,12 +328,15 @@ def generate_wordcloud(keywords_dict):
    ).generate_from_frequencies(keywords_dict)
 
    try:
-       possible_font_paths = ["NanumGothic.ttf", "이름"]
+       import os
+       script_dir = os.path.dirname(os.path.abspath(__file__))
+       possible_font_paths = ["NanumGothic.ttf", "이름"]
 
        font_path = None
-       for path in possible_font_paths:
-           if os.path.exists(path):
-               font_path = path
+       for fname in possible_font_paths:
+           candidate = os.path.join(script_dir, fname)
+           if os.path.exists(candidate):
+               font_path = candidate
                break
 
        if font_path:
@@ -325,36 +382,55 @@ def analyze_news_content(news_df):
        results['top_keywords'] = []
    return results
 
-# Create a new article via the OpenAI API (legacy client)
+# Create a new article via the OpenAI API (new client)
def generate_article(original_content, prompt_text):
    try:
-       if not st.session_state.openai_api_key:
+       if not st.session_state.openai_client:
            return "OpenAI API 키가 설정되지 않았습니다."
 
-       response = openai.ChatCompletion.create(
-           model="gpt-4.1-mini",
+       response = st.session_state.openai_client.chat.completions.create(
+           model="gpt-4",  # or whichever model is available
            messages=[
                {"role": "system", "content": "당신은 전문적인 뉴스 기자입니다. 주어진 내용을 바탕으로 새로운 기사를 작성해주세요."},
                {"role": "user", "content": f"다음 내용을 바탕으로 {prompt_text}\n\n{original_content[:1000]}"}
            ],
            max_tokens=2000
        )
-       return response.choices[0].message['content']
+       return response.choices[0].message.content
    except Exception as e:
        return f"기사 생성 오류: {str(e)}"
 
-# Generate an image via the OpenAI API (legacy client)
+# Generate an image via the OpenAI API (new client)
def generate_image(prompt):
    try:
-       if not st.session_state.openai_api_key:
+       if not st.session_state.openai_client:
            return "OpenAI API 키가 설정되지 않았습니다."
 
-       response = openai.Image.create(
+       # Generate the image with the GPT Image 1 model
+       result = st.session_state.openai_client.images.generate(
+           model="gpt-image-1",  # new model name
            prompt=prompt,
-           n=1,
            size="1024x1024"
        )
-       return response['data'][0]['url']
+
+       # Decode the base64 image data
+       image_base64 = result.data[0].b64_json
+       image_bytes = base64.b64decode(image_base64)
+
+       # Wrap it in a BytesIO object
+       image = BytesIO(image_bytes)
+
+       # Convert to a PIL Image and resize (optional)
+       pil_image = Image.open(image)
+       pil_image = pil_image.resize((800, 800), Image.LANCZOS)
+
+       # Convert back to BytesIO
+       output = BytesIO()
+       pil_image.save(output, format="JPEG", quality=80, optimize=True)
+       output.seek(0)
+
+       return output
+
    except Exception as e:
        return f"이미지 생성 오류: {str(e)}"
 
@@ -376,11 +452,14 @@ def run_scheduled_task():
        traceback.print_exc()
 
def perform_news_task(task_type, keyword, num_articles, file_prefix):
+   logging.info(f"스케줄러 작업 시작: {task_type}, 키워드={keyword}")
    try:
        articles = crawl_naver_news(keyword, num_articles)
+       logging.info(f"수집된 기사 수: {len(articles)}")
 
        # Fetch the content of each article
-       for article in articles:
+       for i, article in enumerate(articles):
+           logging.info(f"기사 {i+1}/{len(articles)} 원문 가져오기: {article['title']}")
            article['content'] = get_article_content(article['link'])
            time.sleep(0.5)  # avoid overloading the server
 
@@ -392,10 +471,12 @@ def perform_news_task(task_type, keyword, num_articles, file_prefix):
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(articles, f, ensure_ascii=False, indent=2)
 
+       logging.info(f"결과 저장 완료: {filename}")
+
        global_scheduler_state.last_run = datetime.now()
        print(f"{datetime.now()} - {task_type} 뉴스 기사 수집 완료: {keyword}")
 
-       # Store the results in global state (for UI updates)
+       # Store the results in global state
        result_item = {
            'task_type': task_type,
            'keyword': keyword,
@@ -406,7 +487,7 @@ def perform_news_task(task_type, keyword, num_articles, file_prefix):
        global_scheduler_state.scheduled_results.append(result_item)
 
    except Exception as e:
-       print(f"작업 실행 중 오류 발생: {e}")
+       logging.error(f"작업 실행 중 오류 발생: {str(e)}", exc_info=True)
        traceback.print_exc()
 
def start_scheduler(daily_tasks, interval_tasks):
@@ -563,9 +644,25 @@ elif menu == "기사 분석하기":
        with keyword_tab1:
            keywords = analyze_keywords(selected_article['content'])
 
-           # Visualization
+           # Visualization with Plotly
            df = pd.DataFrame(keywords, columns=['단어', '빈도수'])
-           st.bar_chart(df.set_index('단어'))
+           fig = go.Figure(data=[
+               go.Bar(
+                   x=df['단어'],
+                   y=df['빈도수'],
+                   marker_color='rgb(55, 83, 109)'
+               )
+           ])
+
+           fig.update_layout(
+               title='키워드 빈도 분석',
+               xaxis_title='키워드',
+               yaxis_title='빈도수',
+               height=500,
+               margin=dict(l=50, r=50, t=80, b=50)
+           )
+
+           st.plotly_chart(fig, use_container_width=True)
 
            st.write("**주요 키워드:**")
            for word, count in keywords:
@@ -595,7 +692,14 @@ elif menu == "기사 분석하기":
                # Compute text statistics
                word_count = len(re.findall(r'\b\w+\b', content))
                char_count = len(content)
-               sentence_count = len(re.split(r'[.!?]+', content))
+               try:
+                   # Split sentences with KSS
+                   sentences = kss.split_sentences(content)
+                   sentence_count = len(sentences)
+               except:
+                   # Fall back to basic splitting if KSS fails
+                   sentence_count = len(re.split(r'[.!?]+', content))
+
                avg_word_length = sum(len(word) for word in re.findall(r'\b\w+\b', content)) / word_count if word_count > 0 else 0
                avg_sentence_length = word_count / sentence_count if sentence_count > 0 else 0
 
@@ -615,136 +719,50 @@ elif menu == "기사 분석하기":
                with col2:
                    st.metric("평균 문장 길이", f"{avg_sentence_length:.1f}단어")
 
-               # Text complexity score (simple example)
+               # Text complexity score
                complexity_score = min(10, (avg_sentence_length / 10) * 5 + (avg_word_length / 5) * 5)
                st.progress(complexity_score / 10)
                st.write(f"텍스트 복잡성 점수: {complexity_score:.1f}/10")
-
-               # Frequency bar chart
-               st.subheader("품사별 분포 (한국어/영어 지원)")
-               try:
-                   # Check whether KoNLPy is installed
-                   try:
-                       from konlpy.tag import Okt
-                       konlpy_installed = True
-                   except ImportError:
-                       konlpy_installed = False
-                       st.warning("한국어 형태소 분석을 위해 KoNLPy를 설치해주세요: pip install konlpy")
-
-                   # Prepare the English POS tagger
-                   from nltk import pos_tag
-                   try:
-                       nltk.data.find('taggers/averaged_perceptron_tagger')
-                   except LookupError:
-                       nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
-
-                   # Language detection (simple heuristic)
-                   is_korean = bool(re.search(r'[가-힣]', content))
-
-                   if is_korean and konlpy_installed:
-                       # Korean morphological analysis
-                       okt = Okt()
-                       tagged = okt.pos(content)
-
-                       # Korean POS mapping
-                       pos_dict = {
-                           'Noun': '명사', 'NNG': '명사', 'NNP': '고유명사',
-                           'Verb': '동사', 'VV': '동사', 'VA': '형용사',
-                           'Adjective': '형용사',
-                           'Adverb': '부사',
-                           'Josa': '조사', 'Punctuation': '구두점',
-                           'Determiner': '관형사', 'Exclamation': '감탄사'
-                       }
-
-                       pos_counts = {'명사': 0, '동사': 0, '형용사': 0, '부사': 0, '조사': 0, '구두점': 0, '관형사': 0, '감탄사': 0, '기타': 0}
-
-                       for _, pos in tagged:
-                           if pos in pos_dict:
-                               pos_counts[pos_dict[pos]] += 1
-                           elif pos.startswith('N'):  # other noun-like tags
-                               pos_counts['명사'] += 1
-                           elif pos.startswith('V'):  # other verb-like tags
-                               pos_counts['동사'] += 1
-                           else:
-                               pos_counts['기타'] += 1
-
-                   else:
-                       # English POS tagging
-                       tokens = word_tokenize(content.lower())
-                       tagged = pos_tag(tokens)
-
-                       # English POS mapping
-                       pos_dict = {
-                           'NN': '명사', 'NNS': '명사', 'NNP': '고유명사', 'NNPS': '고유명사',
-                           'VB': '동사', 'VBD': '동사', 'VBG': '동사', 'VBN': '동사', 'VBP': '동사', 'VBZ': '동사',
-                           'JJ': '형용사', 'JJR': '형용사', 'JJS': '형용사',
-                           'RB': '부사', 'RBR': '부사', 'RBS': '부사'
-                       }
-
-                       pos_counts = {'명사': 0, '동사': 0, '형용사': 0, '부사': 0, '기타': 0}
-
-                       for _, pos in tagged:
-                           if pos in pos_dict:
-                               pos_counts[pos_dict[pos]] += 1
-                           else:
-                               pos_counts['기타'] += 1
-
-                   # Visualize the results
-                   pos_df = pd.DataFrame({
-                       '품사': list(pos_counts.keys()),
-                       '빈도': list(pos_counts.values())
-                   })
-
-                   st.bar_chart(pos_df.set_index('품사'))
-
-                   if is_korean:
-                       st.info("한국어 텍스트가 감지되었습니다.")
-                   else:
-                       st.info("영어 텍스트가 감지되었습니다.")
-               except Exception as e:
-                   st.error(f"품사 분석 중 오류 발생: {str(e)}")
-                   st.error(traceback.format_exc())
+
+               # POS analysis removed (drops the KoNLPy dependency)
+               st.info("상세 품사 분석은 현재 지원되지 않습니다.")
 
        elif analysis_type == "감정 분석":
            if st.button("감정 분석하기"):
-               if st.session_state.openai_api_key:
+               if st.session_state.openai_client:
                    with st.spinner("기사의 감정을 분석 중입니다..."):
                        try:
-                           # Sentiment-analysis prompt (legacy client)
-                           prompt = """
-                           다음 뉴스 기사의 감정과 논조를 분석하고, 아래 예시처럼 JSON만 반환하세요.
-                           불필요한 설명, 인사말, 기타 텍스트 없이 반드시 JSON만 출력하세요.
-
-                           예시:
-                           {
-                             "sentiment": "긍정적",
-                             "reason": "이 기사에서는 긍정적인 단어와 표현이 많이 사용되었습니다.",
-                             "keywords": [
-                               {"word": "희망", "score": 8},
-                               {"word": "성공", "score": 7},
-                               {"word": "기대", "score": 6},
-                               {"word": "성장", "score": 7},
-                               {"word": "혁신", "score": 8}
-                             ]
-                           }
-
-                           분석할 기사:
-                           제목: {title}
-                           내용: {content}
-                           """
-
-                           # Run the sentiment analysis
-                           response = openai.ChatCompletion.create(
+                           response = st.session_state.openai_client.chat.completions.create(
                                model="gpt-4.1-mini",
                                messages=[
-                                   {"role": "system", "content": "당신은 텍스트의 감정과 논조를 분석하는 전문가입니다. 다음 뉴스 기사의 감정과 논조를 분석하고, '긍정적', '부정적', '중립적' 중 하나로 분류해 주세요. 또한 기사에서 드러나는 핵심 감정 키워드를 5개 추출하고, 각 키워드별로 1-10 사이의 강도 점수를 매겨주세요. JSON 형식으로 다음과 같이 응답해주세요: {'sentiment': '긍정적/부정적/중립적', 'reason': '이유 설명...', 'keywords': [{'word': '키워드1', 'score': 8}, {'word': '키워드2', 'score': 7}, ...]}"},
-                                   {"role": "user", "content": prompt.format(title=selected_article['title'], content=selected_article['content'][:1500])}
+                                   {"role": "system", "content": """당신은 텍스트의 감정과 논조를 분석하는 전문가입니다.
+                                   다음 뉴스 기사의 감정과 논조를 분석하고, 반드시 아래 형식의 JSON으로 응답해주세요:
+                                   {
+                                       "sentiment": "긍정적/부정적/중립적",
+                                       "reason": "이유 설명...",
+                                       "keywords": [
+                                           {"word": "키워드1", "score": 8},
+                                           {"word": "키워드2", "score": 7}
+                                       ]
+                                   }"""},
+                                   {"role": "user", "content": f"다음 뉴스 기사를 분석해 주세요:\n\n제목: {selected_article['title']}\n\n내용: {selected_article['content'][:1500]}"}
                                ],
-                               max_tokens=800
+                               max_tokens=800,
+                               response_format={ "type": "json_object" }  # force a JSON response
                            )
 
-                           # Parse the JSON (legacy client)
-                           analysis_result = json.loads(response.choices[0].message['content'])
+                           # Log the raw response for debugging
+                           content = response.choices[0].message.content
+                           logging.info(f"API 응답: {content}")
+
+                           # Parse the JSON
+                           try:
+                               analysis_result = json.loads(content)
+                           except json.JSONDecodeError as e:
+                               logging.error(f"JSON 파싱 오류: {str(e)}")
+                               logging.error(f"파싱 시도한 내용: {content}")
+                               st.error("API 응답을 파싱하는 중 오류가 발생했습니다. 응답 형식이 올바르지 않습니다.")
+                               st.stop()  # use st.stop() instead of return
 
                           # Visualize the results
                           st.subheader("감정 분석 결과")
@@ -893,9 +911,9 @@ elif menu == "기사 분석하기":
 
                       except Exception as e:
                           st.error(f"감정 분석 오류: {str(e)}")
-                          st.code(traceback.format_exc())
+                          st.error(traceback.format_exc())
               else:
-                  st.warning("OpenAI API 키가 설정되어 있지 않습니다. 사이드바에서 API 키를 설정해주세요.")
+                  st.warning("OpenAI API 키를 사이드바에서 설정해주세요.")
 
elif menu == "새 기사 생성하기":
    st.header("새 기사 생성하기")
@@ -930,7 +948,7 @@ elif menu == "새 기사 생성하기":
    generate_image_too = st.checkbox("기사 생성 후 이미지도 함께 생성하기", value=True)
 
    if st.button("새 기사 생성하기"):
-       if st.session_state.openai_api_key:
+       if st.session_state.openai_client:
            with st.spinner("기사를 생성 중입니다..."):
                new_article = generate_article(selected_article['content'], prompt_text)
 
@@ -940,7 +958,6 @@ elif menu == "새 기사 생성하기":
            # Also generate an image (when the option is selected)
            if generate_image_too:
                with st.spinner("기사 관련 이미지를 생성 중입니다..."):
-                   # Prepare the image-generation prompt
                    image_prompt = f"""신문기사 제목 "{selected_article['title']}" 을 보고 이미지를 만들어줘
                    이미지에는 다음 요소가 포함되어야 합니다:
                    - 기사를 이해할 수 있는 도식
@@ -949,13 +966,13 @@ elif menu == "새 기사 생성하기":
                    """
 
                    # Generate the image
-                   image_url = generate_image(image_prompt)
+                   image = generate_image(image_prompt)
 
-                   if image_url and not image_url.startswith("이미지 생성 오류"):
+                   if isinstance(image, BytesIO):
                        st.subheader("생성된 이미지:")
-                       st.image(image_url)
+                       st.image(image, use_column_width=True)
                    else:
-                       st.error(image_url)
+                       st.error(image)
 
            # Option to save the generated article
            if st.button("생성된 기사 저장"):
@@ -973,8 +990,6 @@ elif menu == "새 기사 생성하기":
    else:
        st.warning("OpenAI API 키를 사이드바에서 설정해주세요.")
 
-
-
elif menu == "뉴스 기사 예약하기":
    st.header("뉴스 기사 예약하기")
 
@@ -1061,6 +1076,30 @@ elif menu == "뉴스 기사 예약하기":
    with tab3:
        st.subheader("스케줄러 제어 및 상태")
 
+       # Put the log viewer at the top
+       st.subheader("실시간 로그")
+       log_container = st.empty()
+
+       def update_logs():
+           try:
+               with open('/tmp/crawler.log', 'r') as f:
+                   logs = f.readlines()
+               return ''.join(logs[-100:])  # show only the last 100 lines
+           except Exception as e:
+               return f"로그 파일을 읽을 수 없습니다: {str(e)}"
+
+       # Auto-refresh the logs
+       if st.checkbox("로그 자동 업데이트", value=True):
+           log_content = update_logs()
+           log_container.text_area("최근 로그", value=log_content, height=400)
+       else:
+           if st.button("로그 새로고침"):
+               log_content = update_logs()
+               log_container.text_area("최근 로그", value=log_content, height=400)
+
+       st.divider()
+
+       # Scheduler controls
        col1, col2 = st.columns(2)
 
        with col1:
@@ -1144,20 +1183,4 @@ elif menu == "뉴스 기사 예약하기":
 
# Footer
st.markdown("---")
-st.markdown("© 뉴스 기사 도구 @conanssam")
-
-def extract_json_from_response(response_text):
-    # Extract only the JSON object (the first brace pair)
-    match = re.search(r'\{.*\}', response_text, re.DOTALL)
-    if match:
-        json_str = match.group(0)
-        try:
-            return json.loads(json_str)
-        except Exception as e:
-            st.error(f"JSON 파싱 오류: {str(e)}")
-            st.code(json_str)
-            return None
-    else:
-        st.error("응답에서 JSON을 찾을 수 없습니다.")
-        st.code(response_text)
-        return None
+st.markdown("© 뉴스 기사 도구 @conanssam")