Kims12 committed on
Commit 0b91ba4 · verified · 1 Parent(s): 623047f

Update app.py

Files changed (1)
  1. app.py +261 -361
app.py CHANGED
@@ -1,389 +1,289 @@
  import gradio as gr
- import requests
- from bs4 import BeautifulSoup
- import urllib.parse  # module used to resolve the iframe path
  import re
  import logging
- import tempfile
- import pandas as pd
- import mecab  # uses the python-mecab-ko library
- import os
- import time
- import hmac
- import hashlib
- import base64
-
- # Debug-logging helper
- def debug_log(message: str):
-     print(f"[DEBUG] {message}")
 
- # --- Naver blog scraping ---
- def scrape_naver_blog(url: str) -> str:
-     debug_log("scrape_naver_blog ํ•จ์ˆ˜ ์‹œ์ž‘")
-     debug_log(f"์š”์ฒญ๋ฐ›์€ URL: {url}")
-     headers = {
-         "User-Agent": (
-             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-             "AppleWebKit/537.36 (KHTML, like Gecko) "
-             "Chrome/96.0.4664.110 Safari/537.36"
-         )
-     }
-     try:
-         response = requests.get(url, headers=headers)
-         debug_log("HTTP GET ์š”์ฒญ(๋ฉ”์ธ ํŽ˜์ด์ง€) ์™„๋ฃŒ")
-         if response.status_code != 200:
-             debug_log(f"์š”์ฒญ ์‹คํŒจ, ์ƒํƒœ์ฝ”๋“œ: {response.status_code}")
-             return f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ์ƒํƒœ์ฝ”๋“œ: {response.status_code}"
-         soup = BeautifulSoup(response.text, "html.parser")
-         debug_log("HTML ํŒŒ์‹ฑ(๋ฉ”์ธ ํŽ˜์ด์ง€) ์™„๋ฃŒ")
-         iframe = soup.select_one("iframe#mainFrame")
-         if not iframe:
-             debug_log("iframe#mainFrame ํƒœ๊ทธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
-             return "๋ณธ๋ฌธ iframe์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
-         iframe_src = iframe.get("src")
-         if not iframe_src:
-             debug_log("iframe src๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
-             return "๋ณธ๋ฌธ iframe์˜ src๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
-         parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
-         debug_log(f"iframe ํŽ˜์ด์ง€ ์š”์ฒญ URL: {parsed_iframe_url}")
-         iframe_response = requests.get(parsed_iframe_url, headers=headers)
-         debug_log("HTTP GET ์š”์ฒญ(iframe ํŽ˜์ด์ง€) ์™„๋ฃŒ")
-         if iframe_response.status_code != 200:
-             debug_log(f"iframe ์š”์ฒญ ์‹คํŒจ, ์ƒํƒœ์ฝ”๋“œ: {iframe_response.status_code}")
-             return f"iframe์—์„œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ์ƒํƒœ์ฝ”๋“œ: {iframe_response.status_code}"
-         iframe_soup = BeautifulSoup(iframe_response.text, "html.parser")
-         debug_log("HTML ํŒŒ์‹ฑ(iframe ํŽ˜์ด์ง€) ์™„๋ฃŒ")
-         title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
-         title = title_div.get_text(strip=True) if title_div else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
-         debug_log(f"์ถ”์ถœ๋œ ์ œ๋ชฉ: {title}")
-         content_div = iframe_soup.select_one('.se-main-container')
-         if content_div:
-             content = content_div.get_text("\n", strip=True)
-         else:
-             content = "๋ณธ๋ฌธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
-         debug_log("๋ณธ๋ฌธ ์ถ”์ถœ ์™„๋ฃŒ")
-         result = f"[์ œ๋ชฉ]\n{title}\n\n[๋ณธ๋ฌธ]\n{content}"
-         debug_log("์ œ๋ชฉ๊ณผ ๋ณธ๋ฌธ ํ•ฉ์นจ ์™„๋ฃŒ")
-         return result
-     except Exception as e:
-         debug_log(f"์—๋Ÿฌ ๋ฐœ์ƒ: {str(e)}")
-         return f"์Šคํฌ๋ž˜ํ•‘ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
 
- # --- Morphological analysis (reference code 1) ---
- def analyze_text(text: str):
-     logging.basicConfig(level=logging.DEBUG)
-     logger = logging.getLogger(__name__)
-     logger.debug("์›๋ณธ ํ…์ŠคํŠธ: %s", text)
-     filtered_text = re.sub(r'[^๊ฐ€-ํžฃ]', '', text)
-     logger.debug("ํ•„ํ„ฐ๋ง๋œ ํ…์ŠคํŠธ: %s", filtered_text)
-     if not filtered_text:
-         logger.debug("์œ ํšจํ•œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๊ฐ€ ์—†์Œ.")
-         return pd.DataFrame(columns=["๋‹จ์–ด", "๋นˆ๋„์ˆ˜"]), ""
-     mecab_instance = mecab.MeCab()
-     tokens = mecab_instance.pos(filtered_text)
-     logger.debug("ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ: %s", tokens)
-     freq = {}
-     for word, pos in tokens:
-         if word and word.strip() and pos.startswith("NN"):
-             freq[word] = freq.get(word, 0) + 1
-             logger.debug("๋‹จ์–ด: %s, ํ’ˆ์‚ฌ: %s, ๋นˆ๋„: %d", word, pos, freq[word])
-     sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
-     logger.debug("์ •๋ ฌ๋œ ๋‹จ์–ด ๋นˆ๋„: %s", sorted_freq)
-     df = pd.DataFrame(sorted_freq, columns=["๋‹จ์–ด", "๋นˆ๋„์ˆ˜"])
-     logger.debug("ํ˜•ํƒœ์†Œ ๋ถ„์„ DataFrame ์ƒ์„ฑ๋จ, shape: %s", df.shape)
-     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
-     df.to_excel(temp_file.name, index=False, engine='openpyxl')
-     temp_file.close()
-     logger.debug("Excel ํŒŒ์ผ ์ƒ์„ฑ๋จ: %s", temp_file.name)
-     return df, temp_file.name
 
- # --- Naver Search and Ads API (reference code 2) ---
- def generate_signature(timestamp, method, uri, secret_key):
-     message = f"{timestamp}.{method}.{uri}"
-     digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
-     return base64.b64encode(digest).decode()

- def get_header(method, uri, api_key, secret_key, customer_id):
-     timestamp = str(round(time.time() * 1000))
-     signature = generate_signature(timestamp, method, uri, secret_key)
-     return {
-         "Content-Type": "application/json; charset=UTF-8",
-         "X-Timestamp": timestamp,
-         "X-API-KEY": api_key,
-         "X-Customer": str(customer_id),
-         "X-Signature": signature
-     }

- def fetch_related_keywords(keyword):
-     debug_log(f"fetch_related_keywords ํ˜ธ์ถœ, ํ‚ค์›Œ๋“œ: {keyword}")
-     API_KEY = os.environ["NAVER_API_KEY"]
-     SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
-     CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
-     BASE_URL = "https://api.naver.com"
-     uri = "/keywordstool"
-     method = "GET"
-     headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
-     params = {
-         "hintKeywords": [keyword],
-         "showDetail": "1"
-     }
-     response = requests.get(BASE_URL + uri, params=params, headers=headers)
-     data = response.json()
-     if "keywordList" not in data:
-         return pd.DataFrame()
-     df = pd.DataFrame(data["keywordList"])
-     if len(df) > 100:
-         df = df.head(100)
-     def parse_count(x):
-         try:
-             return int(str(x).replace(",", ""))
-         except:
-             return 0
-     df["PC์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["monthlyPcQcCnt"].apply(parse_count)
-     df["๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["monthlyMobileQcCnt"].apply(parse_count)
-     df["ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["PC์›”๊ฒ€์ƒ‰๋Ÿ‰"] + df["๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰"]
-     df.rename(columns={"relKeyword": "์ •๋ณดํ‚ค์›Œ๋“œ"}, inplace=True)
-     result_df = df[["์ •๋ณดํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"]]
-     debug_log("fetch_related_keywords ์™„๋ฃŒ")
-     return result_df
 
- def fetch_blog_count(keyword):
-     debug_log(f"fetch_blog_count ํ˜ธ์ถœ, ํ‚ค์›Œ๋“œ: {keyword}")
-     client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
-     client_secret = os.environ["NAVER_SEARCH_CLIENT_SECRET"]
-     url = "https://openapi.naver.com/v1/search/blog.json"
-     headers = {
-         "X-Naver-Client-Id": client_id,
-         "X-Naver-Client-Secret": client_secret
-     }
-     params = {"query": keyword, "display": 1}
-     response = requests.get(url, headers=headers, params=params)
-     if response.status_code == 200:
-         data = response.json()
-         debug_log(f"fetch_blog_count ๊ฒฐ๊ณผ: {data.get('total', 0)}")
-         return data.get("total", 0)
-     else:
-         debug_log(f"fetch_blog_count ์˜ค๋ฅ˜, ์ƒํƒœ์ฝ”๋“œ: {response.status_code}")
-         return 0

- def create_excel_file(df):
-     with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
-         excel_path = tmp.name
-     df.to_excel(excel_path, index=False)
-     debug_log(f"Excel ํŒŒ์ผ ์ƒ์„ฑ๋จ: {excel_path}")
-     return excel_path
-
- def process_keyword(keywords: str, include_related: bool):
-     debug_log(f"process_keyword ํ˜ธ์ถœ, ํ‚ค์›Œ๋“œ๋“ค: {keywords}, ์—ฐ๊ด€๊ฒ€์ƒ‰์–ด ํฌํ•จ: {include_related}")
-     input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
-     result_dfs = []
-     for idx, kw in enumerate(input_keywords):
-         df_kw = fetch_related_keywords(kw)
-         if df_kw.empty:
-             continue
-         row_kw = df_kw[df_kw["์ •๋ณดํ‚ค์›Œ๋“œ"] == kw]
-         if not row_kw.empty:
-             result_dfs.append(row_kw)
-         else:
-             result_dfs.append(df_kw.head(1))
-         if include_related and idx == 0:
-             df_related = df_kw[df_kw["์ •๋ณดํ‚ค์›Œ๋“œ"] != kw]
-             if not df_related.empty:
-                 result_dfs.append(df_related)
-     if result_dfs:
-         result_df = pd.concat(result_dfs, ignore_index=True)
-         result_df.drop_duplicates(subset=["์ •๋ณดํ‚ค์›Œ๋“œ"], inplace=True)
-     else:
-         result_df = pd.DataFrame(columns=["์ •๋ณดํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"])
-     result_df["๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜"] = result_df["์ •๋ณดํ‚ค์›Œ๋“œ"].apply(fetch_blog_count)
-     result_df.sort_values(by="ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", ascending=False, inplace=True)
-     debug_log("process_keyword ์™„๋ฃŒ")
-     return result_df, create_excel_file(result_df)
 
- # --- Merge morphological analysis with search volume / blog post counts ---
- def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
-     debug_log("morphological_analysis_and_enrich ํ•จ์ˆ˜ ์‹œ์ž‘")
-     df_freq, _ = analyze_text(text)
-     if df_freq.empty:
-         debug_log("ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ๊ฐ€ ๋นˆ ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์ž…๋‹ˆ๋‹ค.")
-         return df_freq, ""
-     if remove_freq1:
-         before_shape = df_freq.shape
-         df_freq = df_freq[df_freq["๋นˆ๋„์ˆ˜"] != 1]
-         debug_log(f"๋นˆ๋„์ˆ˜ 1 ์ œ๊ฑฐ ์ ์šฉ๋จ. {before_shape} -> {df_freq.shape}")
-     keywords = "\n".join(df_freq["๋‹จ์–ด"].tolist())
-     debug_log(f"๋ถ„์„๋œ ํ‚ค์›Œ๋“œ: {keywords}")
-     df_keyword_info, _ = process_keyword(keywords, include_related=False)
-     debug_log("๊ฒ€์ƒ‰๋Ÿ‰ ๋ฐ ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜ ์กฐํšŒ ์™„๋ฃŒ")
-     merged_df = pd.merge(df_freq, df_keyword_info, left_on="๋‹จ์–ด", right_on="์ •๋ณดํ‚ค์›Œ๋“œ", how="left")
-     merged_df.drop(columns=["์ •๋ณดํ‚ค์›Œ๋“œ"], inplace=True)
-     merged_excel_path = create_excel_file(merged_df)
-     debug_log("morphological_analysis_and_enrich ํ•จ์ˆ˜ ์™„๋ฃŒ")
-     return merged_df, merged_excel_path
 
- # --- Direct keyword analysis (standalone) ---
- def direct_keyword_analysis(text: str, keyword_input: str):
-     debug_log("direct_keyword_analysis ํ•จ์ˆ˜ ์‹œ์ž‘")
-     keywords = re.split(r'[\n,]+', keyword_input)
-     keywords = [kw.strip() for kw in keywords if kw.strip()]
-     debug_log(f"์ž…๋ ฅ๋œ ํ‚ค์›Œ๋“œ ๋ชฉ๋ก: {keywords}")
-     results = []
-     for kw in keywords:
-         count = text.count(kw)
-         results.append((kw, count))
-         debug_log(f"ํ‚ค์›Œ๋“œ '{kw}'์˜ ๋นˆ๋„์ˆ˜: {count}")
-     df = pd.DataFrame(results, columns=["ํ‚ค์›Œ๋“œ", "๋นˆ๋„์ˆ˜"])
-     excel_path = create_excel_file(df)
-     debug_log("direct_keyword_analysis ํ•จ์ˆ˜ ์™„๋ฃŒ")
-     return df, excel_path
-
- # --- Combined analysis (morphological + direct keyword) ---
- def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
-     debug_log("combined_analysis ํ•จ์ˆ˜ ์‹œ์ž‘")
-     merged_df, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
-     if "์ง์ ‘์ž…๋ ฅ" not in merged_df.columns:
-         merged_df["์ง์ ‘์ž…๋ ฅ"] = ""
-     direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
-     direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
-     debug_log(f"์ž…๋ ฅ๋œ ์ง์ ‘ ํ‚ค์›Œ๋“œ: {direct_keywords}")
-     for dk in direct_keywords:
-         if dk in merged_df["๋‹จ์–ด"].values:
-             merged_df.loc[merged_df["๋‹จ์–ด"] == dk, "์ง์ ‘์ž…๋ ฅ"] = "์ง์ ‘์ž…๋ ฅ"
-         else:
-             freq = blog_text.count(dk)
-             df_direct, _ = process_keyword(dk, include_related=False)
-             if (not df_direct.empty) and (dk in df_direct["์ •๋ณดํ‚ค์›Œ๋“œ"].values):
-                 row = df_direct[df_direct["์ •๋ณดํ‚ค์›Œ๋“œ"] == dk].iloc[0]
-                 pc = row.get("PC์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
-                 mobile = row.get("๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
-                 total = row.get("ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
-                 blog_count = row.get("๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜", None)
-             else:
-                 pc = mobile = total = blog_count = None
-             new_row = {
-                 "๋‹จ์–ด": dk,
-                 "๋นˆ๋„์ˆ˜": freq,
-                 "PC์›”๊ฒ€์ƒ‰๋Ÿ‰": pc,
-                 "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰": mobile,
-                 "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰": total,
-                 "๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜": blog_count,
-                 "์ง์ ‘์ž…๋ ฅ": "์ง์ ‘์ž…๋ ฅ"
-             }
-             merged_df = pd.concat([merged_df, pd.DataFrame([new_row])], ignore_index=True)
-     merged_df = merged_df.sort_values(by="๋นˆ๋„์ˆ˜", ascending=False).reset_index(drop=True)
-     combined_excel = create_excel_file(merged_df)
-     debug_log("combined_analysis ํ•จ์ˆ˜ ์™„๋ฃŒ")
-     return merged_df, combined_excel
-
- # --- Analysis handler ---
- def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
-     debug_log("analysis_handler ํ•จ์ˆ˜ ์‹œ์ž‘")
-     if direct_keyword_only:
-         # run the standalone analysis when "์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ๋งŒ ๋ถ„์„" is selected
-         return direct_keyword_analysis(blog_text, direct_keyword_input)
      else:
-         # otherwise run the default combined analysis
-         return combined_analysis(blog_text, remove_freq1, direct_keyword_input)

- # --- Run the scraper ---
- def fetch_blog_content(url: str):
-     debug_log("fetch_blog_content ํ•จ์ˆ˜ ์‹œ์ž‘")
-     content = scrape_naver_blog(url)
-     debug_log("fetch_blog_content ํ•จ์ˆ˜ ์™„๋ฃŒ")
-     return content

- # --- Custom CSS ---
- custom_css = """
- /* Overall container style */
- .gradio-container {
-     max-width: 960px;
-     margin: auto;
-     font-family: 'Helvetica Neue', Arial, sans-serif;
-     background: #f5f7fa;
-     padding: 2rem;
- }
-
- /* Header style */
- .custom-header {
-     text-align: center;
-     font-size: 2.5rem;
-     font-weight: bold;
-     margin-bottom: 1.5rem;
-     color: #333;
- }
-
- /* Group box style */
- .custom-group {
-     background: #ffffff;
-     border-radius: 8px;
-     padding: 1.5rem;
-     box-shadow: 0 2px 8px rgba(0,0,0,0.1);
-     margin-bottom: 1.5rem;
- }
-
- /* Button style */
- .custom-button {
-     background-color: #007bff;
-     color: #fff;
-     border: none;
-     border-radius: 4px;
-     padding: 0.6rem 1.2rem;
-     font-size: 1rem;
-     cursor: pointer;
- }
-
- /* Checkbox style */
- .custom-checkbox {
-     margin-right: 1rem;
- }
-
- /* Results table and download button */
- .custom-result {
-     margin-top: 1.5rem;
- }
-
- /* Center alignment */
- .centered {
-     display: flex;
-     justify-content: center;
-     align-items: center;
  }
  """

- # --- Gradio interface ---
- with gr.Blocks(title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ํ˜•ํƒœ์†Œ ๋ถ„์„ ์„œ๋น„์Šค", css=custom_css) as demo:
-     gr.HTML("<div class='custom-header'>๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ํ˜•ํƒœ์†Œ ๋ถ„์„ ์„œ๋น„์Šค</div>")
-     # put the blog link and the scrape button in one group (button centered)
-     with gr.Group(elem_classes="custom-group"):
-         with gr.Row():
-             blog_url_input = gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ", placeholder="์˜ˆ: https://blog.naver.com/ssboost/222983068507", lines=1)
-         with gr.Row(elem_classes="centered"):
-             scrape_button = gr.Button("์Šคํฌ๋ž˜ํ•‘ ์‹คํ–‰", elem_classes="custom-button")
-     with gr.Group(elem_classes="custom-group"):
-         blog_content_box = gr.Textbox(label="๋ธ”๋กœ๊ทธ ๋‚ด์šฉ (์ˆ˜์ • ๊ฐ€๋Šฅ)", lines=10, placeholder="์Šคํฌ๋ž˜ํ•‘๋œ ๋ธ”๋กœ๊ทธ ๋‚ด์šฉ์ด ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
-     with gr.Group(elem_classes="custom-group"):
-         with gr.Row():
-             remove_freq_checkbox = gr.Checkbox(label="๋นˆ๋„์ˆ˜1 ์ œ๊ฑฐ", value=True, elem_classes="custom-checkbox")
          with gr.Row():
-             # place the "์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ๋งŒ ๋ถ„์„" checkbox below "๋นˆ๋„์ˆ˜1 ์ œ๊ฑฐ"
-             direct_keyword_only_checkbox = gr.Checkbox(label="์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ๋งŒ ๋ถ„์„", value=False, elem_classes="custom-checkbox")
-         with gr.Row():
-             direct_keyword_box = gr.Textbox(label="์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ (์—”ํ„ฐ ๋˜๋Š” ','๋กœ ๊ตฌ๋ถ„)", lines=2, placeholder="์˜ˆ: ํ‚ค์›Œ๋“œ1, ํ‚ค์›Œ๋“œ2\nํ‚ค์›Œ๋“œ3")
-     with gr.Group(elem_classes="custom-group"):
-         with gr.Row(elem_classes="centered"):
-             analyze_button = gr.Button("๋ถ„์„ ์‹คํ–‰", elem_classes="custom-button")
-     with gr.Group(elem_classes="custom-group custom-result"):
-         result_df = gr.Dataframe(label="ํ†ตํ•ฉ ๋ถ„์„ ๊ฒฐ๊ณผ (๋‹จ์–ด, ๋นˆ๋„์ˆ˜, ๊ฒ€์ƒ‰๋Ÿ‰, ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜, ์ง์ ‘์ž…๋ ฅ)", interactive=True)
-     with gr.Group(elem_classes="custom-group"):
-         excel_file = gr.File(label="Excel ๋‹ค์šด๋กœ๋“œ")

-     # wire up events
-     scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
-     analyze_button.click(fn=analysis_handler,
-                          inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
-                          outputs=[result_df, excel_file])

  if __name__ == "__main__":
-     debug_log("Gradio ์•ฑ ์‹คํ–‰ ์‹œ์ž‘")
      demo.launch()
-     debug_log("Gradio ์•ฑ ์‹คํ–‰ ์ข…๋ฃŒ")

  import gradio as gr
+ import pandas as pd
+ import tempfile
  import re
  import logging
+ from mecab import MeCab

+ logging.basicConfig(level=logging.DEBUG)
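+ # Assumed runtime dependencies (not pinned in this commit): gradio, pandas,
+ # python-mecab-ko (the `mecab` module), plus openpyxl for pd.read_excel on
+ # .xlsx files and tabulate for DataFrame.to_markdown.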

+ ##############################
+ # 1) Shared helper functions
+ ##############################

+ def preprocess_text(text: str) -> str:
+     """
+     Strip commas, periods, whitespace, digits, Latin letters, and any other
+     character outside the Hangul range (๊ฐ€-ํžฃ), leaving only the Korean
+     characters run together.
+     """
+     return re.sub(r'[^๊ฐ€-ํžฃ]', '', text)

+ def expand_columns_if_needed(df, needed_index: int):
+     """
+     If df does not yet have a (needed_index + 1)-th column, extend it in
+     place with empty columns.
+     e.g. needed_index=13 -> to write column N (the 14th column), grow df
+     until df.shape[1] is at least 14.
+     """
+     while df.shape[1] <= needed_index:
+         # append an empty column at the end
+         df[df.shape[1]] = None
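+     # Note: because the frame is read with header=None, column labels are the
+     # integers 0..n-1, so df.shape[1] is always the next unused label; this
+     # holds for every frame the helper receives in this file.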

+ ##############################
+ # 2) Keyword counting
+ ##############################

+ def count_keywords(main_text, excel_file, direct_input):
+     """
+     - If keywords are entered directly (newline-separated), they take
+       precedence (column A = keyword, column B = count).
+     - Otherwise the uploaded Excel file is used:
+         * read without a header row (header=None), so row 1 is preserved as-is
+         * A5~A10000: keywords
+         * N5~N10000: where the counts are recorded (column index 13)
+     - The body text is reduced to Hangul only and frequencies are computed
+       with .count(keyword).
+     - Only keywords that occur at least once are shown in the result
+       (Markdown) table.
+     """

+     logging.debug(f"main_text: {main_text}")
+     logging.debug(f"excel_file: {excel_file}")
+     logging.debug(f"direct_input: {direct_input}")
+
+     # preprocess the body text
+     cleaned_text = preprocess_text(main_text)
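+     # .count() does plain substring matching on the de-spaced Hangul text, so
+     # a keyword that is contained inside a longer keyword is counted for both.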

+     direct_input = direct_input.strip()
+     if direct_input:
+         # ===== use the directly entered keywords =====
+         keywords = [kw.strip() for kw in direct_input.split('\n') if kw.strip()]
+         if not keywords:
+             return ("์ง์ ‘ ์ž…๋ ฅ ํ‚ค์›Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", None)
+
+         # counts
+         counts = [cleaned_text.count(k) for k in keywords]
+
+         # keep only keywords that occur at least once
+         filtered = [(k, c) for k, c in zip(keywords, counts) if c > 0]
+
+         if not filtered:
+             # every keyword occurred zero times
+             msg = "๋ณธ๋ฌธ์— ํ•ด๋‹น ํ‚ค์›Œ๋“œ๊ฐ€ ์ „ํ˜€ ๋“ฑ์žฅํ•˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
+             # still build and return the result CSV (columns A, B)
+             tmp_df = pd.DataFrame({"๋ช…์‚ฌ": keywords, "๋นˆ๋„์ˆ˜": counts})
+             with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+                 tmp_df.to_csv(tmp.name, index=False, encoding='utf-8-sig')
+                 tmp_path = tmp.name
+             return (msg, tmp_path)
+
+         # Markdown table of the keywords that occur at least once
+         lines = ["| ๋ช…์‚ฌ | ๋นˆ๋„์ˆ˜ |", "|---|---|"]
+         for (k, c) in filtered:
+             lines.append(f"| {k} | {c} |")
+         md_table = "\n".join(lines)
+
+         # save the CSV
+         tmp_df = pd.DataFrame({"๋ช…์‚ฌ": keywords, "๋นˆ๋„์ˆ˜": counts})
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+             tmp_df.to_csv(tmp.name, index=False, encoding='utf-8-sig')
+             tmp_path = tmp.name
+
+         return (md_table, tmp_path)
+
      else:
+         # ===== use the uploaded Excel file =====
+         if not excel_file:
+             return ("์—‘์…€ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜๊ฑฐ๋‚˜ ํ‚ค์›Œ๋“œ๋ฅผ ์ง์ ‘ ์ž…๋ ฅํ•˜์„ธ์š”.", None)
+
+         # 1) read the whole sheet with header=None so row 1 is preserved as-is
+         df = pd.read_excel(excel_file.name, header=None)
+
+         # 2) A5~A10000 -> keywords (row indices 4~9999)
+         max_row = min(df.shape[0], 10000)  # the smaller of the actual row count and 10000
+         sub_df = df.iloc[4:max_row, 0]     # first column (index 0)
+
+         # strip whitespace and drop NaN
+         keywords = sub_df.dropna().astype(str).apply(lambda x: x.strip()).tolist()
+         if not keywords:
+             return ("A5~A10000 ๋ฒ”์œ„์— ํ‚ค์›Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", None)
+
+         # counts
+         counts = [cleaned_text.count(k) for k in keywords]
+
+         # keep only keywords that occur at least once
+         filtered = [(k, c) for k, c in zip(keywords, counts) if c > 0]
+         if not filtered:
+             msg = "๋ณธ๋ฌธ์— ํ•ด๋‹น ํ‚ค์›Œ๋“œ๊ฐ€ ์ „ํ˜€ ๋“ฑ์žฅํ•˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค(0ํšŒ)."
+             # still write the counts into N5~N10000
+             expand_columns_if_needed(df, 13)  # column N = index 13
+             for i, cnt_val in enumerate(counts):
+                 row_idx = 4 + i
+                 if row_idx < df.shape[0]:
+                     df.iloc[row_idx, 13] = cnt_val
+
+             with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+                 # header=False keeps the sheet's own row 1 as the first CSV row
+                 df.to_csv(tmp.name, index=False, header=False, encoding='utf-8-sig')
+                 tmp_path = tmp.name
+             return (msg, tmp_path)
+
+         # Markdown table of the keywords that occur at least once
+         lines = ["| ๋ช…์‚ฌ | ๋นˆ๋„์ˆ˜ |", "|---|---|"]
+         for (k, c) in filtered:
+             lines.append(f"| {k} | {c} |")
+         md_table = "\n".join(lines)
+
+         # write the counts into N5~N10000
+         expand_columns_if_needed(df, 13)
+         for i, cnt_val in enumerate(counts):
+             row_idx = 4 + i
+             if row_idx < df.shape[0]:
+                 df.iloc[row_idx, 13] = cnt_val
+
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+             # header=False keeps the sheet's own row 1 as the first CSV row
+             df.to_csv(tmp.name, index=False, header=False, encoding='utf-8-sig')
+             tmp_path = tmp.name
+
+         return (md_table, tmp_path)
 

+ ##############################
+ # 3) Morphological-analysis-based keyword counting
+ ##############################

+ def morph_analysis_and_count(text: str):
+     """
+     1) Keep only the Hangul in the input text
+     2) Run MeCab morphological analysis (python-mecab-ko)
+     3) Extract nouns, including compound nouns
+     4) Search the body text again for each noun and count its occurrences
+     """
+     # 1) preprocess
+     cleaned = preprocess_text(text)
+
+     # 2) MeCab analysis
+     tagger = MeCab()
+     parsed = tagger.pos(cleaned)
+
+     # 3) keep nouns (and compound nouns) only
+     noun_tags = ['NNG', 'NNP', 'NP', 'NNB']
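+     # Tag glossary (Sejong-style tags used by MeCab-ko): NNG general noun,
+     # NNP proper noun, NP pronoun, NNB bound (dependent) noun.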
+     nouns = [word for (word, pos) in parsed if pos in noun_tags]
+
+     # deduplicate into a list of unique keywords
+     unique_nouns = list(set(nouns))
+
+     # 4) count each noun's occurrences in the body text
+     freq_dict = {}
+     for noun in unique_nouns:
+         count = cleaned.count(noun)
+         freq_dict[noun] = count
+
+     filtered_freq = {k: v for k, v in freq_dict.items() if v > 0}
+
+     if not filtered_freq:
+         return "์ถ”์ถœ๋œ ๋ช…์‚ฌ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", None
+
+     freq_df = pd.DataFrame(list(filtered_freq.items()), columns=['๋ช…์‚ฌ', '๋นˆ๋„์ˆ˜'])
+     freq_df = freq_df.sort_values(by='๋นˆ๋„์ˆ˜', ascending=False).reset_index(drop=True)
+
+     try:
+         md_table = freq_df.to_markdown(index=False)
+     except ImportError:
+         md_table = "Markdown ๋ณ€ํ™˜์„ ์œ„ํ•ด 'tabulate' ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค."
+         return md_table, None
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+         freq_df.to_csv(tmp.name, index=False, encoding='utf-8-sig')
+         tmp_path = tmp.name
+
+     return md_table, tmp_path

+ ########################
+ # 4) Gradio interface  #
+ ########################

+ # add button colors on top of the existing CSS
+ css = """

+ /* recolor the '๋ถ„์„ํ•˜๊ธฐ' (analyze) buttons */
+ #run_analysis_button > button,
+ #morph_analysis_button > button {
+     background-color: #EA580C !important;  /* deep orange */
+     color: #FFFFFF !important;             /* white text */
  }
  """

+ with gr.Blocks(
+     theme=gr.themes.Soft(
+         primary_hue=gr.themes.Color(
+             c50="#FFF7ED",
+             c100="#FFEDD5",
+             c200="#FED7AA",
+             c300="#FDBA74",
+             c400="#FB923C",
+             c500="#F97316",
+             c600="#EA580C",
+             c700="#C2410C",
+             c800="#9A3412",
+             c900="#7C2D12",
+             c950="#431407",
+         ),
+         secondary_hue="zinc",
+         neutral_hue="zinc",
+         font=("Pretendard", "sans-serif")
+     ),
+     css=css
+ ) as demo:
+     with gr.Tab("ํ‚ค์›Œ๋“œ ์นด์šดํŠธ"):
          with gr.Row():
+             # left: input area
+             with gr.Column():
+                 main_textbox = gr.Textbox(
+                     label="๋ณธ๋ฌธ ํ…์ŠคํŠธ",
+                     lines=16,
+                     placeholder="์—ฌ๊ธฐ์— ๊ธด ๋ณธ๋ฌธ์„ ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”."
+                 )
+                 keyword_input = gr.Textbox(
+                     label="(์„ ํƒ) ์ง์ ‘ ์ž…๋ ฅ ํ‚ค์›Œ๋“œ - ์—”ํ„ฐ๋กœ ๊ตฌ๋ถ„",
+                     lines=6,
+                     placeholder="์˜ˆ)\n์ดˆ์ŒํŒŒ๊ฐ€์Šต๊ธฐ\n๊ฐ€์Šต๊ธฐ\n..."
+                 )
+                 excel_input = gr.File(
+                     label="(์„ ํƒ) ์—‘์…€ ์—…๋กœ๋“œ"
+                 )
+                 # give the button an elem_id so the CSS above can target it
+                 run_button = gr.Button("๋ถ„์„ํ•˜๊ธฐ", elem_id="run_analysis_button")
+
+             # right: output area
+             with gr.Column():
+                 output_md = gr.Markdown(label="๊ฒฐ๊ณผ ํ‘œ")
+                 output_file = gr.File(label="๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ")
+
+         run_button.click(
+             fn=count_keywords,
+             inputs=[main_textbox, excel_input, keyword_input],
+             outputs=[output_md, output_file]
+         )

+     with gr.Tab("ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ธฐ๋ฐ˜ ์นด์šดํŠธ"):
+         with gr.Row():
+             # left: input area
+             with gr.Column():
+                 morph_text_input = gr.Textbox(
+                     label="๋ณธ๋ฌธ ํ…์ŠคํŠธ",
+                     lines=16,
+                     placeholder="์—ฌ๊ธฐ์— ๊ธด ๋ณธ๋ฌธ์„ ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”."
+                 )
+                 # give the button an elem_id so the CSS above can target it
+                 morph_run_button = gr.Button("๋ถ„์„ํ•˜๊ธฐ", elem_id="morph_analysis_button")
+
+             # right: output area
+             with gr.Column():
+                 morph_result_display = gr.Markdown(label="๋ถ„์„ ๊ฒฐ๊ณผ")
+                 morph_download_button = gr.File(label="๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ")
+
+         morph_run_button.click(
+             fn=morph_analysis_and_count,
+             inputs=morph_text_input,
+             outputs=[morph_result_display, morph_download_button]
+         )

  if __name__ == "__main__":
      demo.launch()