Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import time
|
2 |
import hashlib
|
3 |
import hmac
|
4 |
import base64
|
5 |
import requests
|
6 |
-
import gradio as gr
|
7 |
import urllib.request
|
8 |
import urllib.parse
|
9 |
import json
|
10 |
-
import pandas as pd
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
-
import os
|
13 |
import tempfile
|
14 |
from datetime import datetime
|
15 |
from dotenv import load_dotenv # dotenv ์ถ๊ฐ
|
@@ -100,18 +104,10 @@ def get_blog_count(keyword):
|
|
100 |
print(f"Error fetching blog count for keyword '{keyword}': {e}")
|
101 |
return 0
|
102 |
|
103 |
-
def
|
104 |
-
api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
|
105 |
-
return api.get_keywords_data(chunk)
|
106 |
-
|
107 |
-
def get_blog_count_parallel(keyword):
|
108 |
-
return (keyword, get_blog_count(keyword))
|
109 |
-
|
110 |
-
def get_search_volumes(keyword):
|
111 |
"""
|
112 |
๋จ์ผ ํค์๋์ ์ ๊ฒ์๋์ ๊ฐ์ ธ์ค๋ ํจ์.
|
113 |
"""
|
114 |
-
api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
|
115 |
try:
|
116 |
data = api.get_keywords_data([keyword])
|
117 |
if 'keywordList' in data and len(data['keywordList']) > 0:
|
@@ -135,131 +131,93 @@ def get_search_volumes(keyword):
|
|
135 |
monthly_mobile = 0
|
136 |
|
137 |
total_searches = monthly_pc + monthly_mobile
|
138 |
-
|
|
|
139 |
# ์
๋ ฅํ ํค์๋์ ์ผ์นํ๋ ํญ๋ชฉ์ด ์์ ๊ฒฝ์ฐ
|
140 |
-
return (
|
141 |
else:
|
142 |
-
return (
|
143 |
except Exception as e:
|
144 |
print(f"Error fetching search volumes for keyword '{keyword}': {e}")
|
145 |
-
return (
|
146 |
-
|
147 |
-
def
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
if not all_data:
|
165 |
-
return [("Error", "๋ฐ์ดํฐ๊ฐ ๋ฐํ๋์ง ์์๊ฑฐ๋ API ์๋ต์ด ์ ํจํ์ง ์์ต๋๋ค.", "", "", "")]
|
166 |
-
|
167 |
-
unique_keywords = set()
|
168 |
-
for item in all_data:
|
169 |
-
keyword = item['relKeyword']
|
170 |
-
if keyword not in unique_keywords:
|
171 |
-
unique_keywords.add(keyword)
|
172 |
-
monthly_pc = item.get('monthlyPcQcCnt', 0)
|
173 |
-
monthly_mobile = item.get('monthlyMobileQcCnt', 0)
|
174 |
-
|
175 |
-
if isinstance(monthly_pc, str):
|
176 |
-
monthly_pc = monthly_pc.replace(',', '').replace('< 10', '0')
|
177 |
-
try:
|
178 |
-
monthly_pc = int(monthly_pc)
|
179 |
-
except ValueError:
|
180 |
-
monthly_pc = 0
|
181 |
-
if isinstance(monthly_mobile, str):
|
182 |
-
monthly_mobile = monthly_mobile.replace(',', '').replace('< 10', '0')
|
183 |
-
try:
|
184 |
-
monthly_mobile = int(monthly_mobile)
|
185 |
-
except ValueError:
|
186 |
-
monthly_mobile = 0
|
187 |
-
|
188 |
-
total_searches = monthly_pc + monthly_mobile
|
189 |
-
results.append((keyword, monthly_pc, monthly_mobile, total_searches))
|
190 |
-
|
191 |
-
if len(results) >= 100:
|
192 |
-
break
|
193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
else:
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
try:
|
215 |
-
keyword, blog_count = future.result()
|
216 |
-
results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], blog_count)
|
217 |
-
except Exception as e:
|
218 |
-
print(f"Error fetching blog count for keyword '{results[i][0]}': {e}")
|
219 |
-
results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], "Error")
|
220 |
-
else:
|
221 |
-
blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
|
222 |
-
temp_results = []
|
223 |
-
for future in blog_futures:
|
224 |
-
try:
|
225 |
-
keyword, blog_count = future.result()
|
226 |
-
temp_results.append((keyword, results[0][1], results[0][2], results[0][3], blog_count))
|
227 |
-
except Exception as e:
|
228 |
-
print(f"Error fetching blog count for keyword '{keyword}': {e}")
|
229 |
-
temp_results.append((keyword, results[0][1], results[0][2], results[0][3], "Error"))
|
230 |
-
results = temp_results
|
231 |
-
|
232 |
-
return results
|
233 |
-
|
234 |
-
def save_to_excel(results, keyword):
|
235 |
-
df = pd.DataFrame(results, columns=["ํค์๋", "PC์๊ฒ์๋", "๋ชจ๋ฐ์ผ์๊ฒ์๋", "ํ ํ์๊ฒ์๋", "๋ธ๋ก๊ทธ๋ฌธ์์"])
|
236 |
-
now = datetime.now().strftime('%Y-%m-%d')
|
237 |
-
sanitized_keyword = keyword.replace(' ', '_')
|
238 |
-
filename = f"{now}_{sanitized_keyword}_์ฐ๊ด๊ฒ์์ด.xlsx"
|
239 |
-
file_path = os.path.join(tempfile.gettempdir(), filename)
|
240 |
-
df.to_excel(file_path, index=False)
|
241 |
-
return file_path
|
242 |
-
|
243 |
-
def display_search_volumes(keywords, include_related):
|
244 |
-
keyword_list = [keyword.strip() for keyword in keywords.split(',') if keyword.strip()]
|
245 |
-
if not keyword_list:
|
246 |
-
return [("Error", "์
๋ ฅ๋ ํค์๋๊ฐ ์์ต๋๋ค.", "", "", "")], None
|
247 |
-
results = get_monthly_search_volumes(keyword_list, include_related_keywords=include_related)
|
248 |
-
file_path = save_to_excel(results, keywords)
|
249 |
-
return results, file_path
|
250 |
|
|
|
251 |
iface = gr.Interface(
|
252 |
-
fn=
|
253 |
-
inputs=[
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
outputs=[
|
258 |
-
gr.Dataframe(headers=["ํค์๋", "PC์๊ฒ์๋", "๋ชจ๋ฐ์ผ์๊ฒ์๋", "ํ ํ์๊ฒ์๋", "๋ธ๋ก๊ทธ๋ฌธ์์"]),
|
259 |
-
gr.File(label="๋ค์ด๋ก๋ ์์
ํ์ผ")
|
260 |
-
],
|
261 |
-
title="๋ค์ด๋ฒ ์๊ฒ์๋ ๊ฒ์๊ธฐ",
|
262 |
-
description="ํค์๋์ ์ ๊ฒ์๋๊ณผ ๋ธ๋ก๊ทธ ๋ฌธ์ ์๋ฅผ ํ์ธํ ์ ์์ต๋๋ค. ์ฐ๊ด๊ฒ์์ด๋ฅผ ํฌํจํ ์ง ์ ํํ์ธ์.",
|
263 |
)
|
264 |
|
265 |
-
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import re
|
4 |
+
from collections import Counter
|
5 |
+
import os
|
6 |
+
from openpyxl import load_workbook
|
7 |
+
from openpyxl.drawing.image import Image
|
8 |
import time
|
9 |
import hashlib
|
10 |
import hmac
|
11 |
import base64
|
12 |
import requests
|
|
|
13 |
import urllib.request
|
14 |
import urllib.parse
|
15 |
import json
|
|
|
16 |
from concurrent.futures import ThreadPoolExecutor
|
|
|
17 |
import tempfile
|
18 |
from datetime import datetime
|
19 |
from dotenv import load_dotenv # dotenv ์ถ๊ฐ
|
|
|
104 |
print(f"Error fetching blog count for keyword '{keyword}': {e}")
|
105 |
return 0
|
106 |
|
107 |
+
def get_search_volumes(keyword, api):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
"""
|
109 |
๋จ์ผ ํค์๋์ ์ ๊ฒ์๋์ ๊ฐ์ ธ์ค๋ ํจ์.
|
110 |
"""
|
|
|
111 |
try:
|
112 |
data = api.get_keywords_data([keyword])
|
113 |
if 'keywordList' in data and len(data['keywordList']) > 0:
|
|
|
131 |
monthly_mobile = 0
|
132 |
|
133 |
total_searches = monthly_pc + monthly_mobile
|
134 |
+
blog_count = get_blog_count(keyword)
|
135 |
+
return (monthly_pc, monthly_mobile, total_searches, blog_count)
|
136 |
# ์
๋ ฅํ ํค์๋์ ์ผ์นํ๋ ํญ๋ชฉ์ด ์์ ๊ฒฝ์ฐ
|
137 |
+
return (0, 0, 0, 0)
|
138 |
else:
|
139 |
+
return (0, 0, 0, 0)
|
140 |
except Exception as e:
|
141 |
print(f"Error fetching search volumes for keyword '{keyword}': {e}")
|
142 |
+
return (0, 0, 0, 0)
|
143 |
+
|
144 |
+
def _count_keywords(product_names):
    """Count, for every word, how many product names contain it.

    Punctuation is stripped and each name is split on whitespace; a word
    repeated inside one name is counted only once for that name.
    """
    counts = Counter()
    for name in product_names:
        # Cells can hold numbers as well as text; coerce so re.sub never
        # receives a non-string value.
        words = re.sub(r'[^\w\s]', '', str(name)).split()
        counts.update(set(words))
    return counts


def _insert_logo(ws, logo_path="ssboost-logo.png"):
    """Anchor the logo image at cell A1 of *ws* when the file exists."""
    if not os.path.exists(logo_path):
        print("ssboost-logo.png 파일이 존재하지 않습니다. 이미지를 삽입하지 않습니다.")
        return

    img = Image(logo_path)
    # Target size is 1.54 cm high by 5.69 cm wide. openpyxl interprets
    # width/height as pixels (96 per inch), not points, so convert with
    # 96 / 2.54 pixels per centimetre.
    pixels_per_cm = 96 / 2.54
    img.height = int(1.54 * pixels_per_cm)
    img.width = int(5.69 * pixels_per_cm)
    ws.add_image(img, "A1")


def process_excel(file):
    """Build a keyword-frequency workbook from column D of an uploaded sheet.

    Steps:
      1. Read the uploaded Excel file and take the non-empty cells of its
         fourth column (column D) as product names.
      2. Count in how many product names each word occurs and sort the
         words by that frequency, highest first.
      3. Write the table to ``output/keyword_counts.xlsx`` with the header
         on row 4 and the data from row 5, and place the logo at A1 when
         the image file is available.
      4. For every keyword, call ``get_search_volumes`` and store the PC,
         mobile and total monthly search counts and the blog document
         count in columns C-F of the keyword's row.

    Args:
        file: The upload handed over by Gradio, either an object exposing
            the file path as ``.name`` or the path itself as a string.

    Returns:
        Path of the generated workbook.

    Raises:
        ValueError: If no file was provided.
        IndexError: If the sheet has fewer than four columns.
    """
    if file is None:
        raise ValueError("No Excel file was provided.")

    # Accept both a temp-file wrapper (``.name``) and a plain path string.
    source_path = getattr(file, "name", file)
    df = pd.read_excel(source_path)

    product_column = 3  # zero-based index of Excel column D
    if df.shape[1] <= product_column:
        raise IndexError(
            "The uploaded sheet has no column D to read product names from."
        )
    product_names = df.iloc[:, product_column].dropna()

    keyword_counts = _count_keywords(product_names)
    result_df = pd.DataFrame(
        list(keyword_counts.items()), columns=['Keyword', 'Frequency']
    )
    result_df = result_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)

    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "keyword_counts.xlsx")

    # startrow=3 puts the header on worksheet row 4 and the data on row 5+.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        result_df.to_excel(writer, index=False, startrow=3)

    # Re-open the written file with openpyxl to add the logo and the
    # per-keyword analysis columns.
    wb = load_workbook(output_file)
    ws = wb.active
    _insert_logo(ws)

    api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)

    # NOTE(review): columns C-F are filled without header cells on row 4;
    # confirm whether labels are wanted there.
    for excel_row, keyword in enumerate(result_df['Keyword'], start=5):
        monthly_pc, monthly_mobile, total_searches, blog_count = get_search_volumes(keyword, api)
        ws.cell(row=excel_row, column=3, value=monthly_pc)       # C: monthly PC searches
        ws.cell(row=excel_row, column=4, value=monthly_mobile)   # D: monthly mobile searches
        ws.cell(row=excel_row, column=5, value=total_searches)   # E: total monthly searches
        ws.cell(row=excel_row, column=6, value=blog_count)       # F: blog document count

    wb.save(output_file)

    return output_file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
+
# Gradio interface definition: one Excel upload in, one generated workbook out.
iface = gr.Interface(
    fn=process_excel,
    inputs=gr.File(file_types=[".xlsx"]),  # only allow Excel files to be uploaded
    outputs="file",
    title="Excel Keyword Extractor with Naver Analysis",
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산한 후, 각 키워드의 검색량 및 블로그 문서 수를 분석하여 새로운 엑셀 파일로 출력합니다."
)

# Start the web UI only when the file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|