Update app.py
app.py (CHANGED)
@@ -1,19 +1,15 @@
-import gradio as gr
-import pandas as pd
-import re
-from collections import Counter
-import os
-from openpyxl import load_workbook
-from openpyxl.drawing.image import Image
 import time
 import hashlib
 import hmac
 import base64
 import requests
+import gradio as gr
 import urllib.request
 import urllib.parse
 import json
+import pandas as pd
 from concurrent.futures import ThreadPoolExecutor
+import os
 import tempfile
 from datetime import datetime
 from dotenv import load_dotenv  # added for .env support
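The configuration block between the imports and get_blog_count (old lines 20-78) is unchanged and therefore not shown in this diff. Given the load_dotenv import and the names referenced later (BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID, CLIENT_ID, CLIENT_SECRET), it presumably loads credentials from a local .env file along the lines of the sketch below; the environment variable keys are assumptions, not part of the commit:

    import os
    from dotenv import load_dotenv

    load_dotenv()  # pull key/value pairs from a local .env file into the process environment

    # Naver SearchAd API credentials (env var names assumed)
    BASE_URL = "https://api.naver.com"
    API_KEY = os.getenv("NAVER_API_KEY")
    SECRET_KEY = os.getenv("NAVER_SECRET_KEY")
    CUSTOMER_ID = os.getenv("NAVER_CUSTOMER_ID")

    # Naver Open API (blog search) credentials (env var names assumed)
    CLIENT_ID = os.getenv("NAVER_CLIENT_ID")
    CLIENT_SECRET = os.getenv("NAVER_CLIENT_SECRET")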
@@ -79,13 +75,6 @@ def get_blog_count(keyword):
     # Load the client ID and secret from environment variables.
     client_id = CLIENT_ID
     client_secret = CLIENT_SECRET
-
-    # Decode the keyword if it arrives as a bytes object
-    if isinstance(keyword, bytes):
-        keyword = keyword.decode('utf-8')
-    elif not isinstance(keyword, str):
-        keyword = str(keyword)
-
     encText = urllib.parse.quote(keyword)
     url = "https://openapi.naver.com/v1/search/blog?query=" + encText
     request = urllib.request.Request(url)
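The lines between this hunk and the next (old lines 92-96, unchanged) presumably attach the credential headers and issue the request. For reference, a minimal standalone version of the same lookup, assuming the standard X-Naver-Client-Id / X-Naver-Client-Secret header scheme of the Naver Open API; the 'total' field of the JSON response is what the next hunk starts returning instead of the whole payload:

    import json
    import urllib.parse
    import urllib.request

    def blog_post_count(keyword, client_id, client_secret):
        # URL-encode the keyword and query the blog search endpoint
        url = "https://openapi.naver.com/v1/search/blog?query=" + urllib.parse.quote(keyword)
        request = urllib.request.Request(url)
        request.add_header("X-Naver-Client-Id", client_id)
        request.add_header("X-Naver-Client-Secret", client_secret)
        with urllib.request.urlopen(request) as response:
            if response.getcode() != 200:
                return 0
            data = json.loads(response.read().decode('utf-8'))
        return data.get('total', 0)  # 'total' is the number of matching blog posts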
@@ -97,127 +86,94 @@ def get_blog_count(keyword):
         if rescode == 200:
             response_body = response.read()
             data = json.loads(response_body.decode('utf-8'))
-            return data
+            return data['total']
         else:
             return 0
     except Exception as e:
         print(f"Error fetching blog count for keyword '{keyword}': {e}")
         return 0
 
-def get_search_volumes(keyword, api):
-    """
-    Fetch the monthly search volumes for a single keyword.
-    """
-    try:
-        data = api.get_keywords_data([keyword])
-        if 'keywordList' in data and len(data['keywordList']) > 0:
-            # Find the entry in keywordList that matches the input keyword.
-            for item in data['keywordList']:
-                if item['relKeyword'].strip().lower() == keyword.strip().lower():
-                    monthly_pc = item.get('monthlyPcQcCnt', 0)
-                    monthly_mobile = item.get('monthlyMobileQcCnt', 0)
-
-                    if isinstance(monthly_pc, str):
-                        monthly_pc = monthly_pc.replace(',', '').replace('< 10', '0')
-                        try:
-                            monthly_pc = int(monthly_pc)
-                        except ValueError:
-                            monthly_pc = 0
-                    if isinstance(monthly_mobile, str):
-                        monthly_mobile = monthly_mobile.replace(',', '').replace('< 10', '0')
-                        try:
-                            monthly_mobile = int(monthly_mobile)
-                        except ValueError:
-                            monthly_mobile = 0
-
-                    total_searches = monthly_pc + monthly_mobile
-                    blog_count = get_blog_count(keyword)
-                    return (monthly_pc, monthly_mobile, total_searches, blog_count)
-            # No entry in keywordList matched the input keyword
-            return (0, 0, 0, 0)
-        else:
-            return (0, 0, 0, 0)
-    except Exception as e:
-        print(f"Error fetching search volumes for keyword '{keyword}': {e}")
-        return (0, 0, 0, 0)
-
-def process_excel(file):
-    # Read the Excel file
-    df = pd.read_excel(file.name)
-
-    # Extract the data in column D
-    product_names = df.iloc[:, 3].dropna()  # columns are zero-indexed, so column D is index 3
-
-    # Extract keywords and count frequencies
-    all_keywords = []
-
-    for name in product_names:
-        # Strip special characters and split on whitespace
-        words = re.sub(r'[^\w\s]', '', name).split()
-        # Drop duplicates within a single product name
-        unique_words = set(words)
-        all_keywords.extend(unique_words)
-
-    # Count frequencies
-    keyword_counts = Counter(all_keywords)
-
-    # Collect the results into a DataFrame
-    result_df = pd.DataFrame(keyword_counts.items(), columns=['Keyword', 'Frequency'])
-    result_df = result_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)
-
-    # Make sure the output directory exists, then save the file
-    output_dir = "output"
-    if not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-
-    output_file = os.path.join(output_dir, "keyword_counts.xlsx")
-
-    # Write the data to the Excel file starting at cells A4/B4
-    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
-        result_df.to_excel(writer, index=False, startrow=3)  # startrow=3 starts writing at the 4th row (A4, B4)
-
-    # Insert an image into cell A1 of the Excel file
-    wb = load_workbook(output_file)
-    ws = wb.active
-
-    # Insert ssboost-logo.png into cell A1
-    if os.path.exists("ssboost-logo.png"):
-        img = Image("ssboost-logo.png")
-
-        # Set the image size (1.54 cm high, 5.69 cm wide)
-        img.height = int(1.54 * 28.3465)  # 1 cm = 28.3465 points
-        img.width = int(5.69 * 28.3465)   # 1 cm = 28.3465 points
-
-        ws.add_image(img, "A1")
-    else:
-        print("ssboost-logo.png does not exist; skipping image insertion.")
-
-    # Add keyword analysis using the Naver API
+def get_keywords_data_chunk(chunk):
     api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
+    return api.get_keywords_data(chunk)
 
-    # Append the keyword analysis results starting in column C
-    for idx, row in result_df.iterrows():
-        keyword = row['Keyword']
-        monthly_pc, monthly_mobile, total_searches, blog_count = get_search_volumes(keyword, api)
-        excel_row = idx + 5  # data rows start at row 5 (A5)
-        ws.cell(row=excel_row, column=3, value=monthly_pc)      # column C: monthly PC searches
-        ws.cell(row=excel_row, column=4, value=monthly_mobile)  # column D: monthly mobile searches
-        ws.cell(row=excel_row, column=5, value=total_searches)  # column E: total monthly searches
-        ws.cell(row=excel_row, column=6, value=blog_count)      # column F: blog post count
-
-    # Save the Excel file
-    wb.save(output_file)
-
-    return output_file
+def get_blog_count_parallel(keyword):
+    return (keyword, get_blog_count(keyword))
 
+def get_monthly_search_volumes(keywords):
+    all_data = []
+    chunk_size = 10  # request keywords in chunks of 10
+
+    # Issue the keyword-tool requests in parallel
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        futures = [executor.submit(get_keywords_data_chunk, keywords[i:i+chunk_size]) for i in range(0, len(keywords), chunk_size)]
+        for future in futures:
+            try:
+                data = future.result()
+                if 'keywordList' in data:
+                    all_data.extend(data['keywordList'])
+            except Exception as e:
+                print(f"Error fetching keywords data chunk: {e}")
+
+    if not all_data:
+        return [("Error", "No data was returned, or the API response was invalid.", "", "", "")]  # padded to match the added blog post count column
+
+    results = []
+    unique_keywords = set()
+    for item in all_data:
+        keyword = item['relKeyword']
+        if keyword not in unique_keywords:
+            unique_keywords.add(keyword)
+            monthly_pc = item['monthlyPcQcCnt']
+            monthly_mobile = item['monthlyMobileQcCnt']
+
+            if isinstance(monthly_pc, str):
+                monthly_pc = int(monthly_pc.replace(',', '').replace('< 10', '0'))
+            if isinstance(monthly_mobile, str):
+                monthly_mobile = int(monthly_mobile.replace(',', '').replace('< 10', '0'))
+
+            total_searches = monthly_pc + monthly_mobile
+            results.append((keyword, monthly_pc, monthly_mobile, total_searches))
+
+            if len(results) >= 100:
+                break
+
+    # Fetch the blog post counts in parallel
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
+        for i, future in enumerate(blog_futures):
+            try:
+                keyword, blog_count = future.result()
+                results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], blog_count)
+            except Exception as e:
+                print(f"Error fetching blog count for keyword '{results[i][0]}': {e}")
+                results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], "Error")
+
+    return results
+
+def save_to_excel(results, keyword):
+    df = pd.DataFrame(results, columns=["Keyword", "Monthly PC Searches", "Monthly Mobile Searches", "Total Monthly Searches", "Blog Post Count"])
+    now = datetime.now().strftime('%Y-%m-%d')
+    sanitized_keyword = keyword.replace(' ', '_')
+    filename = f"{now}_{sanitized_keyword}_related_keywords.xlsx"
+    file_path = os.path.join(tempfile.gettempdir(), filename)
+    df.to_excel(file_path, index=False)
+    return file_path
+
+def display_search_volumes(keywords):
+    keyword_list = [keyword.strip() for keyword in keywords.split(',')]
+    results = get_monthly_search_volumes(keyword_list)
+    file_path = save_to_excel(results, keywords)
+    return results, file_path
 
-# Define the Gradio interface
 iface = gr.Interface(
-    fn=
-    inputs=gr.
-    outputs=
+    fn=display_search_volumes,
+    inputs=gr.Textbox(placeholder="Enter keywords"),
+    outputs=[
+        gr.Dataframe(headers=["Keyword", "Monthly PC Searches", "Monthly Mobile Searches", "Total Monthly Searches", "Blog Post Count"]),
+        gr.File(label="Download Excel file")
+    ],
+    title="Naver Monthly Search Volume Checker",
 )
 
-iface.launch()
+iface.launch(share=True)  # share=True generates a public link
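Note that get_keywords_data_chunk builds a fresh NaverAPI client per chunk; the class itself lives in the unchanged part of app.py and does not appear in this commit. Below is a minimal sketch of the helper the new code relies on, assuming the documented Naver SearchAd API signature scheme (HMAC-SHA256 over "{timestamp}.{method}.{uri}", which also matches the time, hashlib, hmac, and base64 imports kept at the top of the file); everything except the get_keywords_data name and constructor arguments is an assumption:

    class NaverAPI:
        # Hypothetical sketch; the real class is defined in the unchanged part of app.py.
        def __init__(self, base_url, api_key, secret_key, customer_id):
            self.base_url = base_url  # e.g. "https://api.naver.com"
            self.api_key = api_key
            self.secret_key = secret_key
            self.customer_id = customer_id

        def _signature(self, timestamp, method, uri):
            # Sign "{timestamp}.{method}.{uri}" with HMAC-SHA256 and base64-encode it
            message = f"{timestamp}.{method}.{uri}"
            digest = hmac.new(self.secret_key.encode(), message.encode(), hashlib.sha256).digest()
            return base64.b64encode(digest).decode()

        def get_keywords_data(self, keywords):
            uri = "/keywordstool"
            timestamp = str(round(time.time() * 1000))
            headers = {
                "X-Timestamp": timestamp,
                "X-API-KEY": self.api_key,
                "X-Customer": str(self.customer_id),
                "X-Signature": self._signature(timestamp, "GET", uri),
            }
            params = {"hintKeywords": ",".join(keywords), "showDetail": "1"}
            response = requests.get(self.base_url + uri, headers=headers, params=params)
            return response.json()  # expected to contain a 'keywordList' array

With that in place, entering a comma-separated keyword list in the Textbox drives display_search_volumes, which returns both the rows for the Dataframe component and a temp-file path for the Excel download.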