Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import time
|
3 |
import hmac
|
4 |
import hashlib
|
5 |
import base64
|
6 |
import requests
|
7 |
-
import pandas as pd
|
8 |
-
import tempfile
|
9 |
-
import gradio as gr
|
10 |
|
11 |
-
# Naver Ads API
|
12 |
def generate_signature(timestamp: str, method: str, uri: str, secret_key: str) -> str:
    """Return the Base64-encoded HMAC-SHA256 signature for one API request.

    The signed message is ``"{timestamp}.{method}.{uri}"`` and the HMAC key is
    ``secret_key``; both are UTF-8 encoded before hashing.

    Args:
        timestamp: Request timestamp as text.
        method: HTTP method, e.g. ``"GET"``.
        uri: Request path that is being signed.
        secret_key: Secret used as the HMAC key.

    Returns:
        Base64 text of the raw SHA-256 HMAC digest.
    """
    message = f"{timestamp}.{method}.{uri}"
    digest = hmac.new(
        secret_key.encode("utf-8"),
        message.encode("utf-8"),
        hashlib.sha256,
    ).digest()
    return base64.b64encode(digest).decode()
|
16 |
|
17 |
-
# Builds the request headers for Naver Ads API calls
|
18 |
def get_header(method, uri, api_key, secret_key, customer_id):
|
19 |
timestamp = str(round(time.time() * 1000))
|
20 |
signature = generate_signature(timestamp, method, uri, secret_key)
|
@@ -26,13 +27,14 @@ def get_header(method, uri, api_key, secret_key, customer_id):
|
|
26 |
"X-Signature": signature
|
27 |
}
|
28 |
|
29 |
-
# Naver Ads API
|
30 |
def fetch_related_keywords(keyword):
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
36 |
BASE_URL = "https://api.naver.com"
|
37 |
uri = "/keywordstool"
|
38 |
method = "GET"
|
@@ -41,18 +43,20 @@ def fetch_related_keywords(keyword):
|
|
41 |
"hintKeywords": [keyword],
|
42 |
"showDetail": "1"
|
43 |
}
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
46 |
if "keywordList" not in data:
|
47 |
return pd.DataFrame()
|
48 |
df = pd.DataFrame(data["keywordList"])
|
49 |
if len(df) > 100:
|
50 |
df = df.head(100)
|
51 |
-
|
52 |
def parse_count(x):
|
53 |
try:
|
54 |
-
|
55 |
-
return int(x_str)
|
56 |
except:
|
57 |
return 0
|
58 |
|
@@ -63,93 +67,152 @@ def fetch_related_keywords(keyword):
|
|
63 |
result_df = df[["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"]]
|
64 |
return result_df
|
65 |
|
66 |
-
# Naver Search
|
67 |
def fetch_blog_count(keyword):
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
71 |
url = "https://openapi.naver.com/v1/search/blog.json"
|
72 |
headers = {
|
73 |
"X-Naver-Client-Id": client_id,
|
74 |
"X-Naver-Client-Secret": client_secret
|
75 |
}
|
76 |
params = {"query": keyword, "display": 1}
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
82 |
return 0
|
83 |
|
84 |
-
|
85 |
-
def create_excel_file(df):
    """Write *df* to a new temporary ``.xlsx`` file and return the file path.

    Args:
        df: Object exposing ``to_excel(path, index=...)`` (a pandas DataFrame
            in this file).

    Returns:
        Path of the temporary workbook. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    # Reserve a unique path, then close the handle before writing: a
    # NamedTemporaryFile that is still open cannot be reopened by name on
    # every platform (notably Windows).
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
        excel_path = tmp.name
    df.to_excel(excel_path, index=False)
    return excel_path
|
90 |
-
|
91 |
-
# Processes the multiple keywords that were entered
|
92 |
-
def process_keyword(keywords: str, include_related: bool):
|
93 |
"""
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
"""
|
99 |
-
#
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
)
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
include_checkbox = gr.Checkbox(label="μ°κ΄κ²μμ΄ ν¬ν¨ (첫λ²μ§Έ ν€μλμ νν¨)", value=False)
|
145 |
-
search_button = gr.Button("κ²μ")
|
146 |
-
|
147 |
-
with gr.Row():
|
148 |
-
df_output = gr.Dataframe(label="κ²μ κ²°κ³Ό")
|
149 |
-
excel_output = gr.File(label="μμ
λ€μ΄λ‘λ")
|
150 |
-
|
151 |
-
# Run process_keyword when the button is clicked
|
152 |
-
search_button.click(fn=process_keyword, inputs=[keyword_input, include_checkbox], outputs=[df_output, excel_output])
|
153 |
-
|
154 |
-
# Launch the app (can be deployed on Hugging Face Spaces)
|
155 |
-
demo.launch()
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import re
|
4 |
+
from io import BytesIO
|
5 |
+
import tempfile
|
6 |
import os
|
7 |
import time
|
8 |
import hmac
|
9 |
import hashlib
|
10 |
import base64
|
11 |
import requests
|
|
|
|
|
|
|
12 |
|
13 |
+
# --- Naver Ads API: signature generation and header construction ---
|
14 |
def generate_signature(timestamp: str, method: str, uri: str, secret_key: str) -> str:
    """Return the Base64-encoded HMAC-SHA256 signature for one API request.

    The signed message is ``"{timestamp}.{method}.{uri}"`` and the HMAC key is
    ``secret_key``; both are UTF-8 encoded before hashing.

    Args:
        timestamp: Request timestamp as text.
        method: HTTP method, e.g. ``"GET"``.
        uri: Request path that is being signed.
        secret_key: Secret used as the HMAC key.

    Returns:
        Base64 text of the raw SHA-256 HMAC digest.
    """
    message = f"{timestamp}.{method}.{uri}"
    digest = hmac.new(
        secret_key.encode("utf-8"),
        message.encode("utf-8"),
        hashlib.sha256,
    ).digest()
    return base64.b64encode(digest).decode()
|
18 |
|
|
|
19 |
def get_header(method, uri, api_key, secret_key, customer_id):
|
20 |
timestamp = str(round(time.time() * 1000))
|
21 |
signature = generate_signature(timestamp, method, uri, secret_key)
|
|
|
27 |
"X-Signature": signature
|
28 |
}
|
29 |
|
30 |
+
# --- Naver Ads API: search-volume lookup (related keywords excluded) ---
|
31 |
def fetch_related_keywords(keyword):
|
32 |
+
API_KEY = os.environ.get("NAVER_API_KEY")
|
33 |
+
SECRET_KEY = os.environ.get("NAVER_SECRET_KEY")
|
34 |
+
CUSTOMER_ID = os.environ.get("NAVER_CUSTOMER_ID")
|
35 |
+
|
36 |
+
if not API_KEY or not SECRET_KEY or not CUSTOMER_ID:
|
37 |
+
return pd.DataFrame()
|
38 |
BASE_URL = "https://api.naver.com"
|
39 |
uri = "/keywordstool"
|
40 |
method = "GET"
|
|
|
43 |
"hintKeywords": [keyword],
|
44 |
"showDetail": "1"
|
45 |
}
|
46 |
+
try:
|
47 |
+
response = requests.get(BASE_URL + uri, params=params, headers=headers)
|
48 |
+
data = response.json()
|
49 |
+
except Exception as e:
|
50 |
+
return pd.DataFrame()
|
51 |
if "keywordList" not in data:
|
52 |
return pd.DataFrame()
|
53 |
df = pd.DataFrame(data["keywordList"])
|
54 |
if len(df) > 100:
|
55 |
df = df.head(100)
|
56 |
+
|
57 |
def parse_count(x):
|
58 |
try:
|
59 |
+
return int(str(x).replace(",", ""))
|
|
|
60 |
except:
|
61 |
return 0
|
62 |
|
|
|
67 |
result_df = df[["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"]]
|
68 |
return result_df
|
69 |
|
70 |
+
# --- Naver Search API: blog document count lookup ---
|
71 |
def fetch_blog_count(keyword):
    """Return the ``total`` field of a Naver blog search for *keyword*.

    Credentials are read from the ``NAVER_SEARCH_CLIENT_ID`` and
    ``NAVER_SEARCH_CLIENT_SECRET`` environment variables. The lookup is
    best-effort: missing credentials, a non-200 response, a network failure
    or an unparsable body all yield ``0`` instead of raising.

    Args:
        keyword: Search query text.

    Returns:
        The ``total`` value from the JSON response, or ``0`` on any failure.
    """
    client_id = os.environ.get("NAVER_SEARCH_CLIENT_ID")
    client_secret = os.environ.get("NAVER_SEARCH_CLIENT_SECRET")
    if not client_id or not client_secret:
        return 0

    url = "https://openapi.naver.com/v1/search/blog.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    # Only the total is needed, so request a single result item.
    params = {"query": keyword, "display": 1}
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
        if response.status_code != 200:
            return 0
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP-layer failure or a body that is not valid JSON.
        return 0
    if not isinstance(data, dict):
        return 0
    return data.get("total", 0)
|
91 |
|
92 |
+
# Column order of the result sheet and of the returned DataFrame (A through F):
# keyword, frequency, PC / mobile / total monthly search volume, blog documents.
_RESULT_COLUMNS = [
    "키워드",
    "빈도수",
    "PC월검색량",
    "모바일월검색량",
    "토탈월검색량",
    "블로그문서수",
]

# Matches every character that is not a digit, an ASCII letter, a Hangul
# syllable or whitespace; those characters are stripped before splitting.
_NON_KEYWORD_CHARS = re.compile(r"[^0-9a-zA-Z가-힣\s]")


def _error_result(message):
    """Write *message* to a temp UTF-8 ``.txt`` file; return (path, error DataFrame)."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="wb") as handle:
        handle.write(message.encode("utf-8"))
    return handle.name, pd.DataFrame({"에러": [message]})


def _count_keywords(cells):
    """Count in how many cells each keyword occurs (once per cell at most)."""
    counts = {}
    for cell in cells:
        cleaned = _NON_KEYWORD_CHARS.sub("", str(cell))
        for word in set(cleaned.split()):
            counts[word] = counts.get(word, 0) + 1
    return counts


def _lookup_search_volumes(keyword):
    """Return (pc, mobile, total) search volumes for *keyword*; zeros if no data."""
    df_api = fetch_related_keywords(keyword)
    if df_api.empty:
        return 0, 0, 0
    row = df_api[df_api["정보키워드"] == keyword]
    if row.empty:
        # No exact match for the keyword: fall back to the first returned row.
        row = df_api.iloc[[0]]
    return (
        int(row["PC월검색량"].iloc[0]),
        int(row["모바일월검색량"].iloc[0]),
        int(row["토탈월검색량"].iloc[0]),
    )


def process_excel(file_bytes):
    """Extract keywords from an uploaded workbook and look up their statistics.

    Product names are read from column D starting at cell D4. For each cell,
    special characters are removed and the text is split on whitespace; a
    keyword repeated inside one cell is counted once, and frequencies are
    accumulated over all cells. Keywords are ordered by descending frequency,
    then alphabetically.

    For every keyword the search volumes come from ``fetch_related_keywords``
    and the blog document count from ``fetch_blog_count``.

    Args:
        file_bytes: Raw bytes of the uploaded Excel file.

    Returns:
        ``(path, dataframe)``. On success ``path`` is a temporary ``.xlsx``
        file whose columns A-F are keyword, frequency, PC monthly searches,
        mobile monthly searches, total monthly searches and blog document
        count, and ``dataframe`` holds the same rows. On failure ``path`` is a
        temporary ``.txt`` file containing the error message and ``dataframe``
        is a one-row frame with that message.
    """
    try:
        df = pd.read_excel(BytesIO(file_bytes), header=None, engine="openpyxl")
    except Exception as e:  # UI boundary: report any read failure to the user
        return _error_result("엑셀 파일을 읽는 중 오류가 발생하였습니다: " + str(e))

    # Cell D4 must exist: at least 4 columns and 4 rows.
    if df.shape[1] < 4 or df.shape[0] < 4:
        return _error_result("엑셀 파일의 형식이 올바르지 않습니다.")

    # Column D (index 3) from row 4 (index 3) downwards, blanks dropped.
    product_names = df.iloc[3:, 3].dropna()
    keyword_counts = _count_keywords(product_names)
    sorted_keywords = sorted(keyword_counts.items(), key=lambda item: (-item[1], item[0]))

    result_data = []
    for keyword, count in sorted_keywords:
        pc_search, mobile_search, total_search = _lookup_search_volumes(keyword)
        blog_count = fetch_blog_count(keyword)
        result_data.append({
            "키워드": keyword,
            "빈도수": count,
            "PC월검색량": pc_search,
            "모바일월검색량": mobile_search,
            "토탈월검색량": total_search,
            "블로그문서수": blog_count,
        })
    # Explicit columns keep the table header even when no keyword was found.
    result_df = pd.DataFrame(result_data, columns=_RESULT_COLUMNS)

    # Build the workbook in memory first so that a failure leaves no
    # half-written .xlsx file behind.
    output = BytesIO()
    try:
        with pd.ExcelWriter(output, engine="openpyxl") as writer:
            result_df.to_excel(writer, index=False, startrow=1, header=False)
            worksheet = writer.sheets["Sheet1"]
            # Header row is written by hand as plain cells.
            for column_index, title in enumerate(_RESULT_COLUMNS, start=1):
                worksheet.cell(row=1, column=column_index, value=title)
    except Exception as e:  # UI boundary: report any write failure to the user
        return _error_result("엑셀 파일을 생성하는 중 오류가 발생하였습니다: " + str(e))

    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", mode="wb") as temp_excel:
        temp_excel.write(output.getvalue())

    return temp_excel.name, result_df
|
201 |
+
|
202 |
+
# Gradio UI: one Excel upload in; the result workbook and a preview table out.
# The upload is passed to process_excel as raw bytes (type="binary").
iface = gr.Interface(
    fn=process_excel,
    inputs=gr.File(label="엑셀 파일 업로드", type="binary"),
    outputs=[
        gr.File(label="결과 엑셀 파일"),
        gr.DataFrame(label="키워드 분석 표"),
    ],
    title="엑셀 상품명 키워드 추출 및 검색량/블로그 문서수 조회",
    description=(
        "엑셀 파일의 D4셀부터 D열에 있는 상품명 데이터를 분석하여, "
        "특수문자를 제거한 후 공백 기준으로 키워드를 추출합니다. "
        "각 키워드에 대해 네이버 API를 활용하여 PC/모바일/토탈 월 검색량과 "
        "네이버 블로그 문서수를 조회한 결과를 엑셀 파일과 표(데이터프레임)로 출력합니다."
    ),
)

# Start the app when the module is executed.
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|