Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -143,6 +143,7 @@ def get_header(method, uri, api_key, secret_key, customer_id):
|
|
143 |
"X-Signature": signature
|
144 |
}
|
145 |
|
|
|
146 |
def fetch_related_keywords(keyword):
|
147 |
debug_log(f"fetch_related_keywords νΈμΆ, ν€μλ: {keyword}")
|
148 |
API_KEY = os.environ["NAVER_API_KEY"]
|
@@ -179,6 +180,80 @@ def fetch_related_keywords(keyword):
|
|
179 |
debug_log("fetch_related_keywords μλ£")
|
180 |
return result_df
|
181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
def fetch_blog_count(keyword):
|
183 |
debug_log(f"fetch_blog_count νΈμΆ, ν€μλ: {keyword}")
|
184 |
client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
|
@@ -205,36 +280,6 @@ def create_excel_file(df):
|
|
205 |
debug_log(f"Excel νμΌ μμ±λ¨: {excel_path}")
|
206 |
return excel_path
|
207 |
|
208 |
-
def process_keyword(keywords: str, include_related: bool):
    """Look up search volumes for each newline-separated input keyword
    (one API call per keyword) and return the combined DataFrame together
    with the path of an Excel export of it.
    """
    debug_log(f"process_keyword νΈμΆ, ν€μλλ€: {keywords}, μ°κ΄κ²μμ΄ ν¬ν¨: {include_related}")
    cleaned = [line.strip() for line in keywords.splitlines() if line.strip()]
    frames = []

    for position, keyword in enumerate(cleaned):
        fetched = fetch_related_keywords(keyword)
        if fetched.empty:
            continue
        exact = fetched[fetched["μ 보ν€μλ"] == keyword]
        # Prefer the exact row for the input keyword; otherwise fall back
        # to the first row the API returned.
        frames.append(exact if not exact.empty else fetched.head(1))
        # Related keywords are attached only for the very first input keyword.
        if include_related and position == 0:
            others = fetched[fetched["μ 보ν€μλ"] != keyword]
            if not others.empty:
                frames.append(others)

    if frames:
        result_df = pd.concat(frames, ignore_index=True)
        result_df.drop_duplicates(subset=["μ 보ν€μλ"], inplace=True)
    else:
        result_df = pd.DataFrame(columns=["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"])

    # Enrich with a per-keyword blog-post count, then sort by total volume.
    result_df["λΈλ‘κ·Έλ¬Έμμ"] = result_df["μ 보ν€μλ"].apply(fetch_blog_count)
    result_df.sort_values(by="ν νμκ²μλ", ascending=False, inplace=True)
    debug_log("process_keyword μλ£")
    return result_df, create_excel_file(result_df)
|
237 |
-
|
238 |
# [μ°Έμ‘°μ½λ-1] λ° [μ°Έμ‘°μ½λ-2]λ₯Ό νμ©ν ννμ λΆμ λ° κ²μλ, λΈλ‘κ·Έλ¬Έμμ μΆκ° (λΉλμ1 μ κ±° μ΅μ
ν¬ν¨)
|
239 |
def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
|
240 |
debug_log("morphological_analysis_and_enrich ν¨μ μμ")
|
|
|
143 |
"X-Signature": signature
|
144 |
}
|
145 |
|
146 |
+
# Existing single-keyword function (kept for reference)
|
147 |
def fetch_related_keywords(keyword):
|
148 |
debug_log(f"fetch_related_keywords νΈμΆ, ν€μλ: {keyword}")
|
149 |
API_KEY = os.environ["NAVER_API_KEY"]
|
|
|
180 |
debug_log("fetch_related_keywords μλ£")
|
181 |
return result_df
|
182 |
|
183 |
+
# New: fetch search volumes for keywords in groups of up to 10 per request
|
184 |
+
def fetch_related_keywords_batch(keywords: list):
    """Fetch monthly search volumes for a group of hint keywords in one call.

    Args:
        keywords: hint keywords for the Naver keyword tool; the caller is
            expected to pass at most 10 per request.

    Returns:
        pandas.DataFrame with the project's keyword / PC volume / mobile
        volume / total volume columns, capped at 100 rows; an empty
        DataFrame when the response carries no "keywordList".
    """
    debug_log(f"fetch_related_keywords_batch νΈμΆ, ν€μλ κ·Έλ£Ή: {keywords}")
    API_KEY = os.environ["NAVER_API_KEY"]
    SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
    CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]

    BASE_URL = "https://api.naver.com"
    uri = "/keywordstool"
    method = "GET"
    headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
    params = {
        # NOTE(review): requests encodes a list as repeated hintKeywords
        # query params — confirm the API accepts this vs. a comma-joined string.
        "hintKeywords": keywords,
        "showDetail": "1"
    }
    response = requests.get(BASE_URL + uri, params=params, headers=headers)
    data = response.json()
    if "keywordList" not in data:
        return pd.DataFrame()
    df = pd.DataFrame(data["keywordList"])
    # Cap at 100 rows to bound downstream per-keyword lookups.
    # (head(100) is a no-op on shorter frames, so no length guard is needed.)
    df = df.head(100)

    def parse_count(x):
        # Counts arrive as strings like "1,234" (or "< 10"); anything
        # unparseable is treated as 0. Was a bare `except:` — narrowed so
        # unrelated errors are no longer silently swallowed.
        try:
            return int(str(x).replace(",", ""))
        except (ValueError, TypeError):
            return 0

    df["PCμκ²μλ"] = df["monthlyPcQcCnt"].apply(parse_count)
    df["λͺ¨λ°μΌμκ²μλ"] = df["monthlyMobileQcCnt"].apply(parse_count)
    df["ν νμκ²μλ"] = df["PCμκ²μλ"] + df["λͺ¨λ°μΌμκ²μλ"]
    df.rename(columns={"relKeyword": "μ 보ν€μλ"}, inplace=True)
    result_df = df[["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"]]
    debug_log("fetch_related_keywords_batch μλ£")
    return result_df
|
219 |
+
|
220 |
+
# Revised process_keyword: processes the input keywords in batches of 10
|
221 |
+
def process_keyword(keywords: str, include_related: bool):
    """Resolve search volumes for newline-separated keywords, 10 per API call.

    Args:
        keywords: raw multi-line input, one keyword per line.
        include_related: when True, also keep the related keywords returned
            for the first batch (everything except the very first keyword).

    Returns:
        Tuple of (result DataFrame sorted by total search volume descending,
        path to an Excel export of that DataFrame).
    """
    debug_log(f"process_keyword νΈμΆ, ν€μλλ€: {keywords}, μ°κ΄κ²μμ΄ ν¬ν¨: {include_related}")
    input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
    result_dfs = []

    # Group the input keywords 10 at a time (API limit per request).
    for i in range(0, len(input_keywords), 10):
        batch = input_keywords[i:i+10]
        df_batch = fetch_related_keywords_batch(batch)
        if df_batch.empty:
            continue
        # Pull the row matching each input keyword out of the batch result.
        fallback_used = False
        for kw in batch:
            row_kw = df_batch[df_batch["μ 보ν€μλ"] == kw]
            if not row_kw.empty:
                result_dfs.append(row_kw)
            elif not fallback_used:
                # Fallback row for unmatched keywords. The original appended
                # df_batch.head(1) once per missing keyword; the duplicates
                # were dropped below anyway, so appending it at most once per
                # batch is equivalent and avoids redundant frames.
                result_dfs.append(df_batch.head(1))
                fallback_used = True
        # Apply the related-keywords option to the first group only,
        # excluding the very first input keyword itself.
        if include_related and i == 0:
            first_keyword = batch[0]
            df_related = df_batch[df_batch["μ 보ν€μλ"] != first_keyword]
            if not df_related.empty:
                result_dfs.append(df_related)

    if result_dfs:
        result_df = pd.concat(result_dfs, ignore_index=True)
        result_df.drop_duplicates(subset=["μ 보ν€μλ"], inplace=True)
    else:
        result_df = pd.DataFrame(columns=["μ 보ν€μλ", "PCμκ²μλ", "λͺ¨λ°μΌμκ²μλ", "ν νμκ²μλ"])

    # One blog-count lookup per surviving unique keyword row.
    result_df["λΈλ‘κ·Έλ¬Έμμ"] = result_df["μ 보ν€μλ"].apply(fetch_blog_count)
    result_df.sort_values(by="ν νμκ²μλ", ascending=False, inplace=True)
    debug_log("process_keyword μλ£")
    return result_df, create_excel_file(result_df)
|
256 |
+
|
257 |
def fetch_blog_count(keyword):
|
258 |
debug_log(f"fetch_blog_count νΈμΆ, ν€μλ: {keyword}")
|
259 |
client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
|
|
|
280 |
debug_log(f"Excel νμΌ μμ±λ¨: {excel_path}")
|
281 |
return excel_path
|
282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
# [μ°Έμ‘°μ½λ-1] λ° [μ°Έμ‘°μ½λ-2]λ₯Ό νμ©ν ννμ λΆμ λ° κ²μλ, λΈλ‘κ·Έλ¬Έμμ μΆκ° (λΉλμ1 μ κ±° μ΅μ
ν¬ν¨)
|
284 |
def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
|
285 |
debug_log("morphological_analysis_and_enrich ν¨μ μμ")
|