Kims12 committed on
Commit
c242fd9
·
verified ·
1 Parent(s): 110447f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -30
app.py CHANGED
@@ -143,6 +143,7 @@ def get_header(method, uri, api_key, secret_key, customer_id):
143
  "X-Signature": signature
144
  }
145
 
 
146
  def fetch_related_keywords(keyword):
147
  debug_log(f"fetch_related_keywords 호좜, ν‚€μ›Œλ“œ: {keyword}")
148
  API_KEY = os.environ["NAVER_API_KEY"]
@@ -179,6 +180,80 @@ def fetch_related_keywords(keyword):
179
  debug_log("fetch_related_keywords μ™„λ£Œ")
180
  return result_df
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  def fetch_blog_count(keyword):
183
  debug_log(f"fetch_blog_count 호좜, ν‚€μ›Œλ“œ: {keyword}")
184
  client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
@@ -205,36 +280,6 @@ def create_excel_file(df):
205
  debug_log(f"Excel 파일 생성됨: {excel_path}")
206
  return excel_path
207
 
208
- def process_keyword(keywords: str, include_related: bool):
209
- debug_log(f"process_keyword 호좜, ν‚€μ›Œλ“œλ“€: {keywords}, 연관검색어 포함: {include_related}")
210
- input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
211
- result_dfs = []
212
-
213
- for idx, kw in enumerate(input_keywords):
214
- df_kw = fetch_related_keywords(kw)
215
- if df_kw.empty:
216
- continue
217
- row_kw = df_kw[df_kw["μ •λ³΄ν‚€μ›Œλ“œ"] == kw]
218
- if not row_kw.empty:
219
- result_dfs.append(row_kw)
220
- else:
221
- result_dfs.append(df_kw.head(1))
222
- if include_related and idx == 0:
223
- df_related = df_kw[df_kw["μ •λ³΄ν‚€μ›Œλ“œ"] != kw]
224
- if not df_related.empty:
225
- result_dfs.append(df_related)
226
-
227
- if result_dfs:
228
- result_df = pd.concat(result_dfs, ignore_index=True)
229
- result_df.drop_duplicates(subset=["μ •λ³΄ν‚€μ›Œλ“œ"], inplace=True)
230
- else:
231
- result_df = pd.DataFrame(columns=["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰"])
232
-
233
- result_df["λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"] = result_df["μ •λ³΄ν‚€μ›Œλ“œ"].apply(fetch_blog_count)
234
- result_df.sort_values(by="ν† νƒˆμ›”κ²€μƒ‰λŸ‰", ascending=False, inplace=True)
235
- debug_log("process_keyword μ™„λ£Œ")
236
- return result_df, create_excel_file(result_df)
237
-
238
  # [참조코드-1] 및 [참조코드-2]를 활용한 형태소 분석 및 검색량, 블로그문서수 추가 (빈도수1 제거 옵션 포함)
239
  def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
240
  debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ‹œμž‘")
 
143
  "X-Signature": signature
144
  }
145
 
146
+ # 기존의 단일 키워드용 함수 (참고용)
147
  def fetch_related_keywords(keyword):
148
  debug_log(f"fetch_related_keywords 호좜, ν‚€μ›Œλ“œ: {keyword}")
149
  API_KEY = os.environ["NAVER_API_KEY"]
 
180
  debug_log("fetch_related_keywords μ™„λ£Œ")
181
  return result_df
182
 
183
+ # 신규 추가: 키워드 10개씩 그룹으로 묶어 한 번에 검색량을 조회하는 함수
184
+ def fetch_related_keywords_batch(keywords: list):
185
+ debug_log(f"fetch_related_keywords_batch 호좜, ν‚€μ›Œλ“œ κ·Έλ£Ή: {keywords}")
186
+ API_KEY = os.environ["NAVER_API_KEY"]
187
+ SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
188
+ CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
189
+
190
+ BASE_URL = "https://api.naver.com"
191
+ uri = "/keywordstool"
192
+ method = "GET"
193
+ headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
194
+ params = {
195
+ "hintKeywords": keywords, # 그룹으둜 전달 (μ΅œλŒ€ 10개)
196
+ "showDetail": "1"
197
+ }
198
+ response = requests.get(BASE_URL + uri, params=params, headers=headers)
199
+ data = response.json()
200
+ if "keywordList" not in data:
201
+ return pd.DataFrame()
202
+ df = pd.DataFrame(data["keywordList"])
203
+ if len(df) > 100:
204
+ df = df.head(100)
205
+
206
+ def parse_count(x):
207
+ try:
208
+ return int(str(x).replace(",", ""))
209
+ except:
210
+ return 0
211
+
212
+ df["PCμ›”κ²€μƒ‰λŸ‰"] = df["monthlyPcQcCnt"].apply(parse_count)
213
+ df["λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰"] = df["monthlyMobileQcCnt"].apply(parse_count)
214
+ df["ν† νƒˆμ›”κ²€μƒ‰λŸ‰"] = df["PCμ›”κ²€μƒ‰λŸ‰"] + df["λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰"]
215
+ df.rename(columns={"relKeyword": "μ •λ³΄ν‚€μ›Œλ“œ"}, inplace=True)
216
+ result_df = df[["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰"]]
217
+ debug_log("fetch_related_keywords_batch μ™„λ£Œ")
218
+ return result_df
219
+
220
+ # 기존 process_keyword 함수를 그룹별로 10개씩 묶어 처리하도록 수정
221
+ def process_keyword(keywords: str, include_related: bool):
222
+ debug_log(f"process_keyword 호좜, ν‚€μ›Œλ“œλ“€: {keywords}, 연관검색어 포함: {include_related}")
223
+ input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
224
+ result_dfs = []
225
+
226
+ # 입력된 키워드를 10개씩 그룹으로 묶기
227
+ for i in range(0, len(input_keywords), 10):
228
+ batch = input_keywords[i:i+10]
229
+ df_batch = fetch_related_keywords_batch(batch)
230
+ if df_batch.empty:
231
+ continue
232
+ # 각 그룹 내에서 입력 키워드와 일치하는 행 추출
233
+ for kw in batch:
234
+ row_kw = df_batch[df_batch["μ •λ³΄ν‚€μ›Œλ“œ"] == kw]
235
+ if not row_kw.empty:
236
+ result_dfs.append(row_kw)
237
+ else:
238
+ result_dfs.append(df_batch.head(1))
239
+ # 첫 그룹의 경우 연관검색어 μ˜΅μ…˜ 적용 (첫 ν‚€μ›Œλ“œ μ œμ™Έ)
240
+ if include_related and i == 0:
241
+ first_keyword = batch[0]
242
+ df_related = df_batch[df_batch["μ •λ³΄ν‚€μ›Œλ“œ"] != first_keyword]
243
+ if not df_related.empty:
244
+ result_dfs.append(df_related)
245
+
246
+ if result_dfs:
247
+ result_df = pd.concat(result_dfs, ignore_index=True)
248
+ result_df.drop_duplicates(subset=["μ •λ³΄ν‚€μ›Œλ“œ"], inplace=True)
249
+ else:
250
+ result_df = pd.DataFrame(columns=["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰"])
251
+
252
+ result_df["λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"] = result_df["μ •λ³΄ν‚€μ›Œλ“œ"].apply(fetch_blog_count)
253
+ result_df.sort_values(by="ν† νƒˆμ›”κ²€μƒ‰λŸ‰", ascending=False, inplace=True)
254
+ debug_log("process_keyword μ™„λ£Œ")
255
+ return result_df, create_excel_file(result_df)
256
+
257
  def fetch_blog_count(keyword):
258
  debug_log(f"fetch_blog_count 호좜, ν‚€μ›Œλ“œ: {keyword}")
259
  client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
 
280
  debug_log(f"Excel 파일 생성됨: {excel_path}")
281
  return excel_path
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  # [참조코드-1] 및 [참조코드-2]를 활용한 형태소 분석 및 검색량, 블로그문서수 추가 (빈도수1 제거 옵션 포함)
284
  def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
285
  debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ‹œμž‘")