Kims12 committed on
Commit c85a34f · verified · 1 Parent(s): c98c67f

Update app.py

Files changed (1)
  1. app.py +88 -130
app.py CHANGED
@@ -1,15 +1,19 @@
+import gradio as gr
+import pandas as pd
+import re
+from collections import Counter
+import os
+from openpyxl import load_workbook
+from openpyxl.drawing.image import Image
 import time
 import hashlib
 import hmac
 import base64
 import requests
-import gradio as gr
 import urllib.request
 import urllib.parse
 import json
-import pandas as pd
 from concurrent.futures import ThreadPoolExecutor
-import os
 import tempfile
 from datetime import datetime
 from dotenv import load_dotenv  # dotenv added
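The surviving time/hmac/hashlib/base64 imports exist to sign requests for the NaverAPI client, which both versions of the file construct but which is defined outside these hunks. For context, here is a minimal sketch of such a client, assuming the conventional Naver SearchAd signing scheme (HMAC-SHA256 over "{timestamp}.{method}.{uri}", sent via X-Timestamp/X-API-KEY/X-Customer/X-Signature headers) and the /keywordstool endpoint; the names and details are illustrative, not the committed implementation:

import base64
import hashlib
import hmac
import time

import requests


class NaverAPI:
    """Sketch of the SearchAd client the diff relies on (defined outside these hunks)."""

    def __init__(self, base_url, api_key, secret_key, customer_id):
        self.base_url = base_url
        self.api_key = api_key
        self.secret_key = secret_key
        self.customer_id = customer_id

    def _headers(self, method, uri):
        # Sign "{timestamp}.{method}.{uri}" with HMAC-SHA256 and base64-encode the digest.
        timestamp = str(round(time.time() * 1000))
        message = f"{timestamp}.{method}.{uri}"
        signature = base64.b64encode(
            hmac.new(self.secret_key.encode(), message.encode(), hashlib.sha256).digest()
        ).decode("utf-8")
        return {
            "Content-Type": "application/json; charset=UTF-8",
            "X-Timestamp": timestamp,
            "X-API-KEY": self.api_key,
            "X-Customer": str(self.customer_id),
            "X-Signature": signature,
        }

    def get_keywords_data(self, keywords):
        # Keyword statistics come from GET /keywordstool with comma-joined hint keywords.
        uri = "/keywordstool"
        params = {"hintKeywords": ",".join(keywords), "showDetail": 1}
        response = requests.get(self.base_url + uri, params=params, headers=self._headers("GET", uri))
        response.raise_for_status()
        return response.json()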
@@ -100,18 +104,10 @@ def get_blog_count(keyword):
         print(f"Error fetching blog count for keyword '{keyword}': {e}")
         return 0
 
-def get_keywords_data_chunk(chunk):
-    api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
-    return api.get_keywords_data(chunk)
-
-def get_blog_count_parallel(keyword):
-    return (keyword, get_blog_count(keyword))
-
-def get_search_volumes(keyword):
+def get_search_volumes(keyword, api):
     """
     Fetch the monthly search volume for a single keyword.
     """
-    api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
     try:
         data = api.get_keywords_data([keyword])
         if 'keywordList' in data and len(data['keywordList']) > 0:
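The signature change is the core refactor in this hunk: instead of every call constructing its own NaverAPI, the caller now creates one client and passes it in, so a batch of keywords reuses a single instance; as the next hunk shows, the new version also folds the blog count into the return value. A hypothetical call site (the keyword string is illustrative):

api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)  # built once by the caller
monthly_pc, monthly_mobile, total_searches, blog_count = get_search_volumes("camping chair", api)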
@@ -135,131 +131,93 @@ def get_search_volumes(keyword):
                             monthly_mobile = 0
 
                     total_searches = monthly_pc + monthly_mobile
-                    return (keyword, monthly_pc, monthly_mobile, total_searches)
+                    blog_count = get_blog_count(keyword)
+                    return (monthly_pc, monthly_mobile, total_searches, blog_count)
             # If no item matches the input keyword
-            return (keyword, 0, 0, 0)
+            return (0, 0, 0, 0)
         else:
-            return (keyword, 0, 0, 0)
+            return (0, 0, 0, 0)
     except Exception as e:
         print(f"Error fetching search volumes for keyword '{keyword}': {e}")
-        return (keyword, 0, 0, 0)
-
-def get_monthly_search_volumes(keywords, include_related_keywords=True):
-    all_data = []
-    results = []
-
-    if include_related_keywords:
-        chunk_size = 10  # request keywords in batches of 10
-        # Parallel API requests
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(get_keywords_data_chunk, keywords[i:i+chunk_size]) for i in range(0, len(keywords), chunk_size)]
-            for future in futures:
-                try:
-                    data = future.result()
-                    if 'keywordList' in data:
-                        all_data.extend(data['keywordList'])
-                except Exception as e:
-                    print(f"Error fetching keywords data chunk: {e}")
-
-        if not all_data:
-            return [("Error", "No data was returned or the API response is invalid.", "", "", "")]
-
-        unique_keywords = set()
-        for item in all_data:
-            keyword = item['relKeyword']
-            if keyword not in unique_keywords:
-                unique_keywords.add(keyword)
-                monthly_pc = item.get('monthlyPcQcCnt', 0)
-                monthly_mobile = item.get('monthlyMobileQcCnt', 0)
-
-                if isinstance(monthly_pc, str):
-                    monthly_pc = monthly_pc.replace(',', '').replace('< 10', '0')
-                    try:
-                        monthly_pc = int(monthly_pc)
-                    except ValueError:
-                        monthly_pc = 0
-                if isinstance(monthly_mobile, str):
-                    monthly_mobile = monthly_mobile.replace(',', '').replace('< 10', '0')
-                    try:
-                        monthly_mobile = int(monthly_mobile)
-                    except ValueError:
-                        monthly_mobile = 0
-
-                total_searches = monthly_pc + monthly_mobile
-                results.append((keyword, monthly_pc, monthly_mobile, total_searches))
-
-                if len(results) >= 100:
-                    break
+        return (0, 0, 0, 0)
+
+def process_excel(file):
+    # Read the Excel file
+    df = pd.read_excel(file.name)
+
+    # Extract the data in column D
+    product_names = df.iloc[:, 3].dropna()  # columns are zero-indexed, so column D is index 3
+
+    # Extract keywords and count their frequency
+    all_keywords = []
+
+    for name in product_names:
+        # Strip special characters and split on whitespace
+        words = re.sub(r'[^\w\s]', '', name).split()
+        # Remove duplicates
+        unique_words = set(words)
+        all_keywords.extend(unique_words)
 
+    # Count frequencies
+    keyword_counts = Counter(all_keywords)
+
+    # Organize the results into a DataFrame
+    result_df = pd.DataFrame(keyword_counts.items(), columns=['Keyword', 'Frequency'])
+    result_df = result_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)
+
+    # Make sure the output directory exists, then save the file
+    output_dir = "output"
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    output_file = os.path.join(output_dir, "keyword_counts.xlsx")
+
+    # Write the data into the Excel sheet starting at cells A4/B4
+    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
+        result_df.to_excel(writer, index=False, startrow=3)  # startrow=3 starts writing at the 4th row (A4, B4)
+
+    # Insert an image into cell A1 of the Excel file
+    wb = load_workbook(output_file)
+    ws = wb.active
+
+    # Insert ssboost-logo.png into cell A1
+    if os.path.exists("ssboost-logo.png"):
+        img = Image("ssboost-logo.png")
+
+        # Set the image size (1.54 cm high, 5.69 cm wide)
+        img.height = int(1.54 * 28.3465)  # 1 cm = 28.3465 points
+        img.width = int(5.69 * 28.3465)  # 1 cm = 28.3465 points
+
+        ws.add_image(img, "A1")
     else:
-        # Related keywords are not included, so process only the input keywords
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(get_search_volumes, keyword) for keyword in keywords]
-            for future in futures:
-                try:
-                    result = future.result()
-                    results.append(result)
-                except Exception as e:
-                    print(f"Error fetching search volumes for keyword '{keyword}': {e}")
-                    results.append((keyword, 0, 0, 0))
-
-    if not results:
-        return [("Error", "No data was returned or the API response is invalid.", "", "", "")]
-
-    # Request blog post counts in parallel
-    with ThreadPoolExecutor(max_workers=5) as executor:
-        if include_related_keywords:
-            blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
-            for i, future in enumerate(blog_futures):
-                try:
-                    keyword, blog_count = future.result()
-                    results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], blog_count)
-                except Exception as e:
-                    print(f"Error fetching blog count for keyword '{results[i][0]}': {e}")
-                    results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], "Error")
-        else:
-            blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
-            temp_results = []
-            for future in blog_futures:
-                try:
-                    keyword, blog_count = future.result()
-                    temp_results.append((keyword, results[0][1], results[0][2], results[0][3], blog_count))
-                except Exception as e:
-                    print(f"Error fetching blog count for keyword '{keyword}': {e}")
-                    temp_results.append((keyword, results[0][1], results[0][2], results[0][3], "Error"))
-            results = temp_results
-
-    return results
-
-def save_to_excel(results, keyword):
-    df = pd.DataFrame(results, columns=["Keyword", "PC Monthly Searches", "Mobile Monthly Searches", "Total Monthly Searches", "Blog Post Count"])
-    now = datetime.now().strftime('%Y-%m-%d')
-    sanitized_keyword = keyword.replace(' ', '_')
-    filename = f"{now}_{sanitized_keyword}_related_keywords.xlsx"
-    file_path = os.path.join(tempfile.gettempdir(), filename)
-    df.to_excel(file_path, index=False)
-    return file_path
-
-def display_search_volumes(keywords, include_related):
-    keyword_list = [keyword.strip() for keyword in keywords.split(',') if keyword.strip()]
-    if not keyword_list:
-        return [("Error", "No keywords were entered.", "", "", "")], None
-    results = get_monthly_search_volumes(keyword_list, include_related_keywords=include_related)
-    file_path = save_to_excel(results, keywords)
-    return results, file_path
+        print("ssboost-logo.png does not exist; skipping image insertion.")
+
+    # Use the Naver API to add keyword analysis
+    api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
+
+    # Append the keyword analysis results starting in column C
+    for idx, row in result_df.iterrows():
+        keyword = row['Keyword']
+        monthly_pc, monthly_mobile, total_searches, blog_count = get_search_volumes(keyword, api)
+        excel_row = idx + 5  # data rows start at row 5
+        ws.cell(row=excel_row, column=3, value=monthly_pc)  # column C: PC monthly searches
+        ws.cell(row=excel_row, column=4, value=monthly_mobile)  # column D: mobile monthly searches
+        ws.cell(row=excel_row, column=5, value=total_searches)  # column E: total monthly searches
+        ws.cell(row=excel_row, column=6, value=blog_count)  # column F: blog post count
+
+    # Save the Excel file
+    wb.save(output_file)
+
+    return output_file
 
+# Define the Gradio interface
 iface = gr.Interface(
-    fn=display_search_volumes,
-    inputs=[
-        gr.Textbox(placeholder="Enter keywords (separated by commas)", lines=2),
-        gr.Checkbox(label="Include related keywords", value=True)  # related-keywords toggle
-    ],
-    outputs=[
-        gr.Dataframe(headers=["Keyword", "PC Monthly Searches", "Mobile Monthly Searches", "Total Monthly Searches", "Blog Post Count"]),
-        gr.File(label="Download Excel file")
-    ],
-    title="Naver Monthly Search Volume Checker",
-    description="Check the monthly search volume and blog post count for keywords. Choose whether to include related keywords.",
+    fn=process_excel,
+    inputs=gr.File(file_types=[".xlsx"]),  # allow only Excel file uploads
+    outputs="file",
+    title="Excel Keyword Extractor with Naver Analysis",
+    description="Extracts keywords from column D of an Excel file, counts their frequency, analyzes each keyword's search volume and blog post count, and writes the results to a new Excel file."
 )
 
-iface.launch(share=True)  # add share=True to create a public link
+if __name__ == "__main__":
+    iface.launch()
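One caveat in the new image-sizing code: openpyxl treats Image.height and Image.width as pixels (at 96 dpi), not points, so the 28.3465 points-per-cm factor in this commit will render the logo at roughly three quarters of the stated physical size. A sketch of the pixel-based conversion, if true 1.54 cm × 5.69 cm dimensions are wanted:

PX_PER_CM = 96 / 2.54  # openpyxl image sizes are pixels at 96 dpi, about 37.8 px per cm
img.height = int(1.54 * PX_PER_CM)  # about 58 px
img.width = int(5.69 * PX_PER_CM)   # about 215 px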
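To exercise process_excel without launching the Gradio UI, any object exposing a .name attribute mimics what gr.File hands the function; the sample path below is hypothetical:

class FakeUpload:
    # gr.File passes process_excel an object whose .name points at the uploaded file
    name = "sample_products.xlsx"  # hypothetical input workbook

print(process_excel(FakeUpload()))  # prints the path of the generated output/keyword_counts.xlsx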