Kims12 committed on
Commit
c85a34f
·
verified ·
1 Parent(s): c98c67f

Update app.py

Files changed (1)
  1. app.py +88 -130
app.py CHANGED
@@ -1,15 +1,19 @@
  import time
  import hashlib
  import hmac
  import base64
  import requests
- import gradio as gr
  import urllib.request
  import urllib.parse
  import json
- import pandas as pd
  from concurrent.futures import ThreadPoolExecutor
- import os
  import tempfile
  from datetime import datetime
  from dotenv import load_dotenv  # dotenv added
@@ -100,18 +104,10 @@ def get_blog_count(keyword):
          print(f"Error fetching blog count for keyword '{keyword}': {e}")
          return 0

- def get_keywords_data_chunk(chunk):
-     api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
-     return api.get_keywords_data(chunk)
-
- def get_blog_count_parallel(keyword):
-     return (keyword, get_blog_count(keyword))
-
- def get_search_volumes(keyword):
      """
      Fetch the monthly search volume for a single keyword.
      """
-     api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
      try:
          data = api.get_keywords_data([keyword])
          if 'keywordList' in data and len(data['keywordList']) > 0:
@@ -135,131 +131,93 @@ def get_search_volumes(keyword):
                  monthly_mobile = 0

                  total_searches = monthly_pc + monthly_mobile
-                 return (keyword, monthly_pc, monthly_mobile, total_searches)
              # No item matches the input keyword
-             return (keyword, 0, 0, 0)
          else:
-             return (keyword, 0, 0, 0)
      except Exception as e:
          print(f"Error fetching search volumes for keyword '{keyword}': {e}")
-         return (keyword, 0, 0, 0)
-
147
- def get_monthly_search_volumes(keywords, include_related_keywords=True):
148
- all_data = []
149
- results = []
150
-
151
- if include_related_keywords:
152
- chunk_size = 10 # ํ‚ค์›Œ๋“œ๋ฅผ 10๊ฐœ์”ฉ ๋‚˜๋ˆ„์–ด ์š”์ฒญ
153
- # API ๋ณ‘๋ ฌ ์š”์ฒญ
154
- with ThreadPoolExecutor(max_workers=5) as executor:
155
- futures = [executor.submit(get_keywords_data_chunk, keywords[i:i+chunk_size]) for i in range(0, len(keywords), chunk_size)]
156
- for future in futures:
157
- try:
158
- data = future.result()
159
- if 'keywordList' in data:
160
- all_data.extend(data['keywordList'])
161
- except Exception as e:
162
- print(f"Error fetching keywords data chunk: {e}")
163
-
164
- if not all_data:
165
- return [("Error", "๋ฐ์ดํ„ฐ๊ฐ€ ๋ฐ˜ํ™˜๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ API ์‘๋‹ต์ด ์œ ํšจํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.", "", "", "")]
166
-
167
- unique_keywords = set()
168
- for item in all_data:
169
- keyword = item['relKeyword']
170
- if keyword not in unique_keywords:
171
- unique_keywords.add(keyword)
172
- monthly_pc = item.get('monthlyPcQcCnt', 0)
173
- monthly_mobile = item.get('monthlyMobileQcCnt', 0)
174
-
175
- if isinstance(monthly_pc, str):
176
- monthly_pc = monthly_pc.replace(',', '').replace('< 10', '0')
177
- try:
178
- monthly_pc = int(monthly_pc)
179
- except ValueError:
180
- monthly_pc = 0
181
- if isinstance(monthly_mobile, str):
182
- monthly_mobile = monthly_mobile.replace(',', '').replace('< 10', '0')
183
- try:
184
- monthly_mobile = int(monthly_mobile)
185
- except ValueError:
186
- monthly_mobile = 0
187
-
188
- total_searches = monthly_pc + monthly_mobile
189
- results.append((keyword, monthly_pc, monthly_mobile, total_searches))
190
-
191
- if len(results) >= 100:
192
- break
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
      else:
-         # Related keywords are not included, so process only the input keywords
-         with ThreadPoolExecutor(max_workers=5) as executor:
-             futures = [executor.submit(get_search_volumes, keyword) for keyword in keywords]
-             for future in futures:
-                 try:
-                     result = future.result()
-                     results.append(result)
-                 except Exception as e:
-                     print(f"Error fetching search volumes for keyword '{keyword}': {e}")
-                     results.append((keyword, 0, 0, 0))
-
-     if not results:
-         return [("Error", "No data was returned or the API response is invalid.", "", "", "")]
-
-     # Parallel requests for blog post counts
-     with ThreadPoolExecutor(max_workers=5) as executor:
-         if include_related_keywords:
-             blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
-             for i, future in enumerate(blog_futures):
-                 try:
-                     keyword, blog_count = future.result()
-                     results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], blog_count)
-                 except Exception as e:
-                     print(f"Error fetching blog count for keyword '{results[i][0]}': {e}")
-                     results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], "Error")
-         else:
-             blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
-             temp_results = []
-             for future in blog_futures:
-                 try:
-                     keyword, blog_count = future.result()
-                     temp_results.append((keyword, results[0][1], results[0][2], results[0][3], blog_count))
-                 except Exception as e:
-                     print(f"Error fetching blog count for keyword '{keyword}': {e}")
-                     temp_results.append((keyword, results[0][1], results[0][2], results[0][3], "Error"))
-             results = temp_results
-
-     return results
-
- def save_to_excel(results, keyword):
235
- df = pd.DataFrame(results, columns=["ํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜"])
236
- now = datetime.now().strftime('%Y-%m-%d')
237
- sanitized_keyword = keyword.replace(' ', '_')
238
- filename = f"{now}_{sanitized_keyword}_์—ฐ๊ด€๊ฒ€์ƒ‰์–ด.xlsx"
239
- file_path = os.path.join(tempfile.gettempdir(), filename)
240
- df.to_excel(file_path, index=False)
241
- return file_path
242
-
243
- def display_search_volumes(keywords, include_related):
244
- keyword_list = [keyword.strip() for keyword in keywords.split(',') if keyword.strip()]
245
- if not keyword_list:
246
- return [("Error", "์ž…๋ ฅ๋œ ํ‚ค์›Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", "", "", "")], None
247
- results = get_monthly_search_volumes(keyword_list, include_related_keywords=include_related)
248
- file_path = save_to_excel(results, keywords)
249
- return results, file_path
250
 
 
  iface = gr.Interface(
-     fn=display_search_volumes,
-     inputs=[
-         gr.Textbox(placeholder="Enter keywords (comma-separated)", lines=2),
-         gr.Checkbox(label="Include related keywords", value=True)  # related-keywords toggle added
-     ],
-     outputs=[
-         gr.Dataframe(headers=["Keyword", "Monthly PC Searches", "Monthly Mobile Searches", "Total Monthly Searches", "Blog Post Count"]),
-         gr.File(label="Download Excel File")
-     ],
-     title="Naver Monthly Search Volume Checker",
-     description="Check the monthly search volume and blog post count for each keyword. Choose whether to include related keywords.",
  )

- iface.launch(share=True)  # add share=True to create a public link
 
 
+ import gradio as gr
+ import pandas as pd
+ import re
+ from collections import Counter
+ import os
+ from openpyxl import load_workbook
+ from openpyxl.drawing.image import Image
  import time
  import hashlib
  import hmac
  import base64
  import requests
  import urllib.request
  import urllib.parse
  import json
  from concurrent.futures import ThreadPoolExecutor
  import tempfile
  from datetime import datetime
  from dotenv import load_dotenv  # dotenv added
 
          print(f"Error fetching blog count for keyword '{keyword}': {e}")
          return 0

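+ # The NaverAPI client is now created once by the caller and passed in,
+ # so a single instance is reused across all keyword lookups.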
+ def get_search_volumes(keyword, api):
      """
      Fetch the monthly search volume for a single keyword.
      """
      try:
          data = api.get_keywords_data([keyword])
          if 'keywordList' in data and len(data['keywordList']) > 0:

                  monthly_mobile = 0

                  total_searches = monthly_pc + monthly_mobile
+                 blog_count = get_blog_count(keyword)
+                 return (monthly_pc, monthly_mobile, total_searches, blog_count)
              # No item matches the input keyword
+             return (0, 0, 0, 0)
          else:
+             return (0, 0, 0, 0)
      except Exception as e:
          print(f"Error fetching search volumes for keyword '{keyword}': {e}")
+         return (0, 0, 0, 0)
+
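+ # process_excel: reads the uploaded workbook, tallies keyword frequencies
+ # from column D, looks up Naver search volumes and blog counts for each
+ # keyword, and returns the path of the generated report.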
+ def process_excel(file):
+     # Read the Excel file
+     df = pd.read_excel(file.name)
+
+     # Extract the data in column D
+     product_names = df.iloc[:, 3].dropna()  # column D is index 3 (0-based)
+
+     # Extract keywords and count frequencies
+     all_keywords = []
+
+     for name in product_names:
+         # Strip special characters and split on whitespace
+         words = re.sub(r'[^\w\s]', '', name).split()
+         # Remove duplicates within one product name
+         unique_words = set(words)
+         all_keywords.extend(unique_words)
+
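+     # Note: because words are de-duplicated within each product name above,
+     # the Frequency below counts how many product names contain a word,
+     # not its total number of occurrences.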
+     # Count frequencies
+     keyword_counts = Counter(all_keywords)
+
+     # Collect the results into a DataFrame
+     result_df = pd.DataFrame(keyword_counts.items(), columns=['Keyword', 'Frequency'])
+     result_df = result_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)
+
+     # Make sure the output directory exists, then save the file
+     output_dir = "output"
+     if not os.path.exists(output_dir):
+         os.makedirs(output_dir)
+
+     output_file = os.path.join(output_dir, "keyword_counts.xlsx")
+
+     # Write the data starting at cells A4/B4
+     with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
+         result_df.to_excel(writer, index=False, startrow=3)  # startrow=3 starts at the 4th row (A4, B4)
+
+     # Insert the logo image into cell A1 of the workbook
+     wb = load_workbook(output_file)
+     ws = wb.active
+
+     # Insert ssboost-logo.png into cell A1
+     if os.path.exists("ssboost-logo.png"):
+         img = Image("ssboost-logo.png")
+
+         # Set the image size (1.54 cm high, 5.69 cm wide)
+         img.height = int(1.54 * 28.3465)  # 1 cm = 28.3465 points
+         img.width = int(5.69 * 28.3465)   # 1 cm = 28.3465 points
+
+         ws.add_image(img, "A1")
      else:
+         print("ssboost-logo.png does not exist. The image will not be inserted.")
+
+     # Add keyword analysis via the Naver API
+     api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
+
+     # Write the keyword analysis results starting at column C
+     for idx, row in result_df.iterrows():
+         keyword = row['Keyword']
+         monthly_pc, monthly_mobile, total_searches, blog_count = get_search_volumes(keyword, api)
+         excel_row = idx + 5  # data rows start at row 5 (A5)
+         ws.cell(row=excel_row, column=3, value=monthly_pc)      # column C: monthly PC searches
+         ws.cell(row=excel_row, column=4, value=monthly_mobile)  # column D: monthly mobile searches
+         ws.cell(row=excel_row, column=5, value=total_searches)  # column E: total monthly searches
+         ws.cell(row=excel_row, column=6, value=blog_count)      # column F: blog post count
+
+     # Save the workbook
+     wb.save(output_file)
+
+     return output_file
+
+ # Define the Gradio interface
  iface = gr.Interface(
+     fn=process_excel,
+     inputs=gr.File(file_types=[".xlsx"]),  # accept only Excel file uploads
+     outputs="file",
+     title="Excel Keyword Extractor with Naver Analysis",
+     description="Extracts keywords from column D of an Excel file, computes their frequencies, then analyzes each keyword's search volume and blog post count and outputs the results to a new Excel file."
  )

+ if __name__ == "__main__":
+     iface.launch()
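
The hunk above elides new lines 114-130, where monthly_pc and monthly_mobile are read from the API item. Judging from the removed get_monthly_search_volumes, Naver may return these counts either as integers or as strings such as "1,234" or "< 10", so they presumably still need the same normalization; a minimal sketch of that parsing step (parse_count is an illustrative name, not part of this commit):

def parse_count(value):
    # Coerce a Naver count field to int; the API may send "1,234" or "< 10".
    if isinstance(value, str):
        value = value.replace(',', '').replace('< 10', '0')
        try:
            return int(value)
        except ValueError:
            return 0
    return value if isinstance(value, int) else 0

For example, parse_count("< 10") returns 0 and parse_count("1,234") returns 1234.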