Kims12 committed on
Commit 863de1b · verified · 1 Parent(s): 5a01f24

Update app.py

Files changed (1)
  1. app.py +83 -127
app.py CHANGED
@@ -1,19 +1,15 @@
-import gradio as gr
-import pandas as pd
-import re
-from collections import Counter
-import os
-from openpyxl import load_workbook
-from openpyxl.drawing.image import Image
 import time
 import hashlib
 import hmac
 import base64
 import requests
+import gradio as gr
 import urllib.request
 import urllib.parse
 import json
+import pandas as pd
 from concurrent.futures import ThreadPoolExecutor
+import os
 import tempfile
 from datetime import datetime
 from dotenv import load_dotenv  # dotenv added
@@ -79,13 +75,6 @@ def get_blog_count(keyword):
     # Load the client ID and secret from environment variables.
     client_id = CLIENT_ID
     client_secret = CLIENT_SECRET
-
-    # Decode the keyword if it arrives as bytes
-    if isinstance(keyword, bytes):
-        keyword = keyword.decode('utf-8')
-    elif not isinstance(keyword, str):
-        keyword = str(keyword)
-
     encText = urllib.parse.quote(keyword)
     url = "https://openapi.naver.com/v1/search/blog?query=" + encText
     request = urllib.request.Request(url)
@@ -97,127 +86,94 @@ def get_blog_count(keyword):
         if rescode == 200:
             response_body = response.read()
             data = json.loads(response_body.decode('utf-8'))
-            return data.get('total', 0)
+            return data['total']
         else:
             return 0
     except Exception as e:
         print(f"Error fetching blog count for keyword '{keyword}': {e}")
         return 0

-def get_search_volumes(keyword, api):
-    """
-    Fetch the monthly search volume for a single keyword.
-    """
-    try:
-        data = api.get_keywords_data([keyword])
-        if 'keywordList' in data and len(data['keywordList']) > 0:
-            # Find the entry in keywordList that matches the input keyword.
-            for item in data['keywordList']:
-                if item['relKeyword'].strip().lower() == keyword.strip().lower():
-                    monthly_pc = item.get('monthlyPcQcCnt', 0)
-                    monthly_mobile = item.get('monthlyMobileQcCnt', 0)
-
-                    if isinstance(monthly_pc, str):
-                        monthly_pc = monthly_pc.replace(',', '').replace('< 10', '0')
-                        try:
-                            monthly_pc = int(monthly_pc)
-                        except ValueError:
-                            monthly_pc = 0
-                    if isinstance(monthly_mobile, str):
-                        monthly_mobile = monthly_mobile.replace(',', '').replace('< 10', '0')
-                        try:
-                            monthly_mobile = int(monthly_mobile)
-                        except ValueError:
-                            monthly_mobile = 0
-
-                    total_searches = monthly_pc + monthly_mobile
-                    blog_count = get_blog_count(keyword)
-                    return (monthly_pc, monthly_mobile, total_searches, blog_count)
-            # No keywordList entry matched the input keyword.
-            return (0, 0, 0, 0)
-        else:
-            return (0, 0, 0, 0)
-    except Exception as e:
-        print(f"Error fetching search volumes for keyword '{keyword}': {e}")
-        return (0, 0, 0, 0)
-
-def process_excel(file):
-    # Read the Excel file
-    df = pd.read_excel(file.name)
-
-    # Extract the data in column D
-    product_names = df.iloc[:, 3].dropna()  # column D is index 3 (0-based)
-
-    # Extract keywords and compute frequencies
-    all_keywords = []
-
-    for name in product_names:
-        # Remove special characters and split on whitespace
-        words = re.sub(r'[^\w\s]', '', name).split()
-        # Remove duplicates
-        unique_words = set(words)
-        all_keywords.extend(unique_words)
-
-    # Compute frequencies
-    keyword_counts = Counter(all_keywords)
-
-    # Collect the results into a DataFrame
-    result_df = pd.DataFrame(keyword_counts.items(), columns=['Keyword', 'Frequency'])
-    result_df = result_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)
-
-    # Ensure the output directory exists, then save the file
-    output_dir = "output"
-    if not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-
-    output_file = os.path.join(output_dir, "keyword_counts.xlsx")
-
-    # Write the data to the Excel file starting at cells A4/B4
-    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
-        result_df.to_excel(writer, index=False, startrow=3)  # startrow=3 starts output at row 4 (A4, B4)
-
-    # Insert the logo image into cell A1 of the Excel file
-    wb = load_workbook(output_file)
-    ws = wb.active
-
-    # Insert ssboost-logo.png into cell A1
-    if os.path.exists("ssboost-logo.png"):
-        img = Image("ssboost-logo.png")
-
-        # Set the image size (1.54 cm high, 5.69 cm wide)
-        img.height = int(1.54 * 28.3465)  # 1 cm = 28.3465 points
-        img.width = int(5.69 * 28.3465)  # 1 cm = 28.3465 points
-
-        ws.add_image(img, "A1")
-    else:
-        print("ssboost-logo.png ํŒŒ์ผ์ด ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ์ด๋ฏธ์ง€๋ฅผ ์‚ฝ์ž…ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
-
-    # Add keyword analysis using the Naver API
+def get_keywords_data_chunk(chunk):
     api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
+    return api.get_keywords_data(chunk)
+
+def get_blog_count_parallel(keyword):
+    return (keyword, get_blog_count(keyword))
+
+def get_monthly_search_volumes(keywords):
+    all_data = []
+    chunk_size = 10  # request the keywords in chunks of 10
+
+    # Issue the API requests in parallel
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        futures = [executor.submit(get_keywords_data_chunk, keywords[i:i+chunk_size]) for i in range(0, len(keywords), chunk_size)]
+        for future in futures:
+            try:
+                data = future.result()
+                if 'keywordList' in data:
+                    all_data.extend(data['keywordList'])
+            except Exception as e:
+                print(f"Error fetching keywords data chunk: {e}")
+
+    if not all_data:
+        return [("Error", "๋ฐ์ดํ„ฐ๊ฐ€ ๋ฐ˜ํ™˜๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ API ์‘๋‹ต์ด ์œ ํšจํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.", "", "", "")]  # includes the blog document count column
+
+    results = []
+    unique_keywords = set()
+    for item in all_data:
+        keyword = item['relKeyword']
+        if keyword not in unique_keywords:
+            unique_keywords.add(keyword)
+            monthly_pc = item['monthlyPcQcCnt']
+            monthly_mobile = item['monthlyMobileQcCnt']
+
+            if isinstance(monthly_pc, str):
+                monthly_pc = int(monthly_pc.replace(',', '').replace('< 10', '0'))
+            if isinstance(monthly_mobile, str):
+                monthly_mobile = int(monthly_mobile.replace(',', '').replace('< 10', '0'))
+
+            total_searches = monthly_pc + monthly_mobile
+            results.append((keyword, monthly_pc, monthly_mobile, total_searches))
+
+            if len(results) >= 100:
+                break
+
+    # Fetch the blog document counts in parallel
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
+        for i, future in enumerate(blog_futures):
+            try:
+                keyword, blog_count = future.result()
+                results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], blog_count)
+            except Exception as e:
+                print(f"Error fetching blog count for keyword '{results[i][0]}': {e}")
+                results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], "Error")
+
+    return results
+
+def save_to_excel(results, keyword):
+    df = pd.DataFrame(results, columns=["ํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜"])
+    now = datetime.now().strftime('%Y-%m-%d')
+    sanitized_keyword = keyword.replace(' ', '_')
+    filename = f"{now}_{sanitized_keyword}_์—ฐ๊ด€๊ฒ€์ƒ‰์–ด.xlsx"
+    file_path = os.path.join(tempfile.gettempdir(), filename)
+    df.to_excel(file_path, index=False)
+    return file_path
+
+def display_search_volumes(keywords):
+    keyword_list = [keyword.strip() for keyword in keywords.split(',')]
+    results = get_monthly_search_volumes(keyword_list)
+    file_path = save_to_excel(results, keywords)
+    return results, file_path

-    # Write the keyword-analysis results starting in column C
-    for idx, row in result_df.iterrows():
-        keyword = row['Keyword']
-        monthly_pc, monthly_mobile, total_searches, blog_count = get_search_volumes(keyword, api)
-        excel_row = idx + 5  # start at row 5 (A5)
-        ws.cell(row=excel_row, column=3, value=monthly_pc)  # column C: monthly PC searches
-        ws.cell(row=excel_row, column=4, value=monthly_mobile)  # column D: monthly mobile searches
-        ws.cell(row=excel_row, column=5, value=total_searches)  # column E: total monthly searches
-        ws.cell(row=excel_row, column=6, value=blog_count)  # column F: blog document count
-
-    # Save the Excel file
-    wb.save(output_file)
-
-    return output_file
-
-# Define the Gradio interface
 iface = gr.Interface(
-    fn=process_excel,
-    inputs=gr.File(file_types=[".xlsx"]),  # restrict uploads to Excel files
-    outputs="file",
-    title="Excel Keyword Extractor with Naver Analysis",
-    description="์—‘์…€ ํŒŒ์ผ์˜ D์—ด์—์„œ ํ‚ค์›Œ๋“œ๋ฅผ ์ถ”์ถœํ•˜๊ณ  ๋นˆ๋„๋ฅผ ๊ณ„์‚ฐํ•œ ํ›„, ๊ฐ ํ‚ค์›Œ๋“œ์˜ ๊ฒ€์ƒ‰๋Ÿ‰ ๋ฐ ๋ธ”๋กœ๊ทธ ๋ฌธ์„œ ์ˆ˜๋ฅผ ๋ถ„์„ํ•˜์—ฌ ์ƒˆ๋กœ์šด ์—‘์…€ ํŒŒ์ผ๋กœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค."
+    fn=display_search_volumes,
+    inputs=gr.Textbox(placeholder="ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”"),
+    outputs=[
+        gr.Dataframe(headers=["ํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜"]),
+        gr.File(label="๋‹ค์šด๋กœ๋“œ ์—‘์…€ ํŒŒ์ผ")
+    ],
+    title="๋„ค์ด๋ฒ„ ์›”๊ฒ€์ƒ‰๋Ÿ‰ ๊ฒ€์ƒ‰๊ธฐ",
 )

-if __name__ == "__main__":
-    iface.launch()
+iface.launch(share=True)  # share=True creates a public share link
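
A minimal sanity check (a reviewer's sketch, not part of the commit) of the count normalization the new get_monthly_search_volumes depends on: the Naver keyword tool returns monthlyPcQcCnt / monthlyMobileQcCnt either as integers or as strings such as "1,234" or "< 10", and the added code folds the string forms into integers before summing. The normalize_count helper below is hypothetical, introduced only to illustrate that parsing; note that, unlike the removed get_search_volumes, the new code no longer wraps the int() conversion in try/except ValueError, so any other non-numeric string would raise.

    # Hypothetical helper mirroring the parsing inside get_monthly_search_volumes.
    # Assumes counts arrive as ints, "1,234"-style strings, or the literal "< 10".
    def normalize_count(value):
        if isinstance(value, str):
            # "1,234" -> "1234"; "< 10" -> "0"
            value = int(value.replace(',', '').replace('< 10', '0'))
        return value

    assert normalize_count("1,234") == 1234
    assert normalize_count("< 10") == 0
    assert normalize_count(42) == 42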