Kims12 committed on
Commit c85a34f · verified · 1 Parent(s): c98c67f

Update app.py

Files changed (1)
  1. app.py +88 -130
app.py CHANGED
@@ -1,15 +1,19 @@
+import gradio as gr
+import pandas as pd
+import re
+from collections import Counter
+import os
+from openpyxl import load_workbook
+from openpyxl.drawing.image import Image
 import time
 import hashlib
 import hmac
 import base64
 import requests
-import gradio as gr
 import urllib.request
 import urllib.parse
 import json
-import pandas as pd
 from concurrent.futures import ThreadPoolExecutor
-import os
 import tempfile
 from datetime import datetime
 from dotenv import load_dotenv  # dotenv added
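The surviving time/hmac/hashlib/base64 imports exist to sign requests for the NaverAPI client, which both versions of the file construct but which is defined outside these hunks. For context, here is a minimal sketch of such a client, assuming the conventional Naver SearchAd signing scheme (HMAC-SHA256 over "{timestamp}.{method}.{uri}", sent via X-Timestamp/X-API-KEY/X-Customer/X-Signature headers) and the /keywordstool endpoint; the names and details are illustrative, not the committed implementation:

import base64
import hashlib
import hmac
import time

import requests


class NaverAPI:
    """Sketch of the SearchAd client the diff relies on (defined outside these hunks)."""

    def __init__(self, base_url, api_key, secret_key, customer_id):
        self.base_url = base_url
        self.api_key = api_key
        self.secret_key = secret_key
        self.customer_id = customer_id

    def _headers(self, method, uri):
        # Sign "{timestamp}.{method}.{uri}" with HMAC-SHA256 and base64-encode the digest.
        timestamp = str(round(time.time() * 1000))
        message = f"{timestamp}.{method}.{uri}"
        signature = base64.b64encode(
            hmac.new(self.secret_key.encode(), message.encode(), hashlib.sha256).digest()
        ).decode("utf-8")
        return {
            "Content-Type": "application/json; charset=UTF-8",
            "X-Timestamp": timestamp,
            "X-API-KEY": self.api_key,
            "X-Customer": str(self.customer_id),
            "X-Signature": signature,
        }

    def get_keywords_data(self, keywords):
        # Keyword statistics come from GET /keywordstool with comma-joined hint keywords.
        uri = "/keywordstool"
        params = {"hintKeywords": ",".join(keywords), "showDetail": 1}
        response = requests.get(self.base_url + uri, params=params, headers=self._headers("GET", uri))
        response.raise_for_status()
        return response.json()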
@@ -100,18 +104,10 @@ def get_blog_count(keyword):
         print(f"Error fetching blog count for keyword '{keyword}': {e}")
         return 0
 
-def get_keywords_data_chunk(chunk):
-    api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
-    return api.get_keywords_data(chunk)
-
-def get_blog_count_parallel(keyword):
-    return (keyword, get_blog_count(keyword))
-
-def get_search_volumes(keyword):
+def get_search_volumes(keyword, api):
     """
     Fetch the monthly search volume for a single keyword.
     """
-    api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
     try:
         data = api.get_keywords_data([keyword])
         if 'keywordList' in data and len(data['keywordList']) > 0:
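The signature change is the core refactor in this hunk: instead of every call constructing its own NaverAPI, the caller now creates one client and passes it in, so a batch of keywords reuses a single instance; as the next hunk shows, the new version also folds the blog count into the return value. A hypothetical call site (the keyword string is illustrative):

api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)  # built once by the caller
monthly_pc, monthly_mobile, total_searches, blog_count = get_search_volumes("camping chair", api)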
@@ -135,131 +131,93 @@ def get_search_volumes(keyword):
                             monthly_mobile = 0
 
                     total_searches = monthly_pc + monthly_mobile
-                    return (keyword, monthly_pc, monthly_mobile, total_searches)
+                    blog_count = get_blog_count(keyword)
+                    return (monthly_pc, monthly_mobile, total_searches, blog_count)
             # If no item matches the input keyword
-            return (keyword, 0, 0, 0)
+            return (0, 0, 0, 0)
         else:
-            return (keyword, 0, 0, 0)
+            return (0, 0, 0, 0)
     except Exception as e:
         print(f"Error fetching search volumes for keyword '{keyword}': {e}")
-        return (keyword, 0, 0, 0)
-
-def get_monthly_search_volumes(keywords, include_related_keywords=True):
-    all_data = []
-    results = []
-
-    if include_related_keywords:
-        chunk_size = 10  # request keywords in batches of 10
-        # Parallel API requests
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(get_keywords_data_chunk, keywords[i:i+chunk_size]) for i in range(0, len(keywords), chunk_size)]
-            for future in futures:
-                try:
-                    data = future.result()
-                    if 'keywordList' in data:
-                        all_data.extend(data['keywordList'])
-                except Exception as e:
-                    print(f"Error fetching keywords data chunk: {e}")
-
-        if not all_data:
-            return [("Error", "No data was returned or the API response is invalid.", "", "", "")]
-
-        unique_keywords = set()
-        for item in all_data:
-            keyword = item['relKeyword']
-            if keyword not in unique_keywords:
-                unique_keywords.add(keyword)
-                monthly_pc = item.get('monthlyPcQcCnt', 0)
-                monthly_mobile = item.get('monthlyMobileQcCnt', 0)
-
-                if isinstance(monthly_pc, str):
-                    monthly_pc = monthly_pc.replace(',', '').replace('< 10', '0')
-                    try:
-                        monthly_pc = int(monthly_pc)
-                    except ValueError:
-                        monthly_pc = 0
-                if isinstance(monthly_mobile, str):
-                    monthly_mobile = monthly_mobile.replace(',', '').replace('< 10', '0')
-                    try:
-                        monthly_mobile = int(monthly_mobile)
-                    except ValueError:
-                        monthly_mobile = 0
-
-                total_searches = monthly_pc + monthly_mobile
-                results.append((keyword, monthly_pc, monthly_mobile, total_searches))
-
-                if len(results) >= 100:
-                    break
+        return (0, 0, 0, 0)
+
+def process_excel(file):
+    # Read the Excel file
+    df = pd.read_excel(file.name)
+
+    # Extract the data in column D
+    product_names = df.iloc[:, 3].dropna()  # columns are zero-indexed, so column D is index 3
+
+    # Extract keywords and count their frequency
+    all_keywords = []
+
+    for name in product_names:
+        # Strip special characters and split on whitespace
+        words = re.sub(r'[^\w\s]', '', name).split()
+        # Remove duplicates
+        unique_words = set(words)
+        all_keywords.extend(unique_words)
 
+    # Count frequencies
+    keyword_counts = Counter(all_keywords)
+
+    # Organize the results into a DataFrame
+    result_df = pd.DataFrame(keyword_counts.items(), columns=['Keyword', 'Frequency'])
+    result_df = result_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)
+
+    # Make sure the output directory exists, then save the file
+    output_dir = "output"
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    output_file = os.path.join(output_dir, "keyword_counts.xlsx")
+
+    # Write the data into the Excel sheet starting at cells A4/B4
+    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
+        result_df.to_excel(writer, index=False, startrow=3)  # startrow=3 starts writing at the 4th row (A4, B4)
+
+    # Insert an image into cell A1 of the Excel file
+    wb = load_workbook(output_file)
+    ws = wb.active
+
+    # Insert ssboost-logo.png into cell A1
+    if os.path.exists("ssboost-logo.png"):
+        img = Image("ssboost-logo.png")
+
+        # Set the image size (1.54 cm high, 5.69 cm wide)
+        img.height = int(1.54 * 28.3465)  # 1 cm = 28.3465 points
+        img.width = int(5.69 * 28.3465)  # 1 cm = 28.3465 points
+
+        ws.add_image(img, "A1")
     else:
-        # Related keywords are not included, so process only the input keywords
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(get_search_volumes, keyword) for keyword in keywords]
-            for future in futures:
-                try:
-                    result = future.result()
-                    results.append(result)
-                except Exception as e:
-                    print(f"Error fetching search volumes for keyword '{keyword}': {e}")
-                    results.append((keyword, 0, 0, 0))
-
-    if not results:
-        return [("Error", "No data was returned or the API response is invalid.", "", "", "")]
-
-    # Request blog post counts in parallel
-    with ThreadPoolExecutor(max_workers=5) as executor:
-        if include_related_keywords:
-            blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
-            for i, future in enumerate(blog_futures):
-                try:
-                    keyword, blog_count = future.result()
-                    results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], blog_count)
-                except Exception as e:
-                    print(f"Error fetching blog count for keyword '{results[i][0]}': {e}")
-                    results[i] = (results[i][0], results[i][1], results[i][2], results[i][3], "Error")
-        else:
-            blog_futures = [executor.submit(get_blog_count_parallel, result[0]) for result in results]
-            temp_results = []
-            for future in blog_futures:
-                try:
-                    keyword, blog_count = future.result()
-                    temp_results.append((keyword, results[0][1], results[0][2], results[0][3], blog_count))
-                except Exception as e:
-                    print(f"Error fetching blog count for keyword '{keyword}': {e}")
-                    temp_results.append((keyword, results[0][1], results[0][2], results[0][3], "Error"))
-            results = temp_results
-
-    return results
-
-def save_to_excel(results, keyword):
-    df = pd.DataFrame(results, columns=["Keyword", "PC Monthly Searches", "Mobile Monthly Searches", "Total Monthly Searches", "Blog Post Count"])
-    now = datetime.now().strftime('%Y-%m-%d')
-    sanitized_keyword = keyword.replace(' ', '_')
-    filename = f"{now}_{sanitized_keyword}_related_keywords.xlsx"
-    file_path = os.path.join(tempfile.gettempdir(), filename)
-    df.to_excel(file_path, index=False)
-    return file_path
-
-def display_search_volumes(keywords, include_related):
-    keyword_list = [keyword.strip() for keyword in keywords.split(',') if keyword.strip()]
-    if not keyword_list:
-        return [("Error", "No keywords were entered.", "", "", "")], None
-    results = get_monthly_search_volumes(keyword_list, include_related_keywords=include_related)
-    file_path = save_to_excel(results, keywords)
-    return results, file_path
+        print("ssboost-logo.png does not exist; skipping image insertion.")
+
+    # Use the Naver API to add keyword analysis
+    api = NaverAPI(BASE_URL, API_KEY, SECRET_KEY, CUSTOMER_ID)
+
+    # Append the keyword analysis results starting in column C
+    for idx, row in result_df.iterrows():
+        keyword = row['Keyword']
+        monthly_pc, monthly_mobile, total_searches, blog_count = get_search_volumes(keyword, api)
+        excel_row = idx + 5  # data rows start at row 5
+        ws.cell(row=excel_row, column=3, value=monthly_pc)  # column C: PC monthly searches
+        ws.cell(row=excel_row, column=4, value=monthly_mobile)  # column D: mobile monthly searches
+        ws.cell(row=excel_row, column=5, value=total_searches)  # column E: total monthly searches
+        ws.cell(row=excel_row, column=6, value=blog_count)  # column F: blog post count
+
+    # Save the Excel file
+    wb.save(output_file)
+
+    return output_file
 
+# Define the Gradio interface
 iface = gr.Interface(
-    fn=display_search_volumes,
-    inputs=[
-        gr.Textbox(placeholder="Enter keywords (separated by commas)", lines=2),
-        gr.Checkbox(label="Include related keywords", value=True)  # related-keywords toggle
-    ],
-    outputs=[
-        gr.Dataframe(headers=["Keyword", "PC Monthly Searches", "Mobile Monthly Searches", "Total Monthly Searches", "Blog Post Count"]),
-        gr.File(label="Download Excel file")
-    ],
-    title="Naver Monthly Search Volume Checker",
-    description="Check the monthly search volume and blog post count for keywords. Choose whether to include related keywords.",
+    fn=process_excel,
+    inputs=gr.File(file_types=[".xlsx"]),  # allow only Excel file uploads
+    outputs="file",
+    title="Excel Keyword Extractor with Naver Analysis",
+    description="Extracts keywords from column D of an Excel file, counts their frequency, analyzes each keyword's search volume and blog post count, and writes the results to a new Excel file."
 )
 
-iface.launch(share=True)  # add share=True to create a public link
+if __name__ == "__main__":
+    iface.launch()
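One caveat in the new image-sizing code: openpyxl treats Image.height and Image.width as pixels (at 96 dpi), not points, so the 28.3465 points-per-cm factor in this commit will render the logo at roughly three quarters of the stated physical size. A sketch of the pixel-based conversion, if true 1.54 cm × 5.69 cm dimensions are wanted:

PX_PER_CM = 96 / 2.54  # openpyxl image sizes are pixels at 96 dpi, about 37.8 px per cm
img.height = int(1.54 * PX_PER_CM)  # about 58 px
img.width = int(5.69 * PX_PER_CM)   # about 215 px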
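To exercise process_excel without launching the Gradio UI, any object exposing a .name attribute mimics what gr.File hands the function; the sample path below is hypothetical:

class FakeUpload:
    # gr.File passes process_excel an object whose .name points at the uploaded file
    name = "sample_products.xlsx"  # hypothetical input workbook

print(process_excel(FakeUpload()))  # prints the path of the generated output/keyword_counts.xlsx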