na_ver-1

Sleeping

App Files Files Community

Kims12 committed on Feb 23

Commit

08b8a4f

verified ·

1 Parent(s): 68610a8

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -94

app.py CHANGED Viewed

@@ -1,20 +1,21 @@
 import os
 import time
 import hmac
 import hashlib
 import base64
 import requests
-import pandas as pd
-import tempfile
-import gradio as gr
-# 네이버 광고 API 호출 시 사용할 서명 생성 함수
 def generate_signature(timestamp, method, uri, secret_key):
     message = f"{timestamp}.{method}.{uri}"
     digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
     return base64.b64encode(digest).decode()
-# 네이버 광고 API 호출 헤더 생성 함수
 def get_header(method, uri, api_key, secret_key, customer_id):
     timestamp = str(round(time.time() * 1000))
     signature = generate_signature(timestamp, method, uri, secret_key)
@@ -26,13 +27,14 @@ def get_header(method, uri, api_key, secret_key, customer_id):
         "X-Signature": signature
     }
-# 네이버 광고 API를 통해 단일 키워드의 연관검색어 및 검색량 정보를 가져오는 함수
 def fetch_related_keywords(keyword):
-    # 환경변수에서 광고 API 키값들을 불러옵니다.
-    API_KEY = os.environ["NAVER_API_KEY"]
-    SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
-    CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
     BASE_URL = "https://api.naver.com"
     uri = "/keywordstool"
     method = "GET"
@@ -41,18 +43,20 @@ def fetch_related_keywords(keyword):
         "hintKeywords": [keyword],
         "showDetail": "1"
     }
-    response = requests.get(BASE_URL + uri, params=params, headers=headers)
-    data = response.json()
     if "keywordList" not in data:
         return pd.DataFrame()
     df = pd.DataFrame(data["keywordList"])
     if len(df) > 100:
         df = df.head(100)
     def parse_count(x):
         try:
-            x_str = str(x).replace(",", "")
-            return int(x_str)
         except:
             return 0
@@ -63,93 +67,152 @@ def fetch_related_keywords(keyword):
     result_df = df[["정보키워드", "PC월검색량", "모바일월검색량", "토탈월검색량"]]
     return result_df
-# 네이버 검색 개발 API를 활용하여 블로그 문서수를 조회하는 함수
 def fetch_blog_count(keyword):
-    # 환경변수에서 네이버 검색 API 자격증명을 불러옵니다.
-    client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
-    client_secret = os.environ["NAVER_SEARCH_CLIENT_SECRET"]
     url = "https://openapi.naver.com/v1/search/blog.json"
     headers = {
         "X-Naver-Client-Id": client_id,
         "X-Naver-Client-Secret": client_secret
     }
     params = {"query": keyword, "display": 1}
-    response = requests.get(url, headers=headers, params=params)
-    if response.status_code == 200:
-        data = response.json()
-        return data.get("total", 0)
-    else:
         return 0
-# 임시 엑셀 파일 생성 함수
-def create_excel_file(df):
-    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
-        excel_path = tmp.name
-    df.to_excel(excel_path, index=False)
-    return excel_path
-# 입력된 여러 키워드를 처리하는 함수
-def process_keyword(keywords: str, include_related: bool):
     """
-    1. 텍스트박스에 엔터로 구분된 여러 키워드를 받아 각 키워드에 대해 네이버 광고 API를 통해 검색량 정보를 조회합니다.
-    2. 각 키워드에 대해 입력한 키워드 자체의 결과를 포함합니다.
-    3. 체크박스(True)인 경우, 첫 번째 키워드에 대해서만 연관검색어(입력 키워드를 제외한 결과)를 추가합니다.
-    4. 마지막으로, 각 "정보키워드"에 대해 네이버 검색 API를 호출하여 블로그 문서수를 조회하고 "블로그문서수" 컬럼에 추가합니다.
     """
-    # 줄바꿈으로 분리하여 입력 키워드 리스트 생성 (빈 줄 제외)
-    input_keywords = [k.strip() for k in keywords.splitlines() if k.strip() != ""]
-    result_dfs = []
-    for idx, kw in enumerate(input_keywords):
-        df_kw = fetch_related_keywords(kw)
-        if df_kw.empty:
-            continue
-        # 입력 키워드 자체의 결과를 우선 포함
-        row_kw = df_kw[df_kw["정보키워드"] == kw]
-        if not row_kw.empty:
-            result_dfs.append(row_kw)
-        else:
-            # 입력 키워드에 해당하는 행이 없으면 첫 번째 행을 대체로 추가
-            result_dfs.append(df_kw.head(1))
-        # 체크박스가 True이고, 첫 번째 키워드에 대해서만 연관검색어 추가 (입력 키워드 제외)
-        if include_related and idx == 0:
-            df_related = df_kw[df_kw["정보키워드"] != kw]
-            if not df_related.empty:
-                result_dfs.append(df_related)
-    if result_dfs:
-        result_df = pd.concat(result_dfs, ignore_index=True)
-        result_df.drop_duplicates(subset=["정보키워드"], inplace=True)
-    else:
-        result_df = pd.DataFrame(columns=["정보키워드", "PC월검색량", "모바일월검색량", "토탈월검색량"])
-    # 블로그 문서수 컬럼 추가: 각 정보키워드마다 네이버 블로그 검색 API로 총 문서수를 조회
-    result_df["블로그문서수"] = result_df["정보키워드"].apply(fetch_blog_count)
-    result_df.sort_values(by="토탈월검색량", ascending=False, inplace=True)
-    return result_df, create_excel_file(result_df)
-# Gradio UI 구성
-with gr.Blocks() as demo:
-    gr.Markdown("### 네이버 연관검색어 및 검색량, 블로그 문서수 조회 앱")
-    gr.Markdown(
-        "여러 키워드를 엔터로 구분하여 입력하면 각 키워드의 검색량 정보를 조회하고, "
-        "첫 번째 키워드의 경우 '연관검색어 포함' 체크 시 연관검색어도 함께 조회합니다. "
-        "또한, 각 정보키워드에 대한 네이버 블로그 문서수도 함께 출력됩니다."
     )
-    with gr.Row():
-        keyword_input = gr.Textbox(label="키워드 입력 (여러 개일 경우 엔터로 구분)", lines=5, placeholder="예:\n강원도풀빌라\n자바스크립트")
-        include_checkbox = gr.Checkbox(label="연관검색어 포함 (첫번째 키워드에 한함)", value=False)
-        search_button = gr.Button("검색")
-    with gr.Row():
-        df_output = gr.Dataframe(label="검색 결과")
-        excel_output = gr.File(label="엑셀 다운로드")
-    # 버튼 클릭 시 process_keyword 함수 실행
-    search_button.click(fn=process_keyword, inputs=[keyword_input, include_checkbox], outputs=[df_output, excel_output])
-# 앱 실행 (Hugging Face Spaces 배포 가능)
-demo.launch()

+import gradio as gr
+import pandas as pd
+import re
+from io import BytesIO
+import tempfile
 import os
 import time
 import hmac
 import hashlib
 import base64
 import requests
+# --- 네이버 광고 API: 서명 생성 및 헤더 구성 ---
 def generate_signature(timestamp, method, uri, secret_key):
     """Return the Base64-encoded HMAC-SHA256 signature for an API request.

     The signed message is ``timestamp``, ``method`` and ``uri`` joined by
     dots. Both the message and ``secret_key`` are UTF-8 encoded before
     signing, and the raw digest is returned as a Base64 text string.
     """
     payload = "{}.{}.{}".format(timestamp, method, uri)
     signer = hmac.new(
         secret_key.encode("utf-8"),
         payload.encode("utf-8"),
         hashlib.sha256,
     )
     raw_digest = signer.digest()
     return base64.b64encode(raw_digest).decode()
 def get_header(method, uri, api_key, secret_key, customer_id):
     timestamp = str(round(time.time() * 1000))
     signature = generate_signature(timestamp, method, uri, secret_key)
         "X-Signature": signature
     }
+# --- 네이버 광고 API: 검색량 조회 (연관검색어 제외) ---
 def fetch_related_keywords(keyword):
+    API_KEY = os.environ.get("NAVER_API_KEY")
+    SECRET_KEY = os.environ.get("NAVER_SECRET_KEY")
+    CUSTOMER_ID = os.environ.get("NAVER_CUSTOMER_ID")
+    if not API_KEY or not SECRET_KEY or not CUSTOMER_ID:
+        return pd.DataFrame()
     BASE_URL = "https://api.naver.com"
     uri = "/keywordstool"
     method = "GET"
         "hintKeywords": [keyword],
         "showDetail": "1"
     }
+    try:
+        response = requests.get(BASE_URL + uri, params=params, headers=headers)
+        data = response.json()
+    except Exception as e:
+        return pd.DataFrame()
     if "keywordList" not in data:
         return pd.DataFrame()
     df = pd.DataFrame(data["keywordList"])
     if len(df) > 100:
         df = df.head(100)
     def parse_count(x):
         try:
+            return int(str(x).replace(",", ""))
         except:
             return 0
     result_df = df[["정보키워드", "PC월검색량", "모바일월검색량", "토탈월검색량"]]
     return result_df
+# --- 네이버 검색 API: 블로그 문서수 조회 ---
 def fetch_blog_count(keyword):
     """Return the ``total`` field of a Naver blog search for ``keyword``.

     Credentials are read from the ``NAVER_SEARCH_CLIENT_ID`` and
     ``NAVER_SEARCH_CLIENT_SECRET`` environment variables.

     This is a best-effort lookup: missing credentials, a non-200 response,
     or any failure while sending the request or parsing the reply yields
     ``0`` instead of raising.
     """
     client_id = os.environ.get("NAVER_SEARCH_CLIENT_ID")
     client_secret = os.environ.get("NAVER_SEARCH_CLIENT_SECRET")
     if not client_id or not client_secret:
         return 0
     url = "https://openapi.naver.com/v1/search/blog.json"
     headers = {
         "X-Naver-Client-Id": client_id,
         "X-Naver-Client-Secret": client_secret
     }
     params = {"query": keyword, "display": 1}
     try:
         # Without a timeout a single stalled connection would block the
         # caller indefinitely.
         response = requests.get(url, headers=headers, params=params, timeout=10)
         if response.status_code != 200:
             return 0
         return response.json().get("total", 0)
     except Exception:
         # Deliberately broad to keep the lookup best-effort, but unlike a
         # bare ``except`` it lets KeyboardInterrupt/SystemExit propagate.
         return 0
# Column order of the result table and of the header row in the output workbook.
_RESULT_COLUMNS = ["키워드", "빈도수", "PC월검색량", "모바일월검색량", "토탈월검색량", "블로그문서수"]


def _error_result(error_message):
    """Write ``error_message`` to a temporary .txt file; return (path, one-cell error table)."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="wb") as temp_error:
        temp_error.write(error_message.encode("utf-8"))
    error_df = pd.DataFrame({"에러": [error_message]})
    return temp_error.name, error_df


def _count_keywords(cells):
    """Return {keyword: number of cells containing it} for the given cell values."""
    keyword_counts = {}
    for cell in cells:
        # Drop everything except digits, ASCII letters, Hangul syllables and
        # whitespace, then split on whitespace.
        cleaned = re.sub(r'[^0-9a-zA-Z가-힣\s]', '', str(cell))
        # A keyword repeated inside one cell is counted once for that cell.
        for keyword in set(cleaned.split()):
            keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
    return keyword_counts


def process_excel(file_bytes):
    """Extract keywords from an uploaded workbook and look up their search stats.

    The workbook is read without a header row. Product names are taken from
    column D starting at row 4 (cell D4 downward); empty cells are skipped.
    Each name is stripped of special characters and split on whitespace, and
    a keyword is counted at most once per cell. Keywords are ordered by
    descending frequency, then by keyword.

    For every keyword, ``fetch_related_keywords`` supplies the PC, mobile and
    total monthly search volumes (the row whose keyword matches exactly, or
    the first returned row when there is no exact match; zeros when nothing
    is returned) and ``fetch_blog_count`` supplies the blog document count.

    Output workbook columns:
        A: keyword, B: frequency, C: PC monthly searches,
        D: mobile monthly searches, E: total monthly searches,
        F: blog document count.

    Args:
        file_bytes: Raw bytes of the uploaded Excel file.

    Returns:
        A ``(path, DataFrame)`` tuple. On success, the path of a temporary
        .xlsx file and the result table. If the workbook cannot be read, has
        fewer than 4 rows or 4 columns, or the output workbook cannot be
        written, the path of a temporary .txt file holding the error message
        and a one-cell table with the same message.
    """
    try:
        df = pd.read_excel(BytesIO(file_bytes), header=None, engine="openpyxl")
    except Exception as e:
        return _error_result("엑셀 파일을 읽는 중 오류가 발생하였습니다: " + str(e))

    # D4 must exist: at least 4 columns and 4 rows.
    if df.shape[1] < 4 or df.shape[0] < 4:
        return _error_result("엑셀 파일의 형식이 올바르지 않습니다.")

    # Column D (index 3) from row 4 (index 3) onward.
    product_names = df.iloc[3:, 3].dropna()
    keyword_counts = _count_keywords(product_names)
    sorted_keywords = sorted(keyword_counts.items(), key=lambda x: (-x[1], x[0]))

    result_data = []
    for keyword, count in sorted_keywords:
        pc_search = 0
        mobile_search = 0
        total_search = 0
        df_api = fetch_related_keywords(keyword)
        if not df_api.empty:
            row = df_api[df_api["정보키워드"] == keyword]
            if row.empty:
                # No exact match: fall back to the first returned row.
                row = df_api.iloc[[0]]
            # NOTE(review): assumes the volume columns are already
            # int-convertible when they come back — confirm against
            # fetch_related_keywords.
            pc_search = int(row["PC월검색량"].iloc[0])
            mobile_search = int(row["모바일월검색량"].iloc[0])
            total_search = int(row["토탈월검색량"].iloc[0])
        blog_count = fetch_blog_count(keyword)
        result_data.append({
            "키워드": keyword,
            "빈도수": count,
            "PC월검색량": pc_search,
            "모바일월검색량": mobile_search,
            "토탈월검색량": total_search,
            "블로그문서수": blog_count
        })
    # Pinning the columns keeps the table well-formed even when no keyword
    # was extracted.
    result_df = pd.DataFrame(result_data, columns=_RESULT_COLUMNS)

    # Data rows are written from the second row; the header row is filled in
    # by hand on row 1.
    output = BytesIO()
    try:
        with pd.ExcelWriter(output, engine="openpyxl") as writer:
            result_df.to_excel(writer, index=False, startrow=1, header=False)
            worksheet = writer.sheets["Sheet1"]
            for column_index, title in enumerate(_RESULT_COLUMNS, start=1):
                worksheet.cell(row=1, column=column_index, value=title)
    except Exception as e:
        return _error_result("엑셀 파일을 생성하는 중 오류가 발생하였습니다: " + str(e))

    # delete=False: the file must outlive this call so the caller can serve it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", mode="wb") as temp_excel:
        temp_excel.write(output.getvalue())
    return temp_excel.name, result_df
# Gradio front end: one uploaded Excel file (passed to process_excel as raw
# bytes) in; a downloadable result file and a preview table out.
excel_upload = gr.File(label="엑셀 파일 업로드", type="binary")
result_views = [
    gr.File(label="결과 엑셀 파일"),
    gr.DataFrame(label="키워드 분석 표"),
]
app_description = (
    "엑셀 파일의 D4셀부터 D열에 있는 상품명 데이터를 분석하여, "
    "특수문자를 제거한 후 공백 기준으로 키워드를 추출합니다. "
    "각 키워드에 대해 네이버 API를 활용하여 PC/모바일/토탈 월 검색량과 "
    "네이버 블로그 문서수를 조회한 결과를 엑셀 파일과 표(데이터프레임)로 출력합니다."
)
iface = gr.Interface(
    fn=process_excel,
    inputs=excel_upload,
    outputs=result_views,
    title="엑셀 상품명 키워드 추출 및 검색량/블로그 문서수 조회",
    description=app_description,
)
# Start the app as soon as the module runs.
iface.launch()