# na_ver-1

# Sleeping

# File size: 8,828 Bytes

import gradio as gr
import pandas as pd
import re
from io import BytesIO
import tempfile
import os
import time
import hmac
import hashlib
import base64
import requests

# --- 네이버 광고 API: 서명 생성 및 헤더 구성 ---
def generate_signature(timestamp, method, uri, secret_key):
    """Return the Base64-encoded HMAC-SHA256 signature for one request.

    The signed payload is ``"<timestamp>.<method>.<uri>"`` and the HMAC key
    is ``secret_key``; both are UTF-8 encoded before hashing.

    Args:
        timestamp: Value placed first in the signed payload.
        method: HTTP method name placed second in the payload.
        uri: Request path placed last in the payload.
        secret_key: Secret used as the HMAC key (``str``).

    Returns:
        The raw digest encoded as a Base64 text string.
    """
    payload = "{}.{}.{}".format(timestamp, method, uri).encode("utf-8")
    key_bytes = secret_key.encode("utf-8")
    mac = hmac.new(key_bytes, payload, hashlib.sha256)
    return base64.b64encode(mac.digest()).decode()

def get_header(method, uri, api_key, secret_key, customer_id):
    """Build the HTTP headers for a signed API request.

    The current time in milliseconds is used both as the ``X-Timestamp``
    header and as the timestamp component of the signature.

    Args:
        method: HTTP method name that is signed.
        uri: Request path that is signed.
        api_key: Value sent as ``X-API-KEY``.
        secret_key: Secret passed to ``generate_signature``.
        customer_id: Value sent (stringified) as ``X-Customer``.

    Returns:
        A ``dict`` mapping header names to their string values.
    """
    stamp = str(round(time.time() * 1000))
    signed = generate_signature(stamp, method, uri, secret_key)
    pairs = [
        ("Content-Type", "application/json; charset=UTF-8"),
        ("X-Timestamp", stamp),
        ("X-API-KEY", api_key),
        ("X-Customer", str(customer_id)),
        ("X-Signature", signed),
    ]
    return dict(pairs)

# --- 네이버 광고 API: 검색량 조회 (연관검색어 제외) ---
def fetch_related_keywords(keyword, timeout=10):
    """Look up monthly search volumes for ``keyword`` via the keyword tool API.

    Credentials are read from the ``NAVER_API_KEY``, ``NAVER_SECRET_KEY`` and
    ``NAVER_CUSTOMER_ID`` environment variables. The lookup is best-effort:
    missing credentials, a network error or timeout, a non-JSON body, or a
    payload without a usable ``keywordList`` all yield an empty ``DataFrame``
    instead of an exception.

    Args:
        keyword: Hint keyword sent as the ``hintKeywords`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A ``DataFrame`` with the columns ``정보키워드``, ``PC월검색량``,
        ``모바일월검색량`` and ``토탈월검색량`` built from at most the first
        100 entries of the response, or an empty ``DataFrame`` on failure.
    """
    api_key = os.environ.get("NAVER_API_KEY")
    secret_key = os.environ.get("NAVER_SECRET_KEY")
    customer_id = os.environ.get("NAVER_CUSTOMER_ID")
    if not (api_key and secret_key and customer_id):
        return pd.DataFrame()

    base_url = "https://api.naver.com"
    uri = "/keywordstool"
    headers = get_header("GET", uri, api_key, secret_key, customer_id)
    params = {
        "hintKeywords": [keyword],
        "showDetail": "1",
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(
            base_url + uri, params=params, headers=headers, timeout=timeout
        )
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return pd.DataFrame()

    keyword_list = data.get("keywordList") if isinstance(data, dict) else None
    if not isinstance(keyword_list, list) or not keyword_list:
        return pd.DataFrame()

    df = pd.DataFrame(keyword_list).head(100)
    required = {"relKeyword", "monthlyPcQcCnt", "monthlyMobileQcCnt"}
    if not required.issubset(df.columns):
        # Unexpected payload shape: stay best-effort rather than raise KeyError.
        return pd.DataFrame()

    def parse_count(x):
        # Counts may carry thousands separators; anything non-numeric becomes 0.
        try:
            return int(str(x).replace(",", ""))
        except ValueError:
            return 0

    df["PC월검색량"] = df["monthlyPcQcCnt"].apply(parse_count)
    df["모바일월검색량"] = df["monthlyMobileQcCnt"].apply(parse_count)
    df["토탈월검색량"] = df["PC월검색량"] + df["모바일월검색량"]
    df = df.rename(columns={"relKeyword": "정보키워드"})
    return df[["정보키워드", "PC월검색량", "모바일월검색량", "토탈월검색량"]]

# --- 네이버 검색 API: 블로그 문서수 조회 ---
def fetch_blog_count(keyword, timeout=10):
    """Return the number of blog documents the search API reports for ``keyword``.

    Credentials are read from the ``NAVER_SEARCH_CLIENT_ID`` and
    ``NAVER_SEARCH_CLIENT_SECRET`` environment variables. The lookup is
    best-effort: missing credentials, a network error or timeout, a non-200
    status, or an unparsable body all yield ``0``.

    Args:
        keyword: Search query sent as the ``query`` parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``total`` field of the JSON response, or ``0`` when it is missing
        or the request failed.
    """
    client_id = os.environ.get("NAVER_SEARCH_CLIENT_ID")
    client_secret = os.environ.get("NAVER_SEARCH_CLIENT_SECRET")
    if not client_id or not client_secret:
        return 0

    url = "https://openapi.naver.com/v1/search/blog.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    # Only the total is needed, so a single result item is requested.
    params = {"query": keyword, "display": 1}
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code != 200:
            return 0
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return 0

    if not isinstance(data, dict):
        return 0
    return data.get("total", 0)

def _write_error_report(error_message):
    """Save ``error_message`` to a temp ``.txt`` file and return ``(path, error_df)``."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="wb") as handle:
        handle.write(error_message.encode("utf-8"))
    return handle.name, pd.DataFrame({"에러": [error_message]})


def _count_keywords(cells):
    """Count, for each keyword, the number of cells in which it appears at least once."""
    non_keyword_chars = re.compile(r'[^0-9a-zA-Z가-힣\s]')
    counts = {}
    for cell in cells:
        text = cell if isinstance(cell, str) else str(cell)
        # A set makes a keyword repeated inside one cell count only once.
        for keyword in set(non_keyword_chars.sub('', text).split()):
            counts[keyword] = counts.get(keyword, 0) + 1
    return counts


def _lookup_search_volume(keyword):
    """Return ``(pc, mobile, total)`` monthly search counts, zeros when unavailable."""
    df_api = fetch_related_keywords(keyword)
    if df_api.empty:
        return 0, 0, 0
    row = df_api[df_api["정보키워드"] == keyword]
    if row.empty:
        # No exact match on the keyword column: use the first returned row.
        row = df_api.iloc[[0]]
    return (
        int(row["PC월검색량"].iloc[0]),
        int(row["모바일월검색량"].iloc[0]),
        int(row["토탈월검색량"].iloc[0]),
    )


def process_excel(file_bytes):
    """Extract keywords from an uploaded Excel sheet and enrich them with API data.

    Product names are read from column D starting at cell D4. Each cell has
    every character other than digits, ASCII letters, Hangul syllables and
    whitespace removed, and is then split on whitespace into keywords. A
    keyword is counted once per cell, so its frequency is the number of cells
    containing it. Keywords are ordered by descending frequency, then
    alphabetically.

    For every keyword the PC / mobile / total monthly search volumes and the
    blog document count are looked up through ``fetch_related_keywords`` and
    ``fetch_blog_count``. When no returned row matches the keyword exactly,
    the first returned row is used for the volumes.

    The result workbook has these columns:
        A: 키워드, B: 빈도수, C: PC월검색량, D: 모바일월검색량,
        E: 토탈월검색량, F: 블로그문서수

    Args:
        file_bytes: Raw bytes of the uploaded Excel file.

    Returns:
        ``(path, dataframe)``. On success ``path`` is a temporary ``.xlsx``
        file and ``dataframe`` holds the same rows. On failure ``path`` is a
        temporary ``.txt`` file containing the error message and
        ``dataframe`` has a single ``에러`` column with that message.
    """
    try:
        df = pd.read_excel(BytesIO(file_bytes), header=None, engine="openpyxl")
    except Exception as e:
        # UI boundary: any read failure is reported to the user, not raised.
        return _write_error_report("엑셀 파일을 읽는 중 오류가 발생하였습니다: " + str(e))

    # The sheet must reach at least cell D4 (4 columns, 4 rows).
    if df.shape[1] < 4 or df.shape[0] < 4:
        return _write_error_report("엑셀 파일의 형식이 올바르지 않습니다.")

    # Column D (index 3) from row 4 (index 3) downwards, skipping empty cells.
    product_names = df.iloc[3:, 3].dropna()
    keyword_counts = _count_keywords(product_names)
    sorted_keywords = sorted(keyword_counts.items(), key=lambda x: (-x[1], x[0]))

    columns = ["키워드", "빈도수", "PC월검색량", "모바일월검색량", "토탈월검색량", "블로그문서수"]
    rows = []
    for keyword, count in sorted_keywords:
        pc_search, mobile_search, total_search = _lookup_search_volume(keyword)
        rows.append({
            "키워드": keyword,
            "빈도수": count,
            "PC월검색량": pc_search,
            "모바일월검색량": mobile_search,
            "토탈월검색량": total_search,
            "블로그문서수": fetch_blog_count(keyword),
        })
    # Explicit columns keep the table headers even when no keyword was found.
    result_df = pd.DataFrame(rows, columns=columns)

    # Build the workbook in memory first so a failure leaves no partial file.
    output = BytesIO()
    try:
        with pd.ExcelWriter(output, engine="openpyxl") as writer:
            # Data starts on the second row; the header row is written by hand.
            result_df.to_excel(writer, index=False, startrow=1, header=False)
            worksheet = writer.sheets["Sheet1"]
            for column_index, title in enumerate(columns, start=1):
                worksheet.cell(row=1, column=column_index, value=title)
    except Exception as e:
        # UI boundary: any write failure is reported to the user, not raised.
        return _write_error_report("엑셀 파일을 생성하는 중 오류가 발생하였습니다: " + str(e))

    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", mode="wb") as temp_excel:
        temp_excel.write(output.getvalue())
    return temp_excel.name, result_df

# Input component: a single Excel upload delivered to the handler as raw bytes.
_upload_input = gr.File(label="엑셀 파일 업로드", type="binary")

# Output components: the downloadable result file and an on-page table.
_result_outputs = [
    gr.File(label="결과 엑셀 파일"),
    gr.DataFrame(label="키워드 분석 표"),
]

# Text shown under the page title.
_description_text = (
    "엑셀 파일의 D4셀부터 D열에 있는 상품명 데이터를 분석하여, "
    "특수문자를 제거한 후 공백 기준으로 키워드를 추출합니다. "
    "각 키워드에 대해 네이버 API를 활용하여 PC/모바일/토탈 월 검색량과 "
    "네이버 블로그 문서수를 조회한 결과를 엑셀 파일과 표(데이터프레임)로 출력합니다."
)

iface = gr.Interface(
    fn=process_excel,
    inputs=_upload_input,
    outputs=_result_outputs,
    title="엑셀 상품명 키워드 추출 및 검색량/블로그 문서수 조회",
    description=_description_text,
)

# Start the web app as soon as this module is executed.
iface.launch()