na_ver-1

Sleeping

App Files Files Community

na_ver-1 / app.py

Kims12

Update app.py

08b8a4f verified 6 months ago

raw

history blame contribute delete

8.83 kB

	import gradio as gr
	import pandas as pd
	import re
	from io import BytesIO
	import tempfile
	import os
	import time
	import hmac
	import hashlib
	import base64
	import requests

	# --- 네이버 광고 API: 서명 생성 및 헤더 구성 ---
	def generate_signature(timestamp, method, uri, secret_key):
	    """Return the Base64-encoded HMAC-SHA256 signature of a request.

	    The signed message is ``"<timestamp>.<method>.<uri>"``; both the message
	    and ``secret_key`` are UTF-8 encoded before hashing.
	    """
	    payload = "{}.{}.{}".format(timestamp, method, uri).encode("utf-8")
	    key_bytes = secret_key.encode("utf-8")
	    mac = hmac.new(key_bytes, payload, digestmod=hashlib.sha256)
	    return base64.b64encode(mac.digest()).decode()

	def get_header(method, uri, api_key, secret_key, customer_id):
	    """Build the signed HTTP headers for a Naver Ad API request.

	    The timestamp is the current time in milliseconds, rendered as a string;
	    the same value is signed and sent in ``X-Timestamp``.
	    """
	    now_ms = str(round(time.time() * 1000))
	    signed = generate_signature(now_ms, method, uri, secret_key)
	    headers = {"Content-Type": "application/json; charset=UTF-8"}
	    headers["X-Timestamp"] = now_ms
	    headers["X-API-KEY"] = api_key
	    headers["X-Customer"] = str(customer_id)
	    headers["X-Signature"] = signed
	    return headers

	# --- 네이버 광고 API: 검색량 조회 (연관검색어 제외) ---
	def fetch_related_keywords(keyword):
	    """Look up monthly search volumes for ``keyword`` via the Naver Ad API.

	    Credentials are read from the ``NAVER_API_KEY``, ``NAVER_SECRET_KEY`` and
	    ``NAVER_CUSTOMER_ID`` environment variables.

	    Returns:
	        A DataFrame with the columns ``정보키워드``, ``PC월검색량``,
	        ``모바일월검색량`` and ``토탈월검색량``, built from at most the first 100
	        entries of the response's ``keywordList``. An empty DataFrame is
	        returned when credentials are missing, the request or JSON decoding
	        fails, or the response holds no usable keyword rows.
	    """
	    api_key = os.environ.get("NAVER_API_KEY")
	    secret_key = os.environ.get("NAVER_SECRET_KEY")
	    customer_id = os.environ.get("NAVER_CUSTOMER_ID")
	    if not (api_key and secret_key and customer_id):
	        return pd.DataFrame()

	    base_url = "https://api.naver.com"
	    uri = "/keywordstool"
	    headers = get_header("GET", uri, api_key, secret_key, customer_id)
	    params = {"hintKeywords": [keyword], "showDetail": "1"}
	    try:
	        # A timeout keeps one stalled request from hanging the whole run.
	        response = requests.get(
	            base_url + uri, params=params, headers=headers, timeout=10
	        )
	        data = response.json()
	    except (requests.RequestException, ValueError):
	        # Network failure or a body that is not valid JSON: best-effort empty result.
	        return pd.DataFrame()

	    # The payload may be an error object or JSON null instead of the expected dict.
	    keyword_list = data.get("keywordList") if isinstance(data, dict) else None
	    if not keyword_list:
	        return pd.DataFrame()

	    df = pd.DataFrame(keyword_list).head(100)
	    required = ("relKeyword", "monthlyPcQcCnt", "monthlyMobileQcCnt")
	    if any(column not in df.columns for column in required):
	        # Without these fields the volume columns cannot be derived.
	        return pd.DataFrame()

	    def parse_count(value):
	        """Convert a count to int, ignoring thousands separators; 0 if unparsable."""
	        try:
	            return int(str(value).replace(",", ""))
	        except ValueError:
	            return 0

	    df["PC월검색량"] = df["monthlyPcQcCnt"].apply(parse_count)
	    df["모바일월검색량"] = df["monthlyMobileQcCnt"].apply(parse_count)
	    df["토탈월검색량"] = df["PC월검색량"] + df["모바일월검색량"]
	    df = df.rename(columns={"relKeyword": "정보키워드"})
	    return df[["정보키워드", "PC월검색량", "모바일월검색량", "토탈월검색량"]]

	# --- 네이버 검색 API: 블로그 문서수 조회 ---
	def fetch_blog_count(keyword):
	    """Return the ``total`` reported by the Naver blog search API for ``keyword``.

	    Credentials are read from the ``NAVER_SEARCH_CLIENT_ID`` and
	    ``NAVER_SEARCH_CLIENT_SECRET`` environment variables.

	    Returns:
	        The ``total`` field of the JSON response, or 0 when credentials are
	        missing, the request fails, the status code is not 200, or the body
	        is not a JSON object containing ``total``.
	    """
	    client_id = os.environ.get("NAVER_SEARCH_CLIENT_ID")
	    client_secret = os.environ.get("NAVER_SEARCH_CLIENT_SECRET")
	    if not client_id or not client_secret:
	        return 0

	    url = "https://openapi.naver.com/v1/search/blog.json"
	    headers = {
	        "X-Naver-Client-Id": client_id,
	        "X-Naver-Client-Secret": client_secret,
	    }
	    # Only the total is needed, so a single result item is requested.
	    params = {"query": keyword, "display": 1}
	    try:
	        # A timeout keeps one stalled request from hanging the whole run.
	        response = requests.get(url, headers=headers, params=params, timeout=10)
	        if response.status_code != 200:
	            return 0
	        data = response.json()
	    except (requests.RequestException, ValueError):
	        # Network failure or a body that is not valid JSON: best-effort zero.
	        return 0

	    if not isinstance(data, dict):
	        return 0
	    return data.get("total", 0)

	def _error_result(error_message):
	    """Persist ``error_message`` to a temporary .txt file and wrap it in a DataFrame.

	    Returns:
	        ``(path, error_df)`` where ``path`` is the UTF-8 encoded text file and
	        ``error_df`` has a single ``에러`` column holding the message.
	    """
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="wb") as report:
	        report.write(error_message.encode("utf-8"))
	    return report.name, pd.DataFrame({"에러": [error_message]})


	def process_excel(file_bytes):
	    """Extract keywords from an uploaded Excel file and enrich them via Naver APIs.

	    Product names are read from column D starting at cell D4. For each cell,
	    every character other than digits, ASCII letters, Hangul syllables and
	    whitespace is removed and the remainder is split on whitespace. A keyword
	    repeated within one cell is counted once, and frequencies are accumulated
	    across all cells.

	    For each keyword the PC / mobile / total monthly search volumes and the
	    blog document count are looked up, and the result is written to an Excel
	    file with these columns:

	        A: 키워드 (keyword)
	        B: 빈도수 (frequency)
	        C: PC월검색량 (monthly PC searches)
	        D: 모바일월검색량 (monthly mobile searches)
	        E: 토탈월검색량 (total monthly searches)
	        F: 블로그문서수 (blog document count)

	    Args:
	        file_bytes: Raw bytes of the uploaded workbook.

	    Returns:
	        ``(path, dataframe)``. On success ``path`` is a temporary .xlsx file
	        and ``dataframe`` holds the same rows. On failure ``path`` is a
	        temporary .txt file containing the error message and ``dataframe``
	        has a single ``에러`` column with that message.
	    """
	    # Local import keeps this block self-contained.
	    from collections import Counter

	    columns = ["키워드", "빈도수", "PC월검색량", "모바일월검색량", "토탈월검색량", "블로그문서수"]

	    try:
	        df = pd.read_excel(BytesIO(file_bytes), header=None, engine="openpyxl")
	    except Exception as exc:
	        # Boundary handler: any read failure is reported to the user, not raised.
	        return _error_result("엑셀 파일을 읽는 중 오류가 발생하였습니다: " + str(exc))

	    # The sheet must reach at least column D and row 4.
	    if df.shape[1] < 4 or df.shape[0] < 4:
	        return _error_result("엑셀 파일의 형식이 올바르지 않습니다.")

	    # Column D (index 3) from row 4 (index 3) downwards, skipping empty cells.
	    product_names = df.iloc[3:, 3].dropna()

	    non_keyword_chars = re.compile(r'[^0-9a-zA-Z가-힣\s]')
	    keyword_counts = Counter()
	    for cell in product_names:
	        cleaned = non_keyword_chars.sub('', str(cell))
	        # A set ensures each keyword is counted at most once per cell.
	        keyword_counts.update(set(cleaned.split()))

	    # Most frequent first; ties broken alphabetically.
	    sorted_keywords = sorted(keyword_counts.items(), key=lambda item: (-item[1], item[0]))

	    result_data = []
	    for keyword, count in sorted_keywords:
	        pc_search = 0
	        mobile_search = 0
	        total_search = 0
	        df_api = fetch_related_keywords(keyword)
	        if not df_api.empty:
	            row = df_api[df_api["정보키워드"] == keyword]
	            if row.empty:
	                # No exact match: fall back to the first row of the response.
	                row = df_api.iloc[[0]]
	            pc_search = int(row["PC월검색량"].iloc[0])
	            mobile_search = int(row["모바일월검색량"].iloc[0])
	            total_search = int(row["토탈월검색량"].iloc[0])
	        result_data.append({
	            "키워드": keyword,
	            "빈도수": count,
	            "PC월검색량": pc_search,
	            "모바일월검색량": mobile_search,
	            "토탈월검색량": total_search,
	            "블로그문서수": fetch_blog_count(keyword),
	        })
	    # Explicit columns keep the table shape stable even when no keyword was found.
	    result_df = pd.DataFrame(result_data, columns=columns)

	    output = BytesIO()
	    try:
	        with pd.ExcelWriter(output, engine="openpyxl") as writer:
	            # Data starts on the second row; the header row is written by hand.
	            result_df.to_excel(writer, index=False, startrow=1, header=False)
	            worksheet = writer.sheets["Sheet1"]
	            for column_index, header_text in enumerate(columns, start=1):
	                worksheet.cell(row=1, column=column_index, value=header_text)
	    except Exception as exc:
	        # Boundary handler: any write failure is reported to the user, not raised.
	        return _error_result("엑셀 파일을 생성하는 중 오류가 발생하였습니다: " + str(exc))

	    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", mode="wb") as temp_excel:
	        temp_excel.write(output.getvalue())
	    return temp_excel.name, result_df

	# Gradio UI: one Excel upload in; the result file and the analysis table out.
	_excel_upload = gr.File(label="엑셀 파일 업로드", type="binary")
	_result_views = [
	    gr.File(label="결과 엑셀 파일"),
	    gr.DataFrame(label="키워드 분석 표"),
	]
	_description = (
	    "엑셀 파일의 D4셀부터 D열에 있는 상품명 데이터를 분석하여, "
	    "특수문자를 제거한 후 공백 기준으로 키워드를 추출합니다. "
	    "각 키워드에 대해 네이버 API를 활용하여 PC/모바일/토탈 월 검색량과 "
	    "네이버 블로그 문서수를 조회한 결과를 엑셀 파일과 표(데이터프레임)로 출력합니다."
	)

	iface = gr.Interface(
	    fn=process_excel,
	    inputs=_excel_upload,
	    outputs=_result_views,
	    title="엑셀 상품명 키워드 추출 및 검색량/블로그 문서수 조회",
	    description=_description,
	)

	# Launch the app when this script runs.
	iface.launch()