import os
import tempfile
from collections import Counter

import gradio as gr
import openpyxl
import pandas as pd

def _split_into_keywords(product_names):
    """Return every whitespace-separated token from the non-missing names, in order."""
    tokens = []
    for name in product_names:
        if pd.isna(name):
            continue
        # Cells may hold numbers or dates rather than text; coerce to str so
        # splitting cannot fail. split() with no argument also drops the empty
        # tokens that repeated/leading/trailing whitespace would produce.
        tokens.extend(str(name).split())
    return tokens


def extract_keywords(file):
    """Count whitespace-separated keywords in the '상품명' column of an Excel file.

    The counts are also written to an .xlsx workbook so they can be downloaded.

    Args:
        file: Path to the workbook, a binary file-like object, or an upload
            object that exposes the on-disk path as ``.name``.

    Returns:
        A ``(result_df, output_path)`` tuple. ``result_df`` has the columns
        '키워드' and '빈도수', ordered by descending count (ties keep the order
        in which the keywords were first seen). ``output_path`` is the path of
        the workbook that was written.

    Raises:
        ValueError: If ``file`` is None.
        KeyError: If the workbook has no '상품명' column.
    """
    if file is None:
        raise ValueError("No Excel file was provided.")

    # Upload wrappers (e.g. temp-file objects) carry their path in `.name`;
    # prefer reading by path when that path really exists on disk.
    upload_path = getattr(file, "name", None)
    if isinstance(upload_path, str) and os.path.isfile(upload_path):
        source = upload_path
    else:
        source = file

    df = pd.read_excel(source, engine='openpyxl')

    # Skip the first three data rows of the product-name column.
    # NOTE(review): the original comment says the data starts at cell D4, but
    # read_excel consumes spreadsheet row 1 as the header, so positional index 3
    # is spreadsheet row 5 -- confirm the intended starting row.
    product_names = df['상품명'].iloc[3:]

    keyword_count = Counter(_split_into_keywords(product_names))

    # most_common() is already sorted by descending count with a deterministic
    # tie order, so no separate sort/reset_index step is needed.
    result_df = pd.DataFrame(keyword_count.most_common(), columns=['키워드', '빈도수'])

    # Keep the original output location when it exists; otherwise fall back to
    # the system temp directory instead of failing on a missing directory.
    output_dir = "/mnt/data"
    if not os.path.isdir(output_dir):
        output_dir = tempfile.gettempdir()
    output_path = os.path.join(output_dir, "키워드_분석_결과.xlsx")
    result_df.to_excel(output_path, index=False)

    return result_df, output_path

def keyword_analysis_interface(file):
    """Run the keyword extraction on ``file`` and hand back its two results.

    Returns the frequency table and the path of the generated workbook, in
    that order, exactly as produced by ``extract_keywords``.
    """
    frequency_table, workbook_path = extract_keywords(file)
    return frequency_table, workbook_path

# Gradio interface setup: one Excel upload in; a keyword/frequency table and a
# downloadable result workbook out.
_upload_input = gr.File(label="엑셀 파일 업로드")
_frequency_output = gr.Dataframe(headers=["키워드", "빈도수"])
_download_output = gr.File(label="결과 엑셀 파일 다운로드")

interface = gr.Interface(
    fn=keyword_analysis_interface,
    inputs=_upload_input,
    outputs=[_frequency_output, _download_output],
    title="키워드 빈도수 분석기",
    description="업로드된 엑셀 파일에서 상품명에 대한 키워드를 분석하여 빈도수를 계산합니다.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()