import io
import re
from collections import Counter
from datetime import datetime

import pandas as pd
import pytz
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse
from openpyxl import load_workbook
from openpyxl.drawing.image import Image

# Application instance; the @app.post route decorators in this file register on it.
app = FastAPI()

# Characters (besides whitespace) that separate words inside a product name.
_SEPARATOR_PATTERN = re.compile(r'[,\[\]/()]+')


def _count_keywords(product_names):
    """Count, per keyword, how many distinct product names contain it.

    Args:
        product_names: Iterable of product-name strings. Exact duplicates
            are collapsed before counting.

    Returns:
        A ``collections.Counter`` mapping each lower-cased keyword to the
        number of distinct product names in which it appears.
    """
    keyword_counts = Counter()
    for product in set(product_names):
        # Replace separator characters with a space, then normalise case.
        cleaned_product = _SEPARATOR_PATTERN.sub(' ', product).lower()
        # set(): a word repeated inside one product name counts only once.
        keyword_counts.update(set(cleaned_product.split()))
    return keyword_counts


def extract_keywords(product_names):
    """Build a keyword-frequency Excel report from product names.

    Each product name is split into lower-cased words (commas, brackets,
    slashes and parentheses act as separators). A keyword's frequency is
    the number of distinct product names that contain it. The table is
    written starting at row 4 of a timestamped ``.xlsx`` file in the current
    working directory; a logo image and a homepage hyperlink are then added
    above it.

    Args:
        product_names: Iterable of product-name strings.

    Returns:
        The name of the generated Excel file (relative to the current
        working directory).

    Note:
        The logo is loaded from ``ssboost-logo.png`` relative to the current
        working directory. The filename has one-second resolution, so two
        calls within the same second write to the same file.
    """
    keyword_counts = _count_keywords(product_names)
    df = pd.DataFrame(list(keyword_counts.items()), columns=['키워드', '빈도수'])
    # Highest frequency first; ties are broken by keyword so the output is
    # deterministic across runs.
    df = df.sort_values(by=['빈도수', '키워드'], ascending=[False, True])

    # Timestamp the file using the current time in the Korean timezone.
    korea_timezone = pytz.timezone('Asia/Seoul')
    now = datetime.now(korea_timezone)
    formatted_date = now.strftime('%Y%m%d_%H%M%S')

    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'
    # startrow=3 leaves rows 1-3 free for the logo and the link (table at A4).
    df.to_excel(filename, index=False, startrow=3)

    # Re-open the saved workbook to decorate the header area.
    wb = load_workbook(filename)
    ws = wb.active

    # Insert the logo at A1 with a fixed pixel size.
    logo = Image("ssboost-logo.png")
    logo.height = 55
    logo.width = 206
    ws.add_image(logo, "A1")

    # Add the caption and the clickable homepage link.
    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
    ws['D2'] = "https://www.ssboost.co.kr"
    ws['D2'].hyperlink = "https://www.ssboost.co.kr"
    ws['D2'].style = "Hyperlink"

    wb.save(filename)
    return filename

@app.post("/extract_keywords_from_file/")
async def extract_keywords_from_file(file: UploadFile = File(...)):
    """Generate a keyword report from product names in an uploaded Excel file.

    Reads column D of the upload, skipping the first two rows and taking at
    most 1997 rows after that, drops empty cells, and passes the distinct
    values (as strings) to ``extract_keywords``.

    Args:
        file: The uploaded Excel workbook.

    Returns:
        A ``FileResponse`` serving the generated ``.xlsx`` report.

    Raises:
        HTTPException: 400 when the selected range contains no product
            names; 500 for any other failure while reading or processing.
    """
    try:
        contents = await file.read()
        df = pd.read_excel(io.BytesIO(contents), usecols="D", skiprows=2, nrows=1997, engine='openpyxl')
        if df.empty:
            raise HTTPException(status_code=400, detail="No data found in the specified range.")
        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
        # The column can have rows and still hold only empty cells.
        if not unique_product_names:
            raise HTTPException(status_code=400, detail="No data found in the specified range.")
        output_filename = extract_keywords(unique_product_names)
        return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400s above) through unchanged
        # instead of re-wrapping them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e

@app.post("/extract_keywords_from_text/")
async def extract_keywords_from_text(text: str):
    """Generate a keyword report from newline-separated product names.

    Args:
        text: Product names, one per line.

    Returns:
        A ``FileResponse`` serving the generated ``.xlsx`` report.

    Raises:
        HTTPException: 400 when ``text`` contains no non-blank line.
    """
    # Strip each line so that the same product name with a trailing '\r'
    # (CRLF input) or stray spaces is not treated as a different product,
    # which would inflate keyword counts. Blank lines are dropped.
    stripped_lines = (line.strip() for line in text.split('\n'))
    product_names = [line for line in stripped_lines if line]
    if not product_names:
        raise HTTPException(status_code=400, detail="No text provided.")
    output_filename = extract_keywords(product_names)
    return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)

# When executed directly as a script, serve the app with uvicorn,
# listening on all network interfaces (0.0.0.0) at port 8000.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)