"""FastAPI service that builds a keyword-frequency Excel report from product names."""

import io
import re
from collections import Counter
from datetime import datetime
from typing import Iterable

import pandas as pd
import pytz
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import FileResponse
from openpyxl import load_workbook
from openpyxl.drawing.image import Image

app = FastAPI()

# Runs of these punctuation characters are treated as word separators.
_SEPARATOR_PATTERN = re.compile(r'[,\[\]/()]+')

# Timestamps embedded in report filenames use Korean local time.
_KOREA_TIMEZONE = pytz.timezone('Asia/Seoul')

_XLSX_MEDIA_TYPE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

_HOMEPAGE_URL = "https://www.ssboost.co.kr"


def _count_keywords(product_names: Iterable[str]) -> Counter:
    """Count, for every keyword, the number of distinct product names containing it.

    Product names are de-duplicated first (exact string match).  Each name is
    then split into lower-cased, whitespace-separated words after the
    separator punctuation has been replaced by spaces.  A word contributes at
    most one count per product name.

    Insertion order follows first appearance in the input, so the result (and
    the tie order of ``most_common()``) is deterministic for a given input.
    """
    counts: Counter = Counter()
    # dict.fromkeys de-duplicates while keeping first-seen order (a set would
    # make the order of equally frequent keywords vary between runs).
    for product in dict.fromkeys(product_names):
        words = _SEPARATOR_PATTERN.sub(' ', product).lower().split()
        for word in dict.fromkeys(words):
            counts[word] += 1
    return counts


def extract_keywords(product_names):
    """Write a keyword-frequency report for *product_names* to an .xlsx file.

    The workbook holds a logo image anchored at A1, a homepage link in D1/D2,
    and the keyword table (columns '키워드' and '빈도수', most frequent first)
    starting at row 4.

    Args:
        product_names: Iterable of product-name strings.

    Returns:
        The name of the generated file.  It is a bare filename, so the file
        is created relative to the current working directory.

    Raises:
        FileNotFoundError: If "ssboost-logo.png" cannot be found relative to
            the current working directory.
    """
    keyword_counts = _count_keywords(product_names)

    # most_common() sorts by descending count and keeps first-seen order for
    # ties, so no separate DataFrame sort is needed.
    df = pd.DataFrame(keyword_counts.most_common(), columns=['키워드', '빈도수'])

    # NOTE: the timestamp has one-second resolution, so two reports generated
    # within the same second share a filename.
    now = datetime.now(_KOREA_TIMEZONE)
    formatted_date = now.strftime('%Y%m%d_%H%M%S')
    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'

    # startrow=3 places the header on row 4, leaving rows 1-3 for the banner.
    df.to_excel(filename, index=False, startrow=3)

    # Re-open the saved workbook to decorate it with the logo and the link.
    wb = load_workbook(filename)
    ws = wb.active

    logo = Image("ssboost-logo.png")
    logo.height = 55  # pixels
    logo.width = 206  # pixels
    ws.add_image(logo, "A1")

    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
    ws['D2'] = _HOMEPAGE_URL
    ws['D2'].hyperlink = _HOMEPAGE_URL
    ws['D2'].style = "Hyperlink"

    wb.save(filename)

    return filename


@app.post("/extract_keywords_from_file/")
async def extract_keywords_from_file(file: UploadFile = File(...)):
    """Build a keyword report from column D of an uploaded Excel workbook.

    The first two rows are skipped, the next row is taken as the header, and
    at most 1997 data rows are read.

    Raises:
        HTTPException: 400 when the range contains no product names; 500 when
            reading the upload or generating the report fails.
    """
    try:
        contents = await file.read()
        df = pd.read_excel(
            io.BytesIO(contents),
            usecols="D",
            skiprows=2,
            nrows=1997,
            engine='openpyxl',
        )

        if df.empty:
            raise HTTPException(status_code=400, detail="No data found in the specified range.")

        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
        # A column holding only empty cells is non-empty as a frame but
        # contains no product names.
        if not unique_product_names:
            raise HTTPException(status_code=400, detail="No data found in the specified range.")

        output_filename = extract_keywords(unique_product_names)
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400s above) through unchanged
        # instead of rewrapping them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e

    return FileResponse(output_filename, media_type=_XLSX_MEDIA_TYPE, filename=output_filename)


@app.post("/extract_keywords_from_text/")
async def extract_keywords_from_text(text: str):
    """Build a keyword report from newline-separated product names.

    Raises:
        HTTPException: 400 when *text* is empty or whitespace only.
    """
    if not text.strip():
        raise HTTPException(status_code=400, detail="No text provided.")

    # Strip each line so CRLF input does not leave a trailing "\r" that would
    # make otherwise identical product names look distinct; drop blank lines.
    product_names = [line.strip() for line in text.splitlines() if line.strip()]
    output_filename = extract_keywords(product_names)

    return FileResponse(output_filename, media_type=_XLSX_MEDIA_TYPE, filename=output_filename)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)