# Spaces: Sleeping
import io
import re
from collections import Counter
from datetime import datetime

import pandas as pd
import pytz
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse
from openpyxl import load_workbook
from openpyxl.drawing.image import Image

# ASGI application object; passed to uvicorn when this module is run as a script.
app = FastAPI()


def _tokenize_product_name(product):
    """Return the set of distinct lowercase words in one product name."""
    # Commas, square brackets, slashes and parentheses act as word separators.
    cleaned_product = re.sub(r'[,\[\]/()]+', ' ', product).lower()
    return set(cleaned_product.split())


def _count_keywords(product_names):
    """Count, for each word, how many distinct product names contain it."""
    keyword_counts = Counter()
    # Duplicate product names are collapsed first so each product counts once.
    for product in set(product_names):
        # A word repeated inside one product name is also counted only once.
        keyword_counts.update(_tokenize_product_name(product))
    return keyword_counts


def extract_keywords(product_names):
    """Build a keyword-frequency Excel report and return its file name.

    Each product name is lowercased and split into words; the frequency of a
    word is the number of distinct product names that contain it. The table
    is sorted by frequency (descending), with ties ordered by keyword so the
    output is the same on every run.

    Sheet layout: logo image anchored at A1, homepage caption in D1, homepage
    hyperlink in D2, table header on row 4 and data from row 5.

    Args:
        product_names: Iterable of product-name strings.

    Returns:
        The name of the generated ``.xlsx`` file. The name is relative, so
        the file is written to the current working directory.

    NOTE(review): the timestamp in the file name has one-second resolution,
    so two calls within the same second write to the same path. The logo
    path is also relative to the working directory; a missing logo file is
    not handled here and will propagate as an exception.
    """
    keyword_counts = _count_keywords(product_names)

    df = pd.DataFrame(list(keyword_counts.items()), columns=['키워드', '빈도수'])
    # Secondary sort on the keyword makes the order of equal counts stable
    # across runs (it previously depended on set iteration order).
    df = df.sort_values(by=['빈도수', '키워드'], ascending=[False, True])

    # Timestamp the file name using the current time in the Korean timezone.
    korea_timezone = pytz.timezone('Asia/Seoul')
    now = datetime.now(korea_timezone)
    formatted_date = now.strftime('%Y%m%d_%H%M%S')
    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'

    # startrow=3 leaves rows 1-3 free for the logo and the homepage link.
    df.to_excel(filename, index=False, startrow=3)

    # Re-open the workbook to decorate the header area.
    wb = load_workbook(filename)
    ws = wb.active

    logo = Image("ssboost-logo.png")
    logo.height = 55  # pixels
    logo.width = 206  # pixels
    ws.add_image(logo, "A1")

    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
    ws['D2'] = "https://www.ssboost.co.kr"
    ws['D2'].hyperlink = "https://www.ssboost.co.kr"
    ws['D2'].style = "Hyperlink"

    wb.save(filename)
    return filename


async def extract_keywords_from_file(file: UploadFile = File(...)):
    """Generate the keyword report from an uploaded Excel workbook.

    Reads column D of the upload, skipping the first two rows and reading at
    most 1997 rows after the header row, and passes the distinct non-empty
    values to ``extract_keywords``.

    Args:
        file: The uploaded Excel file.

    Returns:
        A ``FileResponse`` serving the generated ``.xlsx`` report.

    Raises:
        HTTPException: 400 when the selected range holds no product names;
            500 for any other failure while reading or processing the file.

    NOTE(review): no route decorator is visible on this function in the
    reviewed source — confirm how it is registered on ``app``.
    """
    no_data_detail = "No data found in the specified range."
    try:
        contents = await file.read()
        df = pd.read_excel(io.BytesIO(contents), usecols="D", skiprows=2, nrows=1997, engine='openpyxl')
        if df.empty:
            raise HTTPException(status_code=400, detail=no_data_detail)
        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
        # Rows may exist while every cell in the column is blank.
        if not unique_product_names:
            raise HTTPException(status_code=400, detail=no_data_detail)
        output_filename = extract_keywords(unique_product_names)
    except HTTPException:
        # Deliberate HTTP errors (the 400s above) must not be rewrapped as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e
    return FileResponse(
        output_filename,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=output_filename,
    )


async def extract_keywords_from_text(text: str):
    """Generate the keyword report from newline-separated product names.

    Args:
        text: Product names, one per line.

    Returns:
        A ``FileResponse`` serving the generated ``.xlsx`` report.

    Raises:
        HTTPException: 400 when ``text`` is empty or only whitespace.

    NOTE(review): no route decorator is visible on this function in the
    reviewed source — confirm how it is registered on ``app``.
    """
    if not text.strip():
        raise HTTPException(status_code=400, detail="No text provided.")
    # splitlines() handles CRLF input; stripping keeps the same product name
    # pasted with stray whitespace from being counted as a separate product,
    # and blank lines are dropped.
    product_names = [line.strip() for line in text.splitlines() if line.strip()]
    output_filename = extract_keywords(product_names)
    return FileResponse(
        output_filename,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=output_filename,
    )


if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 8000.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)