# app.py (3.45 kB) - latest commit "Update app.py" (cdf24b7, verified), shown next to ssboost's avatar; file-viewer links: raw, history, blame.
import pandas as pd
import re
from datetime import datetime
import pytz
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse
import io
import tempfile
import shutil
# ASGI application object; the @app.post route decorators further down register their endpoints on it.
app = FastAPI()
def _count_keywords(product_names):
    """Count, for each keyword, how many distinct product names contain it."""
    from collections import Counter

    counts = Counter()
    # Identical product names are collapsed first so a repeated listing
    # does not inflate the frequency of its words.
    for product in set(product_names):
        # Replace separator punctuation with spaces before tokenising.
        cleaned = re.sub(r'[,\[\]/()]+', ' ', product).lower()
        # A word repeated inside one product name still counts once for it.
        counts.update(set(cleaned.split()))
    return counts


def extract_keywords(product_names):
    """Build a keyword-frequency Excel report and return the path to it.

    Each product name is lower-cased, has the characters ``, [ ] / ( )``
    replaced by spaces and is split on whitespace.  The frequency of a
    keyword is the number of distinct product names it occurs in.

    The report is written into a fresh temporary directory.  The table
    starts at row 4 (``startrow=3``) so that the logo and the homepage
    link fit above it.

    Args:
        product_names: Iterable of product-name strings.

    Returns:
        The report path as a string of the form ``"<temp_dir>/<filename>"``.
        The caller owns the temporary directory and is responsible for
        deleting it once the file is no longer needed.

    Raises:
        Exception: Whatever pandas/openpyxl raise while writing the
            workbook (for example when ``ssboost-logo.png`` cannot be
            opened).  The temporary directory is removed before the error
            propagates.
    """
    keyword_counts = _count_keywords(product_names)
    # Most frequent first; ties are ordered by keyword so that the report
    # is reproducible instead of depending on set iteration order.
    ranked = sorted(keyword_counts.items(), key=lambda item: (-item[1], item[0]))
    df = pd.DataFrame(ranked, columns=['키워드', '빈도수'])

    # The file name carries the current time in the Asia/Seoul timezone.
    korea_timezone = pytz.timezone('Asia/Seoul')
    formatted_date = datetime.now(korea_timezone).strftime('%Y%m%d_%H%M%S')
    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'

    temp_dir = tempfile.mkdtemp()
    # Joined with "/" on purpose: callers recover the file name by
    # splitting the returned path on "/".
    file_path = f"{temp_dir}/{filename}"
    try:
        df.to_excel(file_path, index=False, startrow=3)

        # Re-open the saved workbook to add the branding above the table.
        wb = load_workbook(file_path)
        ws = wb.active

        logo = Image("ssboost-logo.png")
        logo.height = 55  # pixels
        logo.width = 206  # pixels
        ws.add_image(logo, "A1")

        ws['D1'] = "▼ 홈페이지 바로가기 ▼"
        ws['D2'] = "https://www.ssboost.co.kr"
        ws['D2'].hyperlink = "https://www.ssboost.co.kr"
        ws['D2'].style = "Hyperlink"
        wb.save(file_path)
    except Exception:
        # Do not leave an orphaned temp directory behind on failure.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return file_path
@app.post("/extract_keywords_from_file/")
async def extract_keywords_from_file(file: UploadFile = File(...)):
    """Build the keyword report from product names in an uploaded workbook.

    Reads column D of the upload, skipping the first two rows and reading
    at most 1997 rows, drops empty cells, de-duplicates the values and
    passes them to ``extract_keywords``.

    Args:
        file: The uploaded Excel file.

    Returns:
        A ``FileResponse`` that downloads the generated .xlsx report.  The
        temporary directory holding the report is deleted by a background
        task after the response has been sent.

    Raises:
        HTTPException: 400 when the selected range contains no rows;
            500 (with the underlying error text) for any other failure.
    """
    # Local import keeps this fix self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import BackgroundTasks

    try:
        contents = await file.read()
        df = pd.read_excel(io.BytesIO(contents), usecols="D", skiprows=2, nrows=1997, engine='openpyxl')
        if df.empty:
            raise HTTPException(status_code=400, detail="No data found in the specified range.")
        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
        file_path = extract_keywords(unique_product_names)
    except HTTPException:
        # Deliberate HTTP errors (the 400 above) must not be rewritten
        # into a 500 by the generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e

    # extract_keywords returns "<temp_dir>/<filename>".
    temp_dir, _, download_name = file_path.rpartition("/")
    # The directory must outlive this function because the response streams
    # the file afterwards, so removal is deferred to a background task.
    cleanup = BackgroundTasks()
    if temp_dir:
        cleanup.add_task(shutil.rmtree, temp_dir, ignore_errors=True)
    return FileResponse(
        file_path,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=download_name,
        background=cleanup,
    )
@app.post("/extract_keywords_from_text/")
async def extract_keywords_from_text(text: str):
    """Build the keyword report from newline-separated product names.

    Args:
        text: Product names, one per line.  Surrounding whitespace
            (including the ``\\r`` of CRLF line endings) is stripped and
            blank lines are ignored, so the same name is not counted twice
            just because of differing line endings.

    Returns:
        A ``FileResponse`` that downloads the generated .xlsx report.  The
        temporary directory holding the report is deleted by a background
        task after the response has been sent.

    Raises:
        HTTPException: 400 when ``text`` is empty or whitespace only;
            500 (with the underlying error text) when the report cannot
            be generated.
    """
    # Local import keeps this fix self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import BackgroundTasks

    if not text.strip():
        raise HTTPException(status_code=400, detail="No text provided.")

    stripped_lines = (line.strip() for line in text.splitlines())
    product_names = [name for name in stripped_lines if name]

    try:
        file_path = extract_keywords(product_names)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e

    # extract_keywords returns "<temp_dir>/<filename>".
    temp_dir, _, download_name = file_path.rpartition("/")
    # Remove the temp directory only after the file has been streamed.
    cleanup = BackgroundTasks()
    if temp_dir:
        cleanup.add_task(shutil.rmtree, temp_dir, ignore_errors=True)
    return FileResponse(
        file_path,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=download_name,
        background=cleanup,
    )
if __name__ == "__main__":
    # Executed as a script: serve the API with uvicorn.
    import uvicorn

    # Bind address and port for the development server.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)