Spaces:
Sleeping
Sleeping
File size: 4,053 Bytes
87b0b62 3bd055b cdf24b7 3bd055b 4120bfc 9129679 33fb833 3bd055b cdf24b7 3bd055b cdf24b7 3bd055b cdf24b7 3bd055b 5ad395a 3bd055b 5ad395a 3bd055b 5ad395a cdf24b7 5ad395a 3bd055b 5ad395a cdf24b7 5ad395a 3bd055b cdf24b7 3bd055b e376942 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import io
import os
import re
import shutil
import tempfile
from collections import Counter
from datetime import datetime

import pandas as pd
import pytz
from fastapi import BackgroundTasks, FastAPI, File, HTTPException, UploadFile
from fastapi.responses import FileResponse
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
# Application instance; the route decorators below register their handlers on it.
app = FastAPI()
@app.get("/")
async def read_root():
    """Serve the welcome payload for GET requests to the root path."""
    greeting = {"message": "Welcome to the FastAPI application!"}
    return greeting
@app.get("/test")
async def test_endpoint():
    """Serve a fixed status payload for GET requests to ``/test``."""
    payload = {"status": "This is a test endpoint and it's working!"}
    return payload
@app.get("/health")
async def health_check():
    """Serve a fixed ``healthy`` status payload for GET requests to ``/health``."""
    report = {"status": "healthy"}
    return report
def extract_keywords(product_names):
    """Count keyword frequency across product names and export it to Excel.

    Each distinct product name is lower-cased, runs of the characters
    ``, [ ] / ( )`` are replaced by a space, and the result is split on
    whitespace. A keyword is counted once per distinct product name that
    contains it. The resulting table is written to an ``.xlsx`` file (header
    on row 4), then the logo image and a homepage hyperlink are added above
    the table.

    Args:
        product_names: Iterable of product-name strings; duplicate names are
            counted only once.

    Returns:
        Path of the generated workbook. The file lives in a directory freshly
        created with ``tempfile.mkdtemp()``; this function does not remove
        it, so cleanup is the caller's responsibility.
    """
    separators = re.compile(r'[,\[\]/()]+')

    # Counter replaces a list.count() call per keyword, which rescanned the
    # whole word list for every distinct word (quadratic in the word count).
    keyword_counts = Counter()
    for product in set(product_names):
        cleaned_product = separators.sub(' ', product).lower()
        # set(): a keyword repeated inside one name still counts once.
        keyword_counts.update(set(cleaned_product.split()))

    df = pd.DataFrame(list(keyword_counts.items()), columns=['키워드', '빈도수'])
    # The keyword is a secondary sort key so that rows with equal frequency
    # come out in a reproducible order.
    df = df.sort_values(by=['빈도수', '키워드'], ascending=[False, True])

    # Timestamp the file name using the current time in the Korean timezone.
    korea_timezone = pytz.timezone('Asia/Seoul')
    formatted_date = datetime.now(korea_timezone).strftime('%Y%m%d_%H%M%S')
    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'

    # The path stays '/'-joined so the returned format is unchanged for
    # existing callers.
    temp_dir = tempfile.mkdtemp()
    file_path = f"{temp_dir}/{filename}"
    # startrow=3 leaves rows 1-3 free for the logo and link; the header
    # lands on row 4.
    df.to_excel(file_path, index=False, startrow=3)

    # Re-open the saved workbook to decorate it.
    wb = load_workbook(file_path)
    ws = wb.active

    # Relative path: the logo is looked up from the process working directory.
    logo = Image("ssboost-logo.png")
    logo.height = 55
    logo.width = 206
    ws.add_image(logo, "A1")

    # Caption in D1, clickable homepage link in D2.
    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
    ws['D2'] = "https://www.ssboost.co.kr"
    ws['D2'].hyperlink = "https://www.ssboost.co.kr"
    ws['D2'].style = "Hyperlink"

    wb.save(file_path)
    return file_path
@app.post("/extract_keywords_from_file/")
async def extract_keywords_from_file(file: UploadFile = File(...)):
    """Build the keyword-frequency workbook from an uploaded Excel file.

    Product names are read from column D of the upload, skipping the first
    two rows and reading at most 1997 rows.

    Args:
        file: The uploaded Excel workbook.

    Returns:
        A ``FileResponse`` that downloads the generated ``.xlsx`` file. The
        directory holding that file is deleted after the response is sent.

    Raises:
        HTTPException: 400 when the selected range contains no product
            names; 500 when reading the upload or building the workbook
            fails.
    """
    try:
        contents = await file.read()
        df = pd.read_excel(io.BytesIO(contents), usecols="D", skiprows=2, nrows=1997, engine='openpyxl')
        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
        # Checked after dropna() so a column of only blank cells is rejected
        # too, not just a range with zero rows.
        if not unique_product_names:
            raise HTTPException(status_code=400, detail="지정된 범위 내에 데이터가 없습니다.")
        file_path = extract_keywords(unique_product_names)
    except HTTPException:
        # Let the deliberate 400 through instead of rewrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"오류가 발생했습니다: {str(e)}") from e

    # extract_keywords() writes into its own mkdtemp() directory and leaves
    # it behind; remove that directory once the file has been streamed.
    cleanup = BackgroundTasks()
    cleanup.add_task(shutil.rmtree, os.path.dirname(file_path), ignore_errors=True)
    return FileResponse(
        file_path,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=os.path.basename(file_path),
        background=cleanup,
    )
@app.post("/extract_keywords_from_text/")
async def extract_keywords_from_text(text: str):
    """Build the keyword-frequency workbook from newline-separated text.

    Args:
        text: Product names, one per line.

    Returns:
        A ``FileResponse`` that downloads the generated ``.xlsx`` file. The
        directory holding that file is deleted after the response is sent.

    Raises:
        HTTPException: 400 when ``text`` is empty or only whitespace.
    """
    if not text.strip():
        raise HTTPException(status_code=400, detail="No text provided.")

    # Strip each line so names that differ only by surrounding whitespace or
    # a trailing '\r' are treated as the same product; skip blank lines.
    product_names = [line.strip() for line in text.split('\n') if line.strip()]
    file_path = extract_keywords(product_names)

    # extract_keywords() writes into its own mkdtemp() directory and leaves
    # it behind; remove that directory once the file has been streamed.
    cleanup = BackgroundTasks()
    cleanup.add_task(shutil.rmtree, os.path.dirname(file_path), ignore_errors=True)
    return FileResponse(
        file_path,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=os.path.basename(file_path),
        background=cleanup,
    )
if __name__ == "__main__":
    # When executed as a script, serve the app with uvicorn on all
    # interfaces, port 7860.
    import uvicorn

    bind_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **bind_options)
|