File size: 4,053 Bytes
87b0b62
3bd055b
 
 
 
 
 
 
 
 
cdf24b7
 
3bd055b
 
 
4120bfc
 
 
 
9129679
 
 
 
 
33fb833
 
 
 
3bd055b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdf24b7
 
 
 
 
3bd055b
 
cdf24b7
3bd055b
 
 
 
 
 
 
 
 
 
 
 
 
 
cdf24b7
 
3bd055b
 
 
5ad395a
3bd055b
 
 
 
5ad395a
3bd055b
5ad395a
cdf24b7
5ad395a
3bd055b
5ad395a
cdf24b7
5ad395a
 
3bd055b
 
 
 
 
 
cdf24b7
 
3bd055b
 
 
e376942
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os  # path helpers (basename / existence checks)
import pandas as pd
import re
from datetime import datetime
import pytz
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse
import io
import tempfile
import shutil

# ASGI application object; the route decorators in this module register handlers on it.
app = FastAPI()

@app.get("/")
async def read_root():
    """Return a static welcome payload for the root path."""
    greeting = "Welcome to the FastAPI application!"
    return dict(message=greeting)

@app.get("/test")
async def test_endpoint():
    """Return a fixed payload confirming that the test route is reachable."""
    payload = {}
    payload["status"] = "This is a test endpoint and it's working!"
    return payload


@app.get("/health")
async def health_check():
    """Return a constant ``{"status": "healthy"}`` payload for liveness probes."""
    current_state = "healthy"
    return dict(status=current_state)

def extract_keywords(product_names):
    """Count keyword frequency across product names and export it to Excel.

    Duplicate product names are collapsed first, and each remaining name
    contributes a given keyword at most once, so a keyword's frequency is the
    number of distinct product names that contain it. Keywords are the
    lower-cased, whitespace-separated tokens left after ``, [ ] / ( )`` are
    replaced by spaces.

    The table is written starting at row 4 of the sheet; the rows above it
    hold the logo image and a hyperlink to the homepage.

    Args:
        product_names: Iterable of product-name strings.

    Returns:
        Path of the generated ``.xlsx`` file. The file lives in a freshly
        created temporary directory that the caller is responsible for
        removing once the file has been delivered.

    Raises:
        Exception: Any error raised while writing the workbook (for example
            a missing ``ssboost-logo.png``) propagates unchanged; the
            temporary directory is removed before re-raising.
    """
    # Function-scope import: only this function needs Counter.
    from collections import Counter

    separators = re.compile(r'[,\[\]/()]+')

    # One pass with Counter replaces the per-word list.count() scan, which
    # was quadratic in the number of collected words.
    keyword_counts = Counter()
    for product in set(product_names):
        cleaned_product = separators.sub(' ', product).lower()
        keyword_counts.update(set(cleaned_product.split()))

    df = pd.DataFrame(list(keyword_counts.items()), columns=['키워드', '빈도수'])
    # Secondary sort on the keyword makes the order of ties deterministic;
    # previously it depended on set iteration order.
    df = df.sort_values(by=['빈도수', '키워드'], ascending=[False, True])

    # Timestamp the filename using the current time in the Korean timezone.
    korea_timezone = pytz.timezone('Asia/Seoul')
    formatted_date = datetime.now(korea_timezone).strftime('%Y%m%d_%H%M%S')
    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'

    temp_dir = tempfile.mkdtemp()
    try:
        # Kept as a "/"-joined path on purpose: callers in this module may
        # derive the download name by splitting the path on "/".
        file_path = f"{temp_dir}/{filename}"
        df.to_excel(file_path, index=False, startrow=3)  # table starts at A4

        # Re-open the workbook to decorate the rows above the table.
        wb = load_workbook(file_path)
        ws = wb.active

        # Logo path is relative to the process working directory.
        logo = Image("ssboost-logo.png")
        logo.height = 55   # pixels
        logo.width = 206   # pixels
        ws.add_image(logo, "A1")

        ws['D1'] = "▼ 홈페이지 바로가기 ▼"
        ws['D2'] = "https://www.ssboost.co.kr"
        ws['D2'].hyperlink = "https://www.ssboost.co.kr"
        ws['D2'].style = "Hyperlink"

        wb.save(file_path)
    except Exception:
        # Do not leave an orphaned temp directory behind on failure.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return file_path

@app.post("/extract_keywords_from_file/")
async def extract_keywords_from_file(file: UploadFile = File(...)):
    """Build the keyword-frequency workbook from an uploaded Excel file.

    Reads column D of the upload with pandas: the first two rows are skipped,
    the next row is taken as the header (pandas default) and at most 1,997
    data rows are read. The distinct non-empty values are passed to
    ``extract_keywords`` and the resulting workbook is returned as a download.

    Args:
        file: The uploaded ``.xlsx`` file.

    Returns:
        A ``FileResponse`` streaming the generated workbook. The temporary
        directory holding it is deleted after the response has been sent.

    Raises:
        HTTPException: 400 when the selected range contains no data;
            500 for any other failure while reading or generating the file.
    """
    # Function-scope import from the already-used fastapi package.
    from fastapi import BackgroundTasks

    no_data_detail = "지정된 범위 내에 데이터가 없습니다."
    try:
        contents = await file.read()
        df = pd.read_excel(
            io.BytesIO(contents),
            usecols="D",
            skiprows=2,
            nrows=1997,
            engine='openpyxl',
        )
        if df.empty:
            raise HTTPException(status_code=400, detail=no_data_detail)
        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
        if not unique_product_names:
            # Column exists but every cell is empty: same "no data" condition.
            raise HTTPException(status_code=400, detail=no_data_detail)
        file_path = extract_keywords(unique_product_names)
    except HTTPException:
        # Let deliberate HTTP errors (the 400s above) through unchanged
        # instead of re-wrapping them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"오류가 발생했습니다: {str(e)}") from e

    # The file must outlive this function because it is streamed afterwards,
    # so its temporary directory is removed by a task that runs once the
    # response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(shutil.rmtree, os.path.dirname(file_path), ignore_errors=True)
    return FileResponse(
        file_path,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=os.path.basename(file_path),
        background=cleanup,
    )

@app.post("/extract_keywords_from_text/")
async def extract_keywords_from_text(text: str):
    """Build the keyword-frequency workbook from newline-separated product names.

    Args:
        text: Product names, one per line.

    Returns:
        A ``FileResponse`` streaming the generated workbook. The temporary
        directory holding it is deleted after the response has been sent.

    Raises:
        HTTPException: 400 when ``text`` is empty or contains only whitespace.
    """
    # Function-scope import from the already-used fastapi package.
    from fastapi import BackgroundTasks

    if not text.strip():
        raise HTTPException(status_code=400, detail="No text provided.")

    # splitlines() + strip() so that CRLF input does not leave a trailing
    # "\r" on names; otherwise "name\r" and "name" would survive
    # de-duplication as two products and inflate the counts. Blank lines
    # are dropped.
    product_names = [
        stripped for stripped in (line.strip() for line in text.splitlines()) if stripped
    ]
    file_path = extract_keywords(product_names)

    # The file is streamed after this function returns, so remove its
    # temporary directory only once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(shutil.rmtree, os.path.dirname(file_path), ignore_errors=True)
    return FileResponse(
        file_path,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=os.path.basename(file_path),
        background=cleanup,
    )

if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)