ssboost committed on
Commit
3bd055b
·
verified ·
1 Parent(s): f0d70fd

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +80 -0
main.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import re
3
+ from datetime import datetime
4
+ import pytz
5
+ from openpyxl import load_workbook
6
+ from openpyxl.drawing.image import Image
7
+ from fastapi import FastAPI, UploadFile, File, HTTPException
8
+ from fastapi.responses import FileResponse
9
+ import io
10
+
11
+ app = FastAPI()  # ASGI application object; the @app.post routes in this file register on it and uvicorn serves it
12
+
13
def extract_keywords(product_names):
    """Count keyword frequencies over product names and write an Excel report.

    Duplicate product names are ignored, and a keyword is counted at most once
    per product name, so a keyword's frequency is the number of distinct
    product names that contain it. Names are lower-cased and split on
    whitespace after the characters ``, [ ] / ( )`` are replaced by spaces.

    The report is written to the current working directory. The table starts
    at row 4 (``startrow=3``) so that rows 1-3 are free for the logo and the
    homepage link added afterwards.

    Args:
        product_names: Iterable of product-name strings.

    Returns:
        The file name of the ``.xlsx`` report that was written.
    """
    from collections import Counter  # local import keeps this block self-contained

    # Punctuation that should act as a word separator, compiled once.
    separators = re.compile(r'[,\[\]/()]+')

    keyword_counts = Counter()
    # dict.fromkeys() de-duplicates while keeping first-seen order, so the
    # report is reproducible between runs (set iteration order is not).
    for product in dict.fromkeys(product_names):
        words = separators.sub(' ', product).lower().split()
        # One count per keyword per product name; a single pass replaces the
        # quadratic list.count() lookup for every distinct word.
        keyword_counts.update(list(dict.fromkeys(words)))

    df = pd.DataFrame(list(keyword_counts.items()), columns=['키워드', '빈도수'])
    # A stable sort keeps equally frequent keywords in a deterministic order.
    df = df.sort_values(by='빈도수', ascending=False, kind='mergesort')

    # Timestamp the file name using the current time in Korea.
    # NOTE(review): the timestamp has one-second resolution, so two calls in
    # the same second write to the same path.
    korea_timezone = pytz.timezone('Asia/Seoul')
    formatted_date = datetime.now(korea_timezone).strftime('%Y%m%d_%H%M%S')
    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'

    # Write the table first, then reopen the workbook to decorate it.
    df.to_excel(filename, index=False, startrow=3)
    wb = load_workbook(filename)
    ws = wb.active

    # Logo in the top-left corner, sized in pixels.
    logo = Image("ssboost-logo.png")
    logo.height = 55
    logo.width = 206
    ws.add_image(logo, "A1")

    # Caption plus a clickable homepage link.
    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
    ws['D2'] = "https://www.ssboost.co.kr"
    ws['D2'].hyperlink = "https://www.ssboost.co.kr"
    ws['D2'].style = "Hyperlink"

    wb.save(filename)
    return filename
56
+
57
@app.post("/extract_keywords_from_file/")
async def extract_keywords_from_file(file: UploadFile = File(...)):
    """Build a keyword-frequency report from an uploaded Excel workbook.

    Product names are read from column D of the upload. The first two rows
    are skipped and at most 1997 data rows are read after the header row.

    Args:
        file: The uploaded Excel workbook.

    Returns:
        A ``FileResponse`` that sends the generated ``.xlsx`` report.

    Raises:
        HTTPException: 400 when the range holds no product names; 500 when
            reading the upload or building the report fails.
    """
    no_data = HTTPException(status_code=400, detail="No data found in the specified range.")
    try:
        contents = await file.read()
        df = pd.read_excel(io.BytesIO(contents), usecols="D", skiprows=2, nrows=1997, engine='openpyxl')
        if df.empty:
            raise no_data
        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
        # A column holding only blank cells is not "empty" for pandas, so
        # check again once the blanks have been dropped.
        if not unique_product_names:
            raise no_data
        output_filename = extract_keywords(unique_product_names)
    except HTTPException:
        # Deliberate HTTP errors (the 400 above) must reach the client as
        # raised instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e
    return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)
69
+
70
@app.post("/extract_keywords_from_text/")
async def extract_keywords_from_text(text: str):
    """Build a keyword-frequency report from newline-separated product names.

    Args:
        text: Product names, one per line. Blank lines are ignored.

    Returns:
        A ``FileResponse`` that sends the generated ``.xlsx`` report.

    Raises:
        HTTPException: 400 when ``text`` contains nothing but whitespace.
    """
    # splitlines() handles "\r\n" as well as "\n". Splitting on "\n" alone
    # left a trailing "\r" on each name, so identical names could be treated
    # as different products and counted twice.
    stripped_lines = (line.strip() for line in text.splitlines())
    product_names = [name for name in stripped_lines if name]
    if not product_names:
        raise HTTPException(status_code=400, detail="No text provided.")
    output_filename = extract_keywords(product_names)
    return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)
77
+
78
if __name__ == "__main__":
    # Executed as a script: serve the app with uvicorn on every network
    # interface, port 8000. Importing the module does not start a server.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")