Spaces:

ssboost
/

NOW_Product_Keyword

Sleeping

App Files Files Community

NOW_Product_Keyword / app.py

ssboost

Rename main.py to app.py

df7dc1b verified 12 months ago

raw

history blame

3.25 kB

	import pandas as pd
	import re
	from datetime import datetime
	import pytz
	from openpyxl import load_workbook
	from openpyxl.drawing.image import Image
	from fastapi import FastAPI, UploadFile, File, HTTPException
	from fastapi.responses import FileResponse
	import io

	# Application instance; the @app.post route decorators in this file register their handlers on it.
	app = FastAPI()

	def _count_keywords(product_names):
	    """Count, for each keyword, how many distinct product names contain it.

	    Product names are de-duplicated first. Each name then has the characters
	    ``, [ ] / ( )`` replaced by spaces, is lower-cased and split on
	    whitespace. A keyword is counted at most once per distinct product name.

	    Args:
	        product_names: Iterable of product-name strings.

	    Returns:
	        A ``collections.Counter`` mapping keyword -> number of distinct
	        product names containing that keyword.
	    """
	    from collections import Counter  # local import keeps this block self-contained

	    counts = Counter()
	    for product in set(product_names):  # drop duplicate product names
	        # Replace the separator characters with spaces so they split words.
	        cleaned_product = re.sub(r'[,\[\]/()]+', ' ', product).lower()
	        # set(): a word repeated inside one product name counts only once.
	        counts.update(set(cleaned_product.split()))
	    return counts


	def extract_keywords(product_names):
	    """Build a keyword-frequency Excel report and return its filename.

	    The report lists every keyword found in ``product_names`` together with
	    the number of distinct product names that contain it, sorted by
	    frequency (descending) and then by keyword (ascending) so the output is
	    the same from run to run. The table starts at row 4 of the sheet; the
	    rows above it hold a logo image and a homepage hyperlink.

	    Args:
	        product_names: Iterable of product-name strings.

	    Returns:
	        The name of the ``.xlsx`` file that was written. The path is
	        relative, and this function does not delete the file afterwards.

	    Note:
	        The filename is a timestamp with one-second resolution, so two calls
	        within the same second write to the same path.
	        ``ssboost-logo.png`` is opened by relative path; presumably loading
	        fails if it is missing from the working directory (verify).
	    """
	    word_count = _count_keywords(product_names)
	    df = pd.DataFrame(list(word_count.items()), columns=['키워드', '빈도수'])
	    # Secondary key makes the order of equally frequent keywords deterministic.
	    df = df.sort_values(by=['빈도수', '키워드'], ascending=[False, True])

	    # Current date and time in the Korean timezone, used in the filename.
	    korea_timezone = pytz.timezone('Asia/Seoul')
	    now = datetime.now(korea_timezone)
	    formatted_date = now.strftime('%Y%m%d_%H%M%S')

	    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'
	    # startrow=3 leaves the first three sheet rows free; the table begins at A4.
	    df.to_excel(filename, index=False, startrow=3)

	    # Re-open the workbook to add the branding above the table.
	    wb = load_workbook(filename)
	    ws = wb.active

	    # Logo image anchored at A1, sized 206 x 55.
	    logo = Image("ssboost-logo.png")
	    logo.height = 55
	    logo.width = 206
	    ws.add_image(logo, "A1")

	    # Caption in D1 and a clickable homepage link in D2.
	    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
	    ws['D2'] = "https://www.ssboost.co.kr"
	    ws['D2'].hyperlink = "https://www.ssboost.co.kr"
	    ws['D2'].style = "Hyperlink"

	    wb.save(filename)
	    return filename

	@app.post("/extract_keywords_from_file/")
	async def extract_keywords_from_file(file: UploadFile = File(...)):
	    """Generate a keyword-frequency report from an uploaded Excel file.

	    Reads column D of the upload with the first two rows skipped and at most
	    1997 rows parsed (presumably the first remaining row is treated as the
	    header by pandas' default; verify against the expected upload layout).
	    The non-empty cells are converted to strings, de-duplicated and passed
	    to ``extract_keywords``.

	    Args:
	        file: The uploaded workbook.

	    Returns:
	        A ``FileResponse`` serving the generated ``.xlsx`` report.

	    Raises:
	        HTTPException: 400 when the selected range holds no product names;
	            500 (with the underlying error message) for any other failure.
	    """
	    try:
	        contents = await file.read()
	        df = pd.read_excel(io.BytesIO(contents), usecols="D", skiprows=2, nrows=1997, engine='openpyxl')
	        if df.empty:
	            raise HTTPException(status_code=400, detail="No data found in the specified range.")
	        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
	        # Rows may exist while every cell in the column is blank; that is still "no data".
	        if not unique_product_names:
	            raise HTTPException(status_code=400, detail="No data found in the specified range.")
	        output_filename = extract_keywords(unique_product_names)
	        return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)
	    except HTTPException:
	        # Let deliberate HTTP errors (the 400s above) through instead of
	        # rewrapping them as a 500 in the generic handler below.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e

	@app.post("/extract_keywords_from_text/")
	async def extract_keywords_from_text(text: str):
	    """Generate a keyword-frequency report from newline-separated product names.

	    Each line of ``text`` is one product name. Lines are split with
	    ``str.splitlines`` so CRLF input leaves no trailing ``\\r`` on a name,
	    which would otherwise stop identical names from being de-duplicated.
	    Surrounding whitespace is stripped and blank lines are ignored.

	    Args:
	        text: Product names, one per line.

	    Returns:
	        A ``FileResponse`` serving the generated ``.xlsx`` report.

	    Raises:
	        HTTPException: 400 when ``text`` contains no non-blank line.
	    """
	    stripped_lines = (line.strip() for line in text.splitlines())
	    product_names = [name for name in stripped_lines if name]
	    if not product_names:
	        raise HTTPException(status_code=400, detail="No text provided.")
	    output_filename = extract_keywords(product_names)
	    return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)

	# Script entry point: when the file is executed directly, serve the app
	# with uvicorn on all interfaces, port 8000.
	if __name__ == "__main__":
	    from uvicorn import run as serve

	    serve(app, host="0.0.0.0", port=8000)