Spaces:
Sleeping
Sleeping
File size: 3,488 Bytes
7cfcd5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import os
import pandas as pd
import gradio as gr
from datetime import datetime
from dotenv import load_dotenv
# Load environment variables (load_dotenv comes from python-dotenv; presumably reads a local .env file).
load_dotenv()
# OpenAI API key setting (currently unused in this file, kept in case it is needed later).
openai_api_key = os.getenv("OPENAI_API_KEY")
# Function that reads the Excel data
def read_excel_data(file):
    """Load the review workbook and add the derived columns used for ranking.

    Columns A-E of the sheet are read with the ``openpyxl`` engine and named
    ``ID``, ``Review Date``, ``Option``, ``Review`` and ``ReviewScore``.

    Args:
        file: Whatever ``pandas.read_excel`` accepts as its first argument
            (the value handed over by the upload widget is passed through
            unchanged).

    Returns:
        A DataFrame with the five source columns plus:
        ``Year`` (first four characters of the date rendered as text),
        ``Option1`` (the part of ``Option`` before the first ``" / "``) and
        ``Review Length`` (character count of the review, 0 when missing).
    """
    # NOTE(review): skiprows=1 drops the first sheet row; with the default
    # header handling the following row is presumably consumed as the header
    # and replaced by `names` -- confirm against the real sheet layout.
    df = pd.read_excel(
        file,
        usecols="A, B, C, D, E",
        skiprows=1,
        names=["ID", "Review Date", "Option", "Review", "ReviewScore"],
        engine='openpyxl',
    )

    # Strip any timezone, then keep only the calendar date.
    df['Review Date'] = pd.to_datetime(df['Review Date']).dt.tz_localize(None).dt.date
    df['Year'] = df['Review Date'].astype(str).str.slice(0, 4)

    # Extract the first-level option (text before the first " / ").
    df['Option1'] = df['Option'].astype(str).str.split(" / ").str[0]

    # Review length. Going through astype(str) keeps this working when the
    # column holds no strings at all (e.g. an all-empty column read as float),
    # where a direct `.str.len()` raises AttributeError; missing cells count
    # as length 0 instead of NaN so the column stays integer-typed.
    reviews = df['Review']
    df['Review Length'] = reviews.astype(str).str.len().where(reviews.notna(), 0)
    return df
# Functions that return the positive / negative reviews
def _format_top_reviews(df, mask, limit):
    """Render the longest reviews selected by ``mask`` as one numbered text block.

    Args:
        df: Review table containing the columns ``Review Length``,
            ``Review Date``, ``ID``, ``Option`` and ``Review``.
        mask: Boolean Series aligned with ``df`` marking the rows to keep.
        limit: Maximum number of reviews to include.

    Returns:
        The entries joined by a blank line and numbered from 1, each shaped as
        ``"<n>. **<date> / <id> / <option>**\\n\\n<review>"``; an empty string
        when no row is selected.
    """
    top = df[mask].sort_values(by='Review Length', ascending=False).head(limit)
    if top.empty:
        # Explicit guard: nothing selected means nothing to print.
        return ""

    # Iterate the needed columns directly instead of a row-wise apply; this
    # also avoids writing a helper column into a filtered slice of `df`.
    rows = zip(top['Review Date'], top['ID'], top['Option'], top['Review'])
    return "\n\n".join(
        f"{rank}. **{date} / {reviewer} / {option}**\n\n{text}"
        for rank, (date, reviewer, option, text) in enumerate(rows, start=1)
    )


def get_positive_reviews(df, min_score=4, limit=20):
    """Return the longest positive reviews as a numbered text block.

    Args:
        df: Review table; needs ``ReviewScore`` plus the columns used for
            formatting (``Review Length``, ``Review Date``, ``ID``,
            ``Option``, ``Review``).
        min_score: Lowest ``ReviewScore`` that counts as positive (default 4).
        limit: Maximum number of reviews to return (default 20).

    Returns:
        Formatted reviews ordered by descending ``Review Length``, or ``""``
        when no review qualifies.
    """
    return _format_top_reviews(df, df['ReviewScore'] >= min_score, limit)


# Function that returns the negative reviews
def get_negative_reviews(df, max_score=2, limit=30):
    """Return the longest negative reviews as a numbered text block.

    Args:
        df: Review table; needs ``ReviewScore`` plus the columns used for
            formatting (``Review Length``, ``Review Date``, ``ID``,
            ``Option``, ``Review``).
        max_score: Highest ``ReviewScore`` that counts as negative (default 2).
        limit: Maximum number of reviews to return (default 30).

    Returns:
        Formatted reviews ordered by descending ``Review Length``, or ``""``
        when no review qualifies.
    """
    return _format_top_reviews(df, df['ReviewScore'] <= max_score, limit)
# Function that processes the review data and extracts the positive and negative reviews
def process_reviews(file):
    """Turn an uploaded review workbook into the two review digests.

    Args:
        file: The uploaded workbook, forwarded unchanged to ``read_excel_data``.

    Returns:
        A ``(positive, negative)`` tuple holding the outputs of
        ``get_positive_reviews`` and ``get_negative_reviews`` for that workbook.
    """
    reviews = read_excel_data(file)
    return get_positive_reviews(reviews), get_negative_reviews(reviews)
# Gradio interface setup
def create_interface():
    """Build the Gradio Blocks UI: workbook upload, extract button, two result boxes.

    The user-facing labels are Korean. They are restored here from the
    mis-decoded text in the file, two of which had been split across lines
    and left unterminated.

    Returns:
        The assembled ``gr.Blocks`` app; launching it is left to the caller.
    """
    # NOTE(review): the original indentation was lost, so the exact nesting
    # under gr.Column() below is a best-guess reconstruction -- confirm.
    with gr.Blocks() as demo:
        # Heading: "Upload review data"
        gr.Markdown("### 리뷰 데이터 업로드")
        # Upload widget: "Upload Excel file"
        file_input = gr.File(label="엑셀 파일 업로드", file_types=[".xlsx"])
        # Button: "Extract reviews"
        analyze_button = gr.Button("리뷰 추출하기")

        with gr.Column():
            # "Key positive reviews (up to 20)"
            gr.Markdown("### 긍정적인 주요 리뷰 (최대 20개)")
            positive_reviews_output = gr.Textbox(
                label="긍정적인 주요 리뷰", interactive=False, lines=20
            )
            # "Key negative reviews (up to 30)"
            gr.Markdown("### 부정적인 주요 리뷰 (최대 30개)")
            negative_reviews_output = gr.Textbox(
                label="부정적인 주요 리뷰", interactive=False, lines=30
            )

        # Clicking the button feeds the uploaded file to process_reviews and
        # writes its two return values into the two text boxes, in order.
        analyze_button.click(
            fn=process_reviews,
            inputs=[file_input],
            outputs=[positive_reviews_output, negative_reviews_output],
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the Blocks app and start serving it.
    create_interface().launch()
|