Spaces:

soojeongcrystal
/

hybridRAG

Sleeping

App Files Files Community

hybridRAG / app.py

soojeongcrystal

Update app.py

2e6bb20 verified 12 months ago

raw

history blame

6.13 kB

	import gradio as gr
	import pandas as pd
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	import networkx as nx
	import matplotlib.pyplot as plt
	import csv
	import io
	import os

	# Load the Sentence-BERT model ('all-MiniLM-L6-v2') once, at import time.
	# analyze_data() uses it to encode the employee and program skill text.
	model = SentenceTransformer('all-MiniLM-L6-v2')

	# 추천 결과를 실제 파일로 저장하는 함수
	def save_recommendations_to_file(recommendations):
	    """Write recommendation rows to ``recommendations.csv`` and return its path.

	    The file is created (or overwritten) relative to the current working
	    directory as UTF-8 CSV: a fixed three-column header row followed by one
	    row per entry of ``recommendations``.

	    Args:
	        recommendations: Iterable of rows; each row is a sequence of values
	            written in order under the header columns.

	    Returns:
	        The path of the written file, ``"recommendations.csv"``.
	    """
	    header = ["Employee ID", "Employee Name", "Recommended Programs"]
	    file_path = "recommendations.csv"

	    # newline='' leaves line-ending handling to the csv module.
	    with open(file_path, mode='w', newline='', encoding='utf-8') as csv_file:
	        out = csv.writer(csv_file)
	        out.writerow(header)
	        out.writerows(recommendations)

	    return file_path

	# 자동으로 열을 매칭하는 함수
	def auto_match_columns(df, required_cols):
	    """Map each required column name to the first matching column of ``df``.

	    A column matches when the lower-cased required name occurs as a
	    substring of the lower-cased column label. Columns are tried in the
	    order they appear in ``df.columns`` and the first hit wins.

	    Args:
	        df: Object exposing an iterable ``columns`` attribute (a DataFrame).
	        required_cols: Iterable of required column names (strings).

	    Returns:
	        Dict keyed by each entry of ``required_cols`` (unchanged); the value
	        is the original column label that matched, or ``None`` if no column
	        matched.
	    """
	    # Lower-case every label once. str() keeps non-string labels (e.g. ints)
	    # from raising AttributeError on .lower().
	    labels = [(col, str(col).lower()) for col in df.columns]

	    matched_cols = {}
	    for req_col in required_cols:
	        # Lower-case the needle too, so matching is case-insensitive on both
	        # sides rather than only on the column label.
	        needle = req_col.lower()
	        matched_cols[req_col] = next(
	            (col for col, lowered in labels if needle in lowered),
	            None,
	        )
	    return matched_cols

	# 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
	def validate_and_get_columns(employee_df, program_df):
	    """Resolve the required columns of both tables, or report the first gap.

	    Both tables are matched with ``auto_match_columns`` first; the employee
	    mapping is then checked before the program mapping, and the first
	    required name without a matching column produces an error.

	    Args:
	        employee_df: Employee table; needs columns matching ``employee_id``,
	            ``employee_name`` and ``current_skills``.
	        program_df: Program table; needs columns matching ``program_name``,
	            ``skills_acquired`` and ``duration``.

	    Returns:
	        ``(None, employee_cols, program_cols)`` when every required column
	        was found, otherwise ``(error_message, None, None)``.
	    """
	    employee_cols = auto_match_columns(
	        employee_df, ["employee_id", "employee_name", "current_skills"]
	    )
	    program_cols = auto_match_columns(
	        program_df, ["program_name", "skills_acquired", "duration"]
	    )

	    # Report only the first missing name, in required-column order.
	    missing_employee = [name for name, col in employee_cols.items() if col is None]
	    if missing_employee:
	        key = missing_employee[0]
	        return f"직원 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None

	    missing_program = [name for name, col in program_cols.items() if col is None]
	    if missing_program:
	        key = missing_program[0]
	        return f"프로그램 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None

	    return None, employee_cols, program_cols

	# 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
	def analyze_data(employee_file, program_file):
	    """Recommend training programs per employee and chart the matches.

	    Reads both CSV files, encodes each employee's current skills and each
	    program's acquired skills with the module-level ``model``, and
	    recommends every program whose cosine similarity to the employee is
	    above 0.5. The result rows are also written to a CSV file via
	    ``save_recommendations_to_file`` and the employee/program matches are
	    drawn as a graph.

	    Args:
	        employee_file: Uploaded employee CSV; either an object whose
	            ``name`` attribute is the file path, or the path itself.
	        program_file: Uploaded program CSV, same conventions.

	    Returns:
	        ``(text, figure, csv_path)`` on success, where ``text`` holds one
	        line per employee; ``(error_message, None, None)`` when a file is
	        missing or a required column cannot be found.
	    """
	    similarity_threshold = 0.5

	    # The button can be pressed before both uploads exist; report that in
	    # the result box instead of failing on ``None.name``.
	    if employee_file is None or program_file is None:
	        return "직원 데이터와 프로그램 데이터 파일을 모두 업로드하세요.", None, None

	    # Accept both file-like objects exposing ``.name`` and plain path
	    # strings (NOTE(review): which one arrives depends on the Gradio File
	    # component's version/type setting -- confirm for the deployed version).
	    employee_df = pd.read_csv(getattr(employee_file, "name", employee_file))
	    program_df = pd.read_csv(getattr(program_file, "name", program_file))

	    error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
	    if error_msg:
	        return error_msg, None, None

	    # Pull the needed columns out as plain lists so everything below is
	    # positional and cannot drift from the similarity-matrix row/column order.
	    employee_ids = employee_df[employee_cols["employee_id"]].tolist()
	    employee_names = employee_df[employee_cols["employee_name"]].tolist()
	    program_names = program_df[program_cols["program_name"]].tolist()
	    program_durations = program_df[program_cols["duration"]].tolist()

	    # Empty cells are read as NaN (float); coerce to text so the encoder
	    # only ever receives strings.
	    employee_skills = employee_df[employee_cols["current_skills"]].fillna("").astype(str).tolist()
	    program_skills = program_df[program_cols["skills_acquired"]].fillna("").astype(str).tolist()

	    if employee_skills and program_skills:
	        similarities = cosine_similarity(
	            model.encode(employee_skills), model.encode(program_skills)
	        )
	    else:
	        # One side has no rows: nothing can match, so skip the encoder.
	        similarities = [[] for _ in employee_skills]

	    program_labels = [
	        f"{name} ({duration})"
	        for name, duration in zip(program_names, program_durations)
	    ]

	    recommendations = []
	    recommendation_rows = []  # rows for the CSV export
	    edges = []  # (employee name, program name) pairs for the graph
	    for emp_idx, (emp_id, emp_name) in enumerate(zip(employee_ids, employee_names)):
	        matched = [
	            prog_idx
	            for prog_idx, score in enumerate(similarities[emp_idx])
	            if score > similarity_threshold
	        ]

	        if matched:
	            joined = ", ".join(program_labels[k] for k in matched)
	            recommendations.append(f"직원 {emp_name}의 추천 프로그램: {joined}")
	            recommendation_rows.append([emp_id, emp_name, joined])
	            edges.extend((emp_name, program_names[k]) for k in matched)
	        else:
	            recommendations.append(f"직원 {emp_name}에게 적합한 프로그램이 없습니다.")
	            recommendation_rows.append([emp_id, emp_name, "적합한 프로그램 없음"])

	    # Nodes are added for every employee and program, so unmatched ones
	    # still appear in the chart as isolated nodes.
	    G = nx.Graph()
	    G.add_nodes_from(employee_names, type='employee')
	    G.add_nodes_from(program_names, type='program')
	    G.add_edges_from(edges)

	    fig = plt.figure(figsize=(10, 8))
	    pos = nx.spring_layout(G)
	    nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
	    plt.title("직원과 프로그램 간의 관계", fontsize=14, fontweight='bold')
	    plt.tight_layout()
	    # Drop the figure from pyplot's global registry so figures do not pile
	    # up across calls; the returned Figure object itself stays usable.
	    plt.close(fig)

	    # Export the recommendation rows as a CSV file for download.
	    csv_output = save_recommendations_to_file(recommendation_rows)

	    return "\n".join(recommendations), fig, csv_output

	# Gradio 블록
	# Custom CSS applied to the button and result textbox via elem_classes.
	_APP_CSS = ".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}"

	with gr.Blocks(css=_APP_CSS) as demo:
	    gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>💼 HybridRAG 시스템</h1>")

	    with gr.Row():
	        # Left column: the two uploads, the trigger button and the text result.
	        with gr.Column(scale=1, min_width=300):
	            gr.Markdown("<h3 style='color: #34495e;'>1. 직원 및 프로그램 데이터를 업로드하세요</h3>")
	            employee_file = gr.File(label="직원 데이터 업로드", interactive=True)
	            program_file = gr.File(label="교육 프로그램 데이터 업로드", interactive=True)
	            analyze_button = gr.Button("분석 시작", elem_classes="gradio-button")
	            output_text = gr.Textbox(label="분석 결과", interactive=False, elem_classes="gradio-textbox")

	        # Right column: the relationship chart and the CSV download.
	        with gr.Column(scale=2, min_width=500):
	            gr.Markdown("<h3 style='color: #34495e;'>2. 분석 결과 및 시각화</h3>")
	            chart_output = gr.Plot(label="시각화 차트")
	            csv_download = gr.File(label="추천 결과 다운로드")

	    # Clicking the button runs analyze_data on the two uploads and routes its
	    # three return values to the textbox, the plot and the download slot.
	    analyze_button.click(
	        analyze_data,
	        inputs=[employee_file, program_file],
	        outputs=[output_text, chart_output, csv_download],
	    )

	# Start the Gradio interface.
	demo.launch()