Spaces:
Sleeping
Sleeping
File size: 6,132 Bytes
1ec6c27 3db0045 1ec6c27 1ff04a5 402b304 2e6bb20 1ec6c27 3db0045 2e6bb20 402b304 2e6bb20 402b304 65f9910 6ec37d8 65f9910 3db0045 5ad04e8 1ec6c27 65f9910 3db0045 1ec6c27 65f9910 3db0045 1ec6c27 3db0045 6ec37d8 65f9910 1ec6c27 65f9910 1ec6c27 65f9910 1ff04a5 1ec6c27 3db0045 65f9910 3db0045 1ec6c27 65f9910 3db0045 65f9910 3db0045 5ad04e8 1ec6c27 3db0045 2e6bb20 402b304 1ec6c27 3db0045 2e6bb20 65f9910 5ad04e8 1ec6c27 65f9910 5ad04e8 1ff04a5 65f9910 402b304 5ad04e8 402b304 1ec6c27 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
import matplotlib.pyplot as plt
import csv
import io
import os
# Load the Sentence-BERT model once at import time; analyze_data() calls
# model.encode() on the skill texts to obtain embeddings.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Save the recommendation results to an actual CSV file
def save_recommendations_to_file(recommendations, file_path="recommendations.csv"):
    """Write recommendation rows to a CSV file and return its path.

    Args:
        recommendations: Iterable of rows. Each row is a sequence written
            under the header ``Employee ID, Employee Name, Recommended Programs``.
        file_path: Destination path. Defaults to ``recommendations.csv`` in
            the current working directory (the previously hard-coded value),
            so existing one-argument callers behave as before.

    Returns:
        The path the CSV was written to (``file_path``).
    """
    # newline="" lets the csv module control line endings itself.
    with open(file_path, mode="w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
        # Write every recommendation row in a single call.
        writer.writerows(recommendations)
    return file_path
# Automatically match required column names to DataFrame columns
def auto_match_columns(df, required_cols):
    """Map each required column name to the first matching column of ``df``.

    A column matches when the required name is a substring of the column
    label, compared case-insensitively. Labels are converted with ``str()``
    first, so non-string labels (e.g. integers) no longer raise, and runs of
    whitespace or hyphens are treated as underscores so that headers such as
    ``"Employee ID"`` match ``"employee_id"``.

    Args:
        df: Object exposing a ``columns`` iterable (a pandas DataFrame).
        required_cols: Iterable of required (lower-case) column names.

    Returns:
        Dict mapping every required name to the original column label of the
        first match, or ``None`` when no column matches.
    """
    def _normalize(lowered):
        # "employee - id" / "Employee ID" -> "employee_id"
        return "_".join(lowered.replace("-", " ").split())

    # Pre-compute the comparison forms once instead of once per required name.
    candidates = []
    for col in df.columns:
        lowered = str(col).lower()
        candidates.append((col, lowered, _normalize(lowered)))

    matched_cols = {}
    for req_col in required_cols:
        matched_cols[req_col] = next(
            (
                col
                for col, lowered, normalized in candidates
                # The raw lower-cased check keeps every match the old code made.
                if req_col in lowered or req_col in normalized
            ),
            None,
        )
    return matched_cols
# Match the employee/program data columns automatically, or report which column could not be selected
def validate_and_get_columns(employee_df, program_df):
    """Resolve the required employee and program columns, or report a miss.

    Args:
        employee_df: DataFrame expected to contain columns matching
            ``employee_id``, ``employee_name`` and ``current_skills``.
        program_df: DataFrame expected to contain columns matching
            ``program_name``, ``skills_acquired`` and ``duration``.

    Returns:
        A 3-tuple ``(error_msg, employee_cols, program_cols)``.
        On success ``error_msg`` is ``None`` and the two dicts map each
        required name to the matched column label. On failure ``error_msg``
        is a user-facing (Korean) message naming the first missing column and
        both dicts are ``None``. Employee columns are checked before program
        columns.
    """
    required_employee_cols = ["employee_id", "employee_name", "current_skills"]
    required_program_cols = ["program_name", "skills_acquired", "duration"]

    employee_cols = auto_match_columns(employee_df, required_employee_cols)
    program_cols = auto_match_columns(program_df, required_program_cols)

    # The user-facing text had been corrupted by a wrong-charset decode;
    # it is restored to the intended Korean here.
    # "직원" = employee, "프로그램" = program.
    for dataset_label, matched in (("직원", employee_cols), ("프로그램", program_cols)):
        for key, value in matched.items():
            if value is None:
                message = (
                    f"{dataset_label} 데이터에서 '{key}' 열을 선택할 수 없습니다. "
                    "올바른 열을 선택하세요."
                )
                return message, None, None

    return None, employee_cols, program_cols
# Analyze the employee data, recommend training programs, and draw the relationship graph
def analyze_data(employee_file, program_file, threshold=0.5):
    """Recommend training programs per employee and draw the match graph.

    Employee skills and program skills are embedded with the module-level
    ``model``; a program is recommended to an employee when the cosine
    similarity of the two embeddings is strictly greater than ``threshold``.

    Args:
        employee_file: Uploaded employee CSV. Either an object with a
            ``.name`` path attribute or a plain path string is accepted
            (which one Gradio passes depends on its version/configuration).
        program_file: Uploaded program CSV, same conventions.
        threshold: Similarity cut-off for a recommendation. Defaults to the
            previously hard-coded ``0.5``.

    Returns:
        A 3-tuple ``(text, figure, csv_path)``: newline-joined per-employee
        recommendation text, the matplotlib figure of the employee/program
        graph, and the path of the CSV written by
        ``save_recommendations_to_file``. On a validation problem the tuple
        is ``(error_message, None, None)``.
    """
    # Clicking "analyze" before uploading both files passes None.
    if employee_file is None or program_file is None:
        return "직원 데이터와 교육 프로그램 데이터를 모두 업로드하세요.", None, None

    # Accept both file wrappers (with .name) and plain path strings.
    employee_df = pd.read_csv(getattr(employee_file, "name", employee_file))
    program_df = pd.read_csv(getattr(program_file, "name", program_file))

    error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
    if error_msg:
        return error_msg, None, None

    employee_ids = employee_df[employee_cols["employee_id"]].tolist()
    employee_names = employee_df[employee_cols["employee_name"]].tolist()
    program_names = program_df[program_cols["program_name"]].tolist()
    program_durations = program_df[program_cols["duration"]].tolist()

    # Empty CSV cells load as NaN floats, which the encoder cannot take as
    # text; replace them with empty strings and force everything to str.
    employee_skills = employee_df[employee_cols["current_skills"]].fillna("").astype(str).tolist()
    program_skills = program_df[program_cols["skills_acquired"]].fillna("").astype(str).tolist()

    # cosine_similarity rejects empty inputs, so skip it when either side
    # has no rows; no similarity is looked up in that case.
    if employee_skills and program_skills:
        similarities = cosine_similarity(
            model.encode(employee_skills), model.encode(program_skills)
        )
    else:
        similarities = []

    G = nx.Graph()
    G.add_nodes_from(employee_names, type='employee')
    G.add_nodes_from(program_names, type='program')

    recommendations = []
    recommendation_rows = []  # rows to be saved as CSV
    # Positional enumerate (not DataFrame index labels) keeps the row number
    # aligned with the similarity matrix even for non-default indexes.
    for i, (employee_id, employee_name) in enumerate(zip(employee_ids, employee_names)):
        scores = similarities[i] if program_names else ()
        matched = [j for j, score in enumerate(scores) if score > threshold]

        # One pass feeds both the text/CSV output and the graph edges.
        G.add_edges_from((employee_name, program_names[j]) for j in matched)
        recommended_programs = [
            f"{program_names[j]} ({program_durations[j]})" for j in matched
        ]

        if recommended_programs:
            joined = ", ".join(recommended_programs)
            recommendation = f"직원 {employee_name}의 추천 프로그램: {joined}"
            recommendation_rows.append([employee_id, employee_name, joined])
        else:
            recommendation = f"직원 {employee_name}에게 적합한 프로그램이 없습니다."
            recommendation_rows.append([employee_id, employee_name, "적합한 프로그램 없음"])
        recommendations.append(recommendation)

    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111)
    pos = nx.spring_layout(G)
    nx.draw(
        G, pos, ax=ax, with_labels=True, node_color='lightblue', node_size=3000,
        font_size=10, font_weight='bold', edge_color='gray',
    )
    # NOTE(review): the default matplotlib font may lack Hangul glyphs —
    # confirm a Korean-capable font is configured in the deployment.
    ax.set_title("직원과 프로그램 간의 관계", fontsize=14, fontweight='bold')
    fig.tight_layout()
    # Unregister the figure from pyplot so repeated requests do not pile up
    # open figures; the Figure object itself stays usable for rendering.
    plt.close(fig)

    # Persist the recommendations as CSV for the download component.
    csv_output = save_recommendations_to_file(recommendation_rows)
    return "\n".join(recommendations), fig, csv_output
# Gradio Blocks UI
# UI definition. The source's indentation was lost, so the nesting below is
# reconstructed from the component order: a two-column row (inputs on the
# left, results on the right) with the click handler wired at Blocks level.
# String literals that had been split across lines and decoded with the
# wrong charset are restored to the intended Korean text.
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
    gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>💼 HybridRAG 시스템</h1>")

    with gr.Row():
        # Left column: file uploads, trigger button and textual result.
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("<h3 style='color: #34495e;'>1. 직원 및 프로그램 데이터를 업로드하세요</h3>")
            employee_file = gr.File(label="직원 데이터 업로드", interactive=True)
            program_file = gr.File(label="교육 프로그램 데이터 업로드", interactive=True)
            analyze_button = gr.Button("분석 시작", elem_classes="gradio-button")
            output_text = gr.Textbox(label="분석 결과", interactive=False, elem_classes="gradio-textbox")

        # Right column: graph visualization and CSV download.
        with gr.Column(scale=2, min_width=500):
            gr.Markdown("<h3 style='color: #34495e;'>2. 분석 결과 및 시각화</h3>")
            chart_output = gr.Plot(label="시각화 차트")
            csv_download = gr.File(label="추천 결과 다운로드")

    # analyze_data returns (text, figure, csv path), matching the outputs order.
    analyze_button.click(analyze_data, inputs=[employee_file, program_file], outputs=[output_text, chart_output, csv_download])

# Start the Gradio interface
demo.launch()