File size: 6,132 Bytes
1ec6c27
 
3db0045
 
 
1ec6c27
1ff04a5
402b304
2e6bb20
1ec6c27
3db0045
 
 
2e6bb20
 
 
 
 
 
 
 
 
 
402b304
2e6bb20
402b304
65f9910
 
 
 
 
 
6ec37d8
65f9910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3db0045
5ad04e8
1ec6c27
 
 
65f9910
 
 
 
 
 
3db0045
 
 
 
 
1ec6c27
65f9910
3db0045
1ec6c27
3db0045
6ec37d8
65f9910
1ec6c27
 
65f9910
 
1ec6c27
65f9910
 
1ff04a5
1ec6c27
 
3db0045
65f9910
3db0045
1ec6c27
65f9910
3db0045
 
 
 
65f9910
 
3db0045
 
 
5ad04e8
 
1ec6c27
3db0045
2e6bb20
 
402b304
 
1ec6c27
3db0045
2e6bb20
65f9910
5ad04e8
1ec6c27
65f9910
5ad04e8
 
 
 
 
1ff04a5
65f9910
 
402b304
 
5ad04e8
 
402b304
1ec6c27
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
import matplotlib.pyplot as plt
import csv
import io
import os

# Load the Sentence-BERT model once at module level; analyze_data reuses this instance
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function that saves the recommendation results to an actual file
def save_recommendations_to_file(recommendations):
    """Write the recommendation rows to ``recommendations.csv`` and return its path.

    The file is created (or overwritten) relative to the current working
    directory, encoded as UTF-8, with a fixed three-column header row.

    Args:
        recommendations: Iterable of rows; each row is handed to the CSV
            writer unchanged, after the header.

    Returns:
        The relative path string ``"recommendations.csv"``.
    """
    output_path = "recommendations.csv"
    header = ["Employee ID", "Employee Name", "Recommended Programs"]

    # newline='' lets the csv module control line endings itself
    with open(output_path, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        # One CSV line per recommendation row
        out.writerows(recommendations)

    return output_path

# Function that automatically matches columns
def auto_match_columns(df, required_cols):
    """Map each required column name to the best-matching column label of ``df``.

    Matching is case-insensitive and ignores surrounding whitespace. A label
    that equals the required name is preferred; otherwise the first label (in
    ``df.columns`` order) that contains the required name as a substring is
    used. Non-string labels are compared through their ``str()`` form instead
    of raising.

    Args:
        df: Object exposing an iterable ``columns`` attribute (e.g. a pandas
            DataFrame).
        required_cols: Iterable of required column names.

    Returns:
        Dict keyed by the required names exactly as given; each value is the
        original matching label from ``df.columns``, or ``None`` when nothing
        matched.
    """
    # Normalize every label once instead of once per required name
    labels = [(col, str(col).strip().lower()) for col in df.columns]

    matched_cols = {}
    for req_col in required_cols:
        wanted = str(req_col).strip().lower()
        matched_col = None

        for col, normalized in labels:
            if normalized == wanted:
                matched_col = col
                break
        else:
            # No exact match: fall back to the first label containing the name,
            # so "duration" is not hijacked by an earlier "duration_unit" when
            # an exact "duration" column exists
            for col, normalized in labels:
                if wanted in normalized:
                    matched_col = col
                    break

        matched_cols[req_col] = matched_col
    return matched_cols

# 직원 및 ν”„λ‘œκ·Έλž¨ λ°μ΄ν„°μ˜ 열을 μžλ™μœΌλ‘œ λ§€μΉ­ν•˜κ±°λ‚˜, μ„ νƒν•˜κ²Œ ν•˜λŠ” ν•¨μˆ˜
def validate_and_get_columns(employee_df, program_df):
    """Resolve the required employee and program columns, or report the first gap.

    Both frames are passed through ``auto_match_columns``. Employee columns
    are checked before program columns, and only the first unmatched required
    name is reported.

    Args:
        employee_df: Frame expected to provide ``employee_id``,
            ``employee_name`` and ``current_skills`` columns.
        program_df: Frame expected to provide ``program_name``,
            ``skills_acquired`` and ``duration`` columns.

    Returns:
        ``(error_message, None, None)`` when a required column has no match
        (the message names that column), otherwise
        ``(None, employee_cols, program_cols)`` where each mapping goes from
        required name to the matched column label.
    """
    employee_cols = auto_match_columns(
        employee_df, ["employee_id", "employee_name", "current_skills"]
    )
    program_cols = auto_match_columns(
        program_df, ["program_name", "skills_acquired", "duration"]
    )

    # Employee-side gaps take precedence over program-side gaps
    unmatched_employee = [name for name, col in employee_cols.items() if col is None]
    if unmatched_employee:
        key = unmatched_employee[0]
        return f"직원 λ°μ΄ν„°μ—μ„œ '{key}' 열을 선택할 수 μ—†μŠ΅λ‹ˆλ‹€. μ˜¬λ°”λ₯Έ 열을 μ„ νƒν•˜μ„Έμš”.", None, None

    unmatched_program = [name for name, col in program_cols.items() if col is None]
    if unmatched_program:
        key = unmatched_program[0]
        return f"ν”„λ‘œκ·Έλž¨ λ°μ΄ν„°μ—μ„œ '{key}' 열을 선택할 수 μ—†μŠ΅λ‹ˆλ‹€. μ˜¬λ°”λ₯Έ 열을 μ„ νƒν•˜μ„Έμš”.", None, None

    return None, employee_cols, program_cols

# 직원 데이터λ₯Ό λΆ„μ„ν•˜μ—¬ ꡐ윑 ν”„λ‘œκ·Έλž¨μ„ μΆ”μ²œν•˜κ³  κ·Έλž˜ν”„λ₯Ό κ·Έλ¦¬λŠ” ν•¨μˆ˜
def analyze_data(employee_file, program_file, similarity_threshold=0.5):
    """Recommend training programs per employee and visualise the matches.

    Employee skills and program skills are embedded with the module-level
    ``model``; a program is recommended to an employee when the cosine
    similarity of the two embeddings is strictly greater than
    ``similarity_threshold``.

    Args:
        employee_file: Uploaded employee CSV. Either an object exposing the
            file path as ``.name`` or the path itself.
        program_file: Uploaded program CSV, same conventions.
        similarity_threshold: Minimum (exclusive) cosine similarity for a
            program to be recommended. Defaults to 0.5.

    Returns:
        ``(error_message, None, None)`` when a required column cannot be
        matched; otherwise ``(summary_text, figure, csv_path)`` where
        ``summary_text`` has one line per employee, ``figure`` is a Matplotlib
        figure of the employee/program graph and ``csv_path`` is the file
        written by ``save_recommendations_to_file``.
    """
    # Fall back to the value itself when the upload is not an object with `.name`
    employee_df = pd.read_csv(getattr(employee_file, "name", employee_file))
    program_df = pd.read_csv(getattr(program_file, "name", program_file))

    error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
    if error_msg:
        return error_msg, None, None

    # Column-wise access keeps every lookup positional, so the similarity
    # matrix is never indexed with DataFrame index labels, and avoids the
    # per-row dtype coercion of iterrows
    employee_ids = employee_df[employee_cols["employee_id"]].tolist()
    employee_names = employee_df[employee_cols["employee_name"]].tolist()
    program_names = program_df[program_cols["program_name"]].tolist()
    program_durations = program_df[program_cols["duration"]].tolist()

    # Empty CSV cells are read as float NaN; hand the encoder text only
    employee_skills = employee_df[employee_cols["current_skills"]].fillna("").astype(str).tolist()
    program_skills = program_df[program_cols["skills_acquired"]].fillna("").astype(str).tolist()

    if employee_skills and program_skills:
        similarities = cosine_similarity(
            model.encode(employee_skills), model.encode(program_skills)
        )
        # matches[i] holds the positions of the programs recommended to employee i
        matches = [
            [j for j, score in enumerate(row) if score > similarity_threshold]
            for row in similarities
        ]
    else:
        # Nothing to compare against: every employee simply has no match
        matches = [[] for _ in employee_skills]

    recommendations = []
    recommendation_rows = []  # rows to be saved as CSV
    edges = []
    for emp_id, emp_name, matched in zip(employee_ids, employee_names, matches):
        labels = [f"{program_names[j]} ({program_durations[j]})" for j in matched]
        if labels:
            joined = ", ".join(labels)
            recommendations.append(f"직원 {emp_name}의 μΆ”μ²œ ν”„λ‘œκ·Έλž¨: {joined}")
            recommendation_rows.append([emp_id, emp_name, joined])
        else:
            recommendations.append(f"직원 {emp_name}μ—κ²Œ μ ν•©ν•œ ν”„λ‘œκ·Έλž¨μ΄ μ—†μŠ΅λ‹ˆλ‹€.")
            recommendation_rows.append([emp_id, emp_name, "μ ν•©ν•œ ν”„λ‘œκ·Έλž¨ μ—†μŒ"])
        edges.extend((emp_name, program_names[j]) for j in matched)

    figure = _draw_relationship_graph(employee_names, program_names, edges)

    # Return the recommendation results as a CSV file
    csv_output = save_recommendations_to_file(recommendation_rows)

    return "\n".join(recommendations), figure, csv_output


def _draw_relationship_graph(employee_names, program_names, edges):
    """Draw the employee/program graph on a fresh figure and return that figure."""
    # Local import: the file only imports matplotlib.pyplot at module level
    from matplotlib.figure import Figure

    graph = nx.Graph()
    for name in employee_names:
        graph.add_node(name, type='employee')
    for name in program_names:
        graph.add_node(name, type='program')
    graph.add_edges_from(edges)

    # Build the figure directly instead of through pyplot so that figures do
    # not pile up in pyplot's global registry on every request
    fig = Figure(figsize=(10, 8))
    ax = fig.add_subplot(111)
    pos = nx.spring_layout(graph)
    nx.draw(graph, pos, ax=ax, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
    ax.set_title("직원과 ν”„λ‘œκ·Έλž¨ κ°„μ˜ 관계", fontsize=14, fontweight='bold')
    fig.tight_layout()
    return fig

# Gradio Blocks UI
with gr.Blocks(
    css=(
        ".gradio-button {background-color: #007bff; color: white;} "
        ".gradio-textbox {border-color: #6c757d;}"
    )
) as demo:
    # Page heading
    gr.Markdown(
        "<h1 style='text-align: center; color: #2c3e50;'>πŸ’Ό HybridRAG μ‹œμŠ€ν…œ</h1>"
    )

    with gr.Row():
        # Left column: the two uploads, the trigger button and the text result
        with gr.Column(scale=1, min_width=300):
            gr.Markdown(
                "<h3 style='color: #34495e;'>1. 직원 및 ν”„λ‘œκ·Έλž¨ 데이터λ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”</h3>"
            )
            employee_file = gr.File(
                label="직원 데이터 μ—…λ‘œλ“œ",
                interactive=True,
            )
            program_file = gr.File(
                label="ꡐ윑 ν”„λ‘œκ·Έλž¨ 데이터 μ—…λ‘œλ“œ",
                interactive=True,
            )
            analyze_button = gr.Button(
                "뢄석 μ‹œμž‘",
                elem_classes="gradio-button",
            )
            output_text = gr.Textbox(
                label="뢄석 κ²°κ³Ό",
                interactive=False,
                elem_classes="gradio-textbox",
            )

        # Right column: the graph plot and the downloadable CSV
        with gr.Column(scale=2, min_width=500):
            gr.Markdown(
                "<h3 style='color: #34495e;'>2. 뢄석 κ²°κ³Ό 및 μ‹œκ°ν™”</h3>"
            )
            chart_output = gr.Plot(label="μ‹œκ°ν™” 차트")
            csv_download = gr.File(label="μΆ”μ²œ κ²°κ³Ό λ‹€μš΄λ‘œλ“œ")

    # Clicking the button feeds both uploads to analyze_data and routes its
    # three return values to the textbox, the plot and the download component
    analyze_button.click(
        analyze_data,
        inputs=[employee_file, program_file],
        outputs=[output_text, chart_output, csv_download],
    )

# Launch the Gradio interface
demo.launch()