File size: 8,037 Bytes
1ec6c27
 
3db0045
 
 
1ec6c27
1ff04a5
402b304
9b227e3
09fbbfa
1ec6c27
9b227e3
 
 
 
dd3cae2
9b227e3
 
3db0045
09fbbfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e6bb20
 
 
 
 
 
 
 
 
 
402b304
2e6bb20
402b304
65f9910
 
 
 
 
 
6ec37d8
65f9910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b227e3
09fbbfa
 
1ec6c27
 
 
65f9910
 
9b227e3
65f9910
 
 
3db0045
 
 
 
 
1ec6c27
9b227e3
3db0045
1ec6c27
3db0045
6ec37d8
65f9910
1ec6c27
 
65f9910
 
1ec6c27
65f9910
 
1ff04a5
1ec6c27
 
09fbbfa
 
 
 
 
 
 
 
 
 
 
 
 
3db0045
65f9910
3db0045
1ec6c27
65f9910
3db0045
 
 
 
65f9910
 
3db0045
 
 
9b227e3
 
1ec6c27
3db0045
2e6bb20
 
402b304
9b227e3
 
 
 
1ec6c27
3db0045
2e6bb20
65f9910
5ad04e8
1ec6c27
65f9910
5ad04e8
 
 
 
9b227e3
 
1ff04a5
65f9910
 
402b304
5ad04e8
9b227e3
09fbbfa
402b304
1ec6c27
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import csv
import io
import os

import gradio as gr
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the KoSentence-BERT model used for Korean text processing
model = SentenceTransformer('jhgan/ko-sbert-sts')

# Configure the NanumBarunGothic font so Korean labels can be rendered in plots.
font_path = "NanumBarunGothic.ttf"  # font file path (stored at the Hugging Face root)
# Register the bundled file with matplotlib's font manager. Setting the
# default family by name (below) can only be resolved for fonts the manager
# knows about, which would otherwise require a system-wide installation.
fm.fontManager.addfont(font_path)
fontprop = fm.FontProperties(fname=font_path)
plt.rc('font', family=fontprop.get_name())

# Neo4j database connection class
class Neo4jConnection:
    """Thin convenience wrapper around a Neo4j driver.

    The driver is created once in the constructor and reused by every call
    to :meth:`query`; call :meth:`close` when the connection is no longer
    needed.
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` using ``(user, pwd)`` as credentials."""
        credentials = (user, pwd)
        self.driver = GraphDatabase.driver(uri, auth=credentials)

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` and return every resulting record as a list.

        A session is opened against ``db`` when it is truthy, otherwise with
        the driver's default arguments. Any exception raised while opening
        the session or running the query is printed and swallowed, in which
        case ``None`` is returned. A session that was opened is always
        closed before returning.
        """
        records = None
        active_session = None
        # Only forward ``database`` when the caller actually named one.
        session_kwargs = {"database": db} if db else {}
        try:
            active_session = self.driver.session(**session_kwargs)
            result = active_session.run(query, parameters)
            records = list(result)
        except Exception as exc:
            print("Query failed:", exc)
        finally:
            if active_session:
                active_session.close()
        return records

# Neo4j connection settings
# Connection settings come from the environment so credentials do not have to
# be edited into the source; the previous hard-coded values remain the
# defaults when a variable is unset.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    pwd=os.environ.get("NEO4J_PASSWORD", "your_password"),
)

# Function that saves the recommendation results to an actual file
def save_recommendations_to_file(recommendations):
    """Write the recommendation rows to ``recommendations.csv`` and return that path.

    The file is (re)created relative to the current working directory as
    UTF-8. It starts with a fixed three-column header row, followed by one
    CSV row per item of ``recommendations``.
    """
    output_path = "recommendations.csv"
    header = ["Employee ID", "Employee Name", "Recommended Programs"]

    with open(output_path, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        # One CSV line per recommendation row, in the order given.
        out.writerows(recommendations)

    return output_path

# Function that automatically matches columns
def auto_match_columns(df, required_cols):
    """Map each required column name to the first matching column of ``df``.

    A column matches when the required name occurs as a substring of the
    column label. The comparison is case-insensitive and treats spaces and
    hyphens as underscores, so a header such as ``"Employee ID"`` satisfies
    ``"employee_id"``. Labels that are not strings (e.g. integer column
    labels) are compared through ``str()`` instead of raising.

    Args:
        df: Object exposing the column labels as ``df.columns``.
        required_cols: Iterable of required column names.

    Returns:
        dict mapping every entry of ``required_cols`` to the original label
        of the first matching column, or ``None`` when nothing matches.
    """
    def _normalize(label):
        # Character-wise normalization keeps every match the plain
        # ``name in label.lower()`` test produced, and adds new ones.
        return str(label).lower().replace(" ", "_").replace("-", "_")

    # Normalize each header once rather than once per required name.
    candidates = [(col, _normalize(col)) for col in df.columns]

    matched_cols = {}
    for req_col in required_cols:
        wanted = _normalize(req_col)
        matched_cols[req_col] = next(
            (col for col, normalized in candidates if wanted in normalized),
            None,
        )
    return matched_cols

# Function that automatically matches the columns of the employee and program data, or asks the user to choose them
def validate_and_get_columns(employee_df, program_df):
    """Resolve the columns the recommender needs in both uploaded tables.

    Uses ``auto_match_columns`` to look up the required employee columns
    (``employee_id``, ``employee_name``, ``current_skills``) and program
    columns (``program_name``, ``skills_acquired``, ``duration``).

    Args:
        employee_df: DataFrame holding the employee data.
        program_df: DataFrame holding the training-program data.

    Returns:
        A 3-tuple ``(error_message, employee_cols, program_cols)``.
        On success ``error_message`` is ``None`` and the two dicts map each
        required name to the matched column. If a required column is
        missing, ``error_message`` is a Korean user-facing message naming
        the first missing column and both dicts are ``None``.
    """
    required_employee_cols = ["employee_id", "employee_name", "current_skills"]
    required_program_cols = ["program_name", "skills_acquired", "duration"]

    employee_cols = auto_match_columns(employee_df, required_employee_cols)
    program_cols = auto_match_columns(program_df, required_program_cols)

    # Employee columns are checked before program columns, so when both
    # tables are incomplete the employee problem is the one reported.
    checks = (("직원", employee_cols), ("프로그램", program_cols))
    for dataset_label, matched in checks:
        for key, value in matched.items():
            if value is None:
                message = (
                    f"{dataset_label} 데이터에서 '{key}' 열을 선택할 수 없습니다. "
                    "올바른 열을 선택하세요."
                )
                return message, None, None

    return None, employee_cols, program_cols

# Function that analyzes the employee data, recommends training programs, and builds the result table and graph
def hybrid_rag(employee_file, program_file):
    """Recommend training programs per employee and visualise the matches.

    Two sources are combined:

    1. Vector similarity: the employees' skills text and the programs'
       skills text are embedded with the module-level ``model``; every
       employee/program pair whose cosine similarity exceeds 0.5 becomes a
       recommendation and an edge in the plotted graph.
    2. Graph lookup: ``(Employee)-[:HAS_SKILL]->(Program)`` rows returned by
       Neo4j are matched to employees by name and appended with a
       ``(GraphRAG)`` tag. A failed query contributes nothing.

    Args:
        employee_file: Uploaded employee CSV, either a path string or an
            object exposing the path as ``.name``.
        program_file: Uploaded program CSV, in the same form.

    Returns:
        A 3-tuple ``(result_df, figure, csv_path)`` matching the three
        outputs of the click handler. When an upload is missing or a
        required column cannot be found, the tuple is
        ``(one-cell DataFrame holding the message, None, None)``.
    """
    similarity_threshold = 0.5
    result_columns = ["Employee ID", "Employee Name", "Recommended Programs"]
    no_match_label = "적합한 프로그램 없음"

    def _error(message):
        # Always hand back exactly three values so the result lines up with
        # the three output components; the message is shown in the table.
        return pd.DataFrame({"Error": [message]}), None, None

    if employee_file is None or program_file is None:
        return _error("직원 데이터와 교육 프로그램 데이터를 모두 업로드하세요.")

    # Accept both a plain path and a file wrapper carrying the path in `.name`.
    employee_df = pd.read_csv(getattr(employee_file, "name", employee_file))
    program_df = pd.read_csv(getattr(program_file, "name", program_file))

    error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
    if error_msg:
        return _error(error_msg)

    employee_ids = employee_df[employee_cols["employee_id"]].tolist()
    employee_names = employee_df[employee_cols["employee_name"]].tolist()
    program_names = program_df[program_cols["program_name"]].tolist()
    durations = program_df[program_cols["duration"]].tolist()

    # 1. VectorRAG: similarity between skill descriptions.
    # Empty cells are read as NaN floats, which are not text; encode them as
    # empty strings instead.
    employee_skills = employee_df[employee_cols["current_skills"]].fillna("").astype(str).tolist()
    program_skills = program_df[program_cols["skills_acquired"]].fillna("").astype(str).tolist()

    if employee_skills and program_skills:
        similarities = cosine_similarity(
            model.encode(employee_skills),
            model.encode(program_skills),
        )
        # matches[i] holds the positions of the programs matched to employee
        # i. Positions (not DataFrame index labels) are used because the
        # similarity matrix is positional.
        matches = [
            [j for j, score in enumerate(row) if score > similarity_threshold]
            for row in similarities
        ]
    else:
        # Nothing to compare when either table has no rows.
        matches = [[] for _ in employee_skills]

    recommended = [
        [f"{program_names[j]} ({durations[j]})" for j in matched]
        for matched in matches
    ]

    # 2. GraphRAG: fetch program recommendations from Neo4j.
    query = """
    MATCH (e:Employee)-[:HAS_SKILL]->(p:Program)
    RETURN e.name AS employee_name, p.name AS program_name, p.duration AS duration
    """
    # conn.query() returns None when the query fails; treat that as no rows.
    graph_rag_results = conn.query(query) or []

    # Several rows may share a name; every one of them receives the hit.
    positions_by_name = {}
    for position, name in enumerate(employee_names):
        positions_by_name.setdefault(name, []).append(position)

    for record in graph_rag_results:
        for position in positions_by_name.get(record['employee_name'], ()):
            recommended[position].append(f"{record['program_name']} (GraphRAG)")

    # The placeholder is chosen only after both sources are merged, so it is
    # never combined with an actual recommendation.
    recommendation_rows = [
        [employee_id, name, ", ".join(programs) if programs else no_match_label]
        for employee_id, name, programs in zip(employee_ids, employee_names, recommended)
    ]

    # Bipartite-style graph: employees and programs as nodes, vector matches as edges.
    G = nx.Graph()
    G.add_nodes_from(employee_names, type='employee')
    G.add_nodes_from(program_names, type='program')
    for name, matched in zip(employee_names, matches):
        G.add_edges_from((name, program_names[j]) for j in matched)

    fig, ax = plt.subplots(figsize=(10, 8))
    pos = nx.spring_layout(G)
    # `fontproperties` is not a keyword the NetworkX drawing functions
    # accept (recent versions raise ValueError for it); the label font is
    # selected by family name instead.
    nx.draw(
        G,
        pos,
        ax=ax,
        with_labels=True,
        node_color='lightblue',
        node_size=3000,
        font_size=10,
        font_weight='bold',
        edge_color='gray',
        font_family=fontprop.get_name(),
    )
    ax.set_title("직원과 프로그램 간의 관계", fontsize=14, fontweight='bold', fontproperties=fontprop)
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself is still returned.
    plt.close(fig)

    # Persist the rows as CSV for download and build the result table.
    csv_output = save_recommendations_to_file(recommendation_rows)
    result_df = pd.DataFrame(recommendation_rows, columns=result_columns)

    return result_df, fig, csv_output

# Gradio Blocks UI
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
    gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>💼 HybridRAG 시스템</h1>")

    with gr.Row():
        # Left column: uploads, the trigger button, and the tabular/file results.
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("<h3 style='color: #34495e;'>1. 직원 및 프로그램 데이터를 업로드하세요</h3>")
            employee_file = gr.File(label="직원 데이터 업로드", interactive=True)
            program_file = gr.File(label="교육 프로그램 데이터 업로드", interactive=True)
            analyze_button = gr.Button("분석 시작", elem_classes="gradio-button")
            output_table = gr.DataFrame(label="분석 결과 (테이블)")
            csv_download = gr.File(label="추천 결과 다운로드")

        # Right column: the relationship graph.
        with gr.Column(scale=2, min_width=500):
            gr.Markdown("<h3 style='color: #34495e;'>2. 분석 결과 및 시각화</h3>")
            chart_output = gr.Plot(label="시각화 차트")

    # Clicking the analyze button updates the table, the chart and the file download.
    analyze_button.click(hybrid_rag, inputs=[employee_file, program_file], outputs=[output_table, chart_output, csv_download])

# Launch the Gradio interface only when run as a script, so importing this
# module (e.g. for testing) does not start a server.
if __name__ == "__main__":
    demo.launch()