Spaces:
Sleeping
Sleeping
File size: 8,037 Bytes
1ec6c27 3db0045 1ec6c27 1ff04a5 402b304 9b227e3 09fbbfa 1ec6c27 9b227e3 dd3cae2 9b227e3 3db0045 09fbbfa 2e6bb20 402b304 2e6bb20 402b304 65f9910 6ec37d8 65f9910 9b227e3 09fbbfa 1ec6c27 65f9910 9b227e3 65f9910 3db0045 1ec6c27 9b227e3 3db0045 1ec6c27 3db0045 6ec37d8 65f9910 1ec6c27 65f9910 1ec6c27 65f9910 1ff04a5 1ec6c27 09fbbfa 3db0045 65f9910 3db0045 1ec6c27 65f9910 3db0045 65f9910 3db0045 9b227e3 1ec6c27 3db0045 2e6bb20 402b304 9b227e3 1ec6c27 3db0045 2e6bb20 65f9910 5ad04e8 1ec6c27 65f9910 5ad04e8 9b227e3 1ff04a5 65f9910 402b304 5ad04e8 9b227e3 09fbbfa 402b304 1ec6c27 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
import csv
import io
import os

import gradio as gr
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Load the KoSentence-BERT model used for Korean text processing
model = SentenceTransformer('jhgan/ko-sbert-sts')

# Set up the NanumBarunGothic font for Matplotlib
font_path = "NanumBarunGothic.ttf"  # font file expected at the Hugging Face Space root
# Register the file with Matplotlib's font manager first: a family name set
# through rcParams can only be resolved for fonts the manager knows about,
# otherwise Matplotlib silently falls back to its default font.
fm.fontManager.addfont(font_path)
fontprop = fm.FontProperties(fname=font_path)
plt.rc('font', family=fontprop.get_name())
# Neo4j database connection class
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver."""

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` using ``(user, pwd)`` as credentials."""
        self.driver = GraphDatabase.driver(uri, auth=(user, pwd))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` in a fresh session and return its records as a list.

        Args:
            query: Query text handed to ``session.run``.
            parameters: Optional parameters handed to ``session.run``.
            db: Optional database name; when falsy the driver's default
                session is used.

        Returns:
            A list with the records produced by the query, or ``None`` when
            anything raised while opening the session or running the query
            (the error is printed, not re-raised).
        """
        session = None
        try:
            if db:
                session = self.driver.session(database=db)
            else:
                session = self.driver.session()
            return list(session.run(query, parameters))
        except Exception as e:
            # Best effort: report the failure and signal it with None.
            print("Query failed:", e)
            return None
        finally:
            # Runs on both paths so an opened session is always closed.
            if session:
                session.close()
# Neo4j connection settings. Each value can be overridden through an
# environment variable so credentials do not have to be edited into the
# source; the fallbacks are the previously hard-coded values.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    pwd=os.environ.get("NEO4J_PASSWORD", "your_password"),
)
# Persist the recommendation rows to an actual file on disk
def save_recommendations_to_file(recommendations):
    """Write the recommendation rows to ``recommendations.csv``.

    Args:
        recommendations: Iterable of rows; each row is written as one CSV
            record after the header line.

    Returns:
        The path of the written file (``"recommendations.csv"``, relative to
        the current working directory).
    """
    header = ["Employee ID", "Employee Name", "Recommended Programs"]
    file_path = "recommendations.csv"
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        # One CSV record per recommendation row
        writer.writerows(recommendations)
    return file_path
# Automatically match required column names to the columns of a DataFrame
def auto_match_columns(df, required_cols):
    """Map each required column name to the first matching column of ``df``.

    A column matches when the required name occurs as a substring of the
    column label's lower-cased text.

    Args:
        df: Object exposing ``columns`` (iterated in order).
        required_cols: Iterable of required names to look for.

    Returns:
        Dict mapping every required name to the first matching column label,
        or to ``None`` when no column matches.
    """
    matched_cols = {}
    for req_col in required_cols:
        # str() keeps non-string labels (e.g. integer headers) from raising
        # AttributeError on .lower(); they simply never match.
        matched_cols[req_col] = next(
            (col for col in df.columns if req_col in str(col).lower()),
            None,
        )
    return matched_cols
# Match the required employee/program columns automatically, or report the
# first one that could not be matched
def validate_and_get_columns(employee_df, program_df):
    """Resolve the required columns of both DataFrames.

    Args:
        employee_df: Employee data; needs columns matching ``employee_id``,
            ``employee_name`` and ``current_skills``.
        program_df: Program data; needs columns matching ``program_name``,
            ``skills_acquired`` and ``duration``.

    Returns:
        ``(error_message, None, None)`` when a required column has no match
        (employee columns are checked before program columns), otherwise
        ``(None, employee_cols, program_cols)`` with the two mappings
        produced by ``auto_match_columns``.
    """
    employee_cols = auto_match_columns(
        employee_df, ["employee_id", "employee_name", "current_skills"]
    )
    program_cols = auto_match_columns(
        program_df, ["program_name", "skills_acquired", "duration"]
    )

    # First required employee column without a match, if any
    key = next((name for name, col in employee_cols.items() if col is None), None)
    if key is not None:
        return f"์ง์ ๋ฐ์ดํฐ์์ '{key}' ์ด์ ์ ํํ ์ ์์ต๋๋ค. ์ฌ๋ฐ๋ฅธ ์ด์ ์ ํํ์ธ์.", None, None

    # First required program column without a match, if any
    key = next((name for name, col in program_cols.items() if col is None), None)
    if key is not None:
        return f"ํ๋ก๊ทธ๋จ ๋ฐ์ดํฐ์์ '{key}' ์ด์ ์ ํํ ์ ์์ต๋๋ค. ์ฌ๋ฐ๋ฅธ ์ด์ ์ ํํ์ธ์.", None, None

    return None, employee_cols, program_cols
# Analyse the employee data to recommend training programs, and build the
# result table, the relationship graph and the CSV download
def hybrid_rag(employee_file, program_file):
    """Recommend training programs per employee and visualise the matches.

    Employee skills and program skills are embedded with the module-level
    ``model``; every employee/program pair whose cosine similarity exceeds
    0.5 becomes a recommendation. Program names returned by the Neo4j query
    on the module-level ``conn`` are appended for employees with the same
    name.

    Args:
        employee_file: Uploaded employee CSV, either an object whose ``name``
            attribute is the file path or the path itself.
        program_file: Uploaded program CSV, same conventions.

    Returns:
        A 3-tuple ``(table, figure, csv_path)``: the result DataFrame, the
        Matplotlib figure of the employee/program graph, and the path of the
        CSV written by ``save_recommendations_to_file``. When a required
        column is missing the tuple is ``(one-cell DataFrame holding the
        message, None, None)``.
    """
    similarity_threshold = 0.5

    # Accept both an upload wrapper exposing the path as ``.name`` and a
    # plain path string.
    employee_df = pd.read_csv(getattr(employee_file, "name", employee_file))
    program_df = pd.read_csv(getattr(program_file, "name", program_file))

    error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
    if error_msg:
        # Exactly three outputs are wired to this callback, so the error
        # path must also return three values; the message is shown in the
        # table component.
        return pd.DataFrame({"Error": [error_msg]}), None, None

    employee_ids = employee_df[employee_cols["employee_id"]].tolist()
    employee_names = employee_df[employee_cols["employee_name"]].tolist()
    program_names = program_df[program_cols["program_name"]].tolist()
    program_durations = program_df[program_cols["duration"]].tolist()

    # 1. VectorRAG: similarity computed with KoSentence-BERT embeddings.
    # Missing cells are replaced by empty strings so the encoder only ever
    # receives text.
    employee_skills = employee_df[employee_cols["current_skills"]].fillna("").astype(str).tolist()
    program_skills = program_df[program_cols["skills_acquired"]].fillna("").astype(str).tolist()
    employee_embeddings = model.encode(employee_skills)
    program_embeddings = model.encode(program_skills)
    similarities = cosine_similarity(employee_embeddings, program_embeddings)

    # (employee position, program position) pairs above the threshold.
    # Positions, not index labels, are used because the similarity matrix
    # is positional.
    matched_pairs = [
        (i, j)
        for i in range(len(employee_names))
        for j in range(len(program_names))
        if similarities[i][j] > similarity_threshold
    ]

    # Recommended program labels per employee, kept as lists until the end
    # so later additions never get glued onto the "no program" placeholder.
    programs_per_employee = [[] for _ in employee_names]
    for i, j in matched_pairs:
        programs_per_employee[i].append(f"{program_names[j]} ({program_durations[j]})")

    # 2. GraphRAG: fetch program recommendations from Neo4j
    query = """
    MATCH (e:Employee)-[:HAS_SKILL]->(p:Program)
    RETURN e.name AS employee_name, p.name AS program_name, p.duration AS duration
    """
    # conn.query returns None when the query fails; treat that as "no
    # graph results" instead of crashing on iteration.
    graph_rag_results = conn.query(query) or []

    # Add the GraphRAG results to employees with a matching name
    for record in graph_rag_results:
        for i, name in enumerate(employee_names):
            if record['employee_name'] == name:
                programs_per_employee[i].append(f"{record['program_name']} (GraphRAG)")

    # Rows shared by the result table and the CSV file
    recommendation_rows = [
        [emp_id, name, ", ".join(programs) if programs else "์ ํฉํ ํ๋ก๊ทธ๋จ ์์"]
        for emp_id, name, programs in zip(employee_ids, employee_names, programs_per_employee)
    ]

    # Graph: one node per employee and per program, one edge per
    # vector-similarity match
    G = nx.Graph()
    G.add_nodes_from(employee_names, type='employee')
    G.add_nodes_from(program_names, type='program')
    G.add_edges_from((employee_names[i], program_names[j]) for i, j in matched_pairs)

    fig = plt.figure(figsize=(10, 8))
    pos = nx.spring_layout(G)
    # networkx selects the label font through ``font_family``;
    # ``fontproperties`` is not one of its drawing keywords.
    nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray', font_family=fontprop.get_name())
    plt.title("์ง์๊ณผ ํ๋ก๊ทธ๋จ ๊ฐ์ ๊ด๊ณ", fontsize=14, fontweight='bold', fontproperties=fontprop)
    plt.tight_layout()
    # Release the figure from pyplot's global registry so repeated calls do
    # not accumulate open figures; the Figure object itself stays usable.
    plt.close(fig)

    # Write the recommendations to a CSV file for download
    csv_output = save_recommendations_to_file(recommendation_rows)

    # Build the result table DataFrame
    result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs"])
    return result_df, fig, csv_output
# Gradio Blocks UI
# NOTE(review): the Korean UI strings below look mis-decoded in this copy of
# the file. They are kept as found, except that literals which had been split
# across two physical lines (unterminated strings) are rejoined on one line.
# Restore the text from the original UTF-8 source if it is available.
# NOTE(review): block nesting is reconstructed from statement order because
# the indentation was lost; confirm the table/download placement.
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
    gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>๐ผ HybridRAG ์์คํ</h1>")
    with gr.Row():
        # Left column: uploads, the analyze button, the result table and the
        # CSV download
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("<h3 style='color: #34495e;'>1. ์ง์ ๋ฐ ํ๋ก๊ทธ๋จ ๋ฐ์ดํฐ๋ฅผ ์๋ก๋ํ์ธ์</h3>")
            employee_file = gr.File(label="์ง์ ๋ฐ์ดํฐ ์๋ก๋", interactive=True)
            program_file = gr.File(label="๊ต์ก ํ๋ก๊ทธ๋จ ๋ฐ์ดํฐ ์๋ก๋", interactive=True)
            analyze_button = gr.Button("๋ถ์ ์์", elem_classes="gradio-button")
            output_table = gr.DataFrame(label="๋ถ์ ๊ฒฐ๊ณผ (ํ์ด๋ธ)")
            csv_download = gr.File(label="์ถ์ฒ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋")
        # Right column: the relationship chart
        with gr.Column(scale=2, min_width=500):
            gr.Markdown("<h3 style='color: #34495e;'>2. ๋ถ์ ๊ฒฐ๊ณผ ๋ฐ ์๊ฐํ</h3>")
            chart_output = gr.Plot(label="์๊ฐํ ์ฐจํธ")
    # Clicking the analyze button updates the table, the chart and the file
    # download
    analyze_button.click(hybrid_rag, inputs=[employee_file, program_file], outputs=[output_table, chart_output, csv_download])

# Start the Gradio interface
demo.launch()