Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ try:
|
|
32 |
with open("jangtest.json", "r", encoding="utf-8") as f:
|
33 |
dataset = json.load(f)
|
34 |
logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
|
35 |
-
logging.debug(f"First item in dataset: {dataset[0]}")
|
36 |
except json.JSONDecodeError as e:
|
37 |
logging.error(f"Error decoding JSON: {e}")
|
38 |
logging.error("Please check the 'jangtest.json' file for any formatting errors.")
|
@@ -85,19 +85,27 @@ async def generate_response(message):
|
|
85 |
user_input = message.content
|
86 |
user_mention = message.author.mention
|
87 |
|
|
|
|
|
88 |
# 유사한 데이터 찾기
|
89 |
most_similar_data = find_most_similar_data(user_input)
|
90 |
|
|
|
|
|
91 |
if not most_similar_data:
|
92 |
return f"{user_mention}, ์ฃ์กํฉ๋๋ค. ๊ทํ์ ์ง๋ฌธ๊ณผ ๊ด๋ จ๋ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
93 |
|
94 |
system_message = f"""
|
95 |
๋น์ ์ 'kAI'๋ผ๋ ์ด๋ฆ์ ํ๊ตญ ๋ณดํ ์ํ์ ๋ํ AI ์กฐ์ธ์์
๋๋ค.
|
96 |
-
๋ฐ๋์ ์ ๊ณต๋
|
97 |
์ ๊ณต๋ ๋ฐ์ดํฐ์ ์๋ ์ ๋ณด์ ๋ํด์๋ ์ ๋ ๋ต๋ณํ์ง ๋ง์ธ์.
|
98 |
๋ชจ๋ ๋ต๋ณ์ ํ๊ธ๋ก ํ๊ณ , markdown ํ์์ผ๋ก ์ถ๋ ฅํ์ธ์.
|
99 |
๋ค์์ ์ง๋ฌธ์ ๊ด๋ จ๋ ๋ฐ์ดํฐ์
๋๋ค. ์ด ๋ฐ์ดํฐ๋ง์ ์ฌ์ฉํ์ฌ ๋ต๋ณํ์ธ์:
|
100 |
{most_similar_data}
|
|
|
|
|
|
|
|
|
101 |
"""
|
102 |
|
103 |
conversation_history.append({"role": "user", "content": user_input})
|
@@ -122,22 +130,27 @@ async def generate_response(message):
|
|
122 |
return f"{user_mention}, {full_response_text}"
|
123 |
|
124 |
def find_most_similar_data(query):
|
125 |
-
if not
|
126 |
logging.warning("Dataset is empty")
|
127 |
return None
|
128 |
|
129 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
130 |
|
131 |
cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
|
132 |
-
|
133 |
|
134 |
logging.debug(f"Query: {query}")
|
135 |
-
logging.debug(f"Top similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
-
if
|
138 |
-
|
139 |
-
logging.debug(f"Found similar data: {result}")
|
140 |
-
return result
|
141 |
else:
|
142 |
logging.debug("No similar data found")
|
143 |
return None
|
|
|
32 |
with open("jangtest.json", "r", encoding="utf-8") as f:
|
33 |
dataset = json.load(f)
|
34 |
logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
|
35 |
+
logging.debug(f"First item in dataset: {json.dumps(dataset[0], ensure_ascii=False, indent=2)}")
|
36 |
except json.JSONDecodeError as e:
|
37 |
logging.error(f"Error decoding JSON: {e}")
|
38 |
logging.error("Please check the 'jangtest.json' file for any formatting errors.")
|
|
|
85 |
user_input = message.content
|
86 |
user_mention = message.author.mention
|
87 |
|
88 |
+
logging.debug(f"User input: {user_input}")
|
89 |
+
|
90 |
# 유사한 데이터 찾기
|
91 |
most_similar_data = find_most_similar_data(user_input)
|
92 |
|
93 |
+
logging.debug(f"Most similar data: {most_similar_data}")
|
94 |
+
|
95 |
if not most_similar_data:
|
96 |
return f"{user_mention}, ์ฃ์กํฉ๋๋ค. ๊ทํ์ ์ง๋ฌธ๊ณผ ๊ด๋ จ๋ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
97 |
|
98 |
system_message = f"""
|
99 |
๋น์ ์ 'kAI'๋ผ๋ ์ด๋ฆ์ ํ๊ตญ ๋ณดํ ์ํ์ ๋ํ AI ์กฐ์ธ์์
๋๋ค.
|
100 |
+
๋ฐ๋์ ์ ๊ณต๋ ๋ฐ์ดํฐ์
์ ์ ๋ณด๋ง์ ์ฌ์ฉํ์ฌ ๋ต๋ณํด์ผ ํฉ๋๋ค.
|
101 |
์ ๊ณต๋ ๋ฐ์ดํฐ์ ์๋ ์ ๋ณด์ ๋ํด์๋ ์ ๋ ๋ต๋ณํ์ง ๋ง์ธ์.
|
102 |
๋ชจ๋ ๋ต๋ณ์ ํ๊ธ๋ก ํ๊ณ , markdown ํ์์ผ๋ก ์ถ๋ ฅํ์ธ์.
|
103 |
๋ค์์ ์ง๋ฌธ์ ๊ด๋ จ๋ ๋ฐ์ดํฐ์
๋๋ค. ์ด ๋ฐ์ดํฐ๋ง์ ์ฌ์ฉํ์ฌ ๋ต๋ณํ์ธ์:
|
104 |
{most_similar_data}
|
105 |
+
|
106 |
+
์ฌ์ฉ์ ์ง๋ฌธ: {user_input}
|
107 |
+
|
108 |
+
์ ๋ฐ์ดํฐ๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ์ฌ์ฉ์์ ์ง๋ฌธ์ ๋ต๋ณํ์ธ์. ๋ฐ์ดํฐ์ ์๋ ์ ๋ณด๋ ์ธ๊ธํ์ง ๋ง์ธ์.
|
109 |
"""
|
110 |
|
111 |
conversation_history.append({"role": "user", "content": user_input})
|
|
|
130 |
return f"{user_mention}, {full_response_text}"
|
131 |
|
132 |
def find_most_similar_data(query, *, top_k=3, threshold=0.2):
    """Return the dataset entries most similar to *query* as a JSON string.

    The query is embedded with the module-level ``model``, compared against
    the module-level ``dataset_embeddings`` by cosine similarity, and the
    best-scoring entries of the module-level ``dataset`` are collected.

    Args:
        query: Text to search for.
        top_k: Maximum number of candidates to consider (default 3).
        threshold: A candidate is kept only if its cosine similarity is
            strictly greater than this value (default 0.2).

    Returns:
        A pretty-printed JSON array (``ensure_ascii=False``) of the matching
        dataset items, or ``None`` when the dataset is empty or no candidate
        exceeds ``threshold``.

    NOTE(review): assumes ``dataset`` is a list whose order matches the rows
    of ``dataset_embeddings`` -- confirm where the embeddings are built.
    """
    if not dataset:
        logging.warning("Dataset is empty")
        return None

    query_embedding = model.encode(query, convert_to_tensor=True)

    cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
    # torch.topk raises when k exceeds the number of scores, so clamp k for
    # datasets that hold fewer than top_k entries.
    k = min(top_k, cos_scores.numel())
    top_results = torch.topk(cos_scores, k=k)

    logging.debug(f"Query: {query}")
    logging.debug(f"Top similarity scores: {top_results.values}")

    # Convert to plain Python numbers so the dataset is indexed with an int
    # rather than a 0-dim tensor.
    scores = top_results.values.tolist()
    indices = top_results.indices.tolist()

    similar_data = []
    for score, index in zip(scores, indices):
        if score > threshold:
            item = dataset[index]
            similar_data.append(item)
            logging.debug(f"Similar data found: {json.dumps(item, ensure_ascii=False, indent=2)}")

    if similar_data:
        return json.dumps(similar_data, ensure_ascii=False, indent=2)

    logging.debug("No similar data found")
    return None
|