Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -29,9 +29,10 @@ conversation_history = []
|
|
29 |
|
30 |
# JSON 데이터셋 로드
|
31 |
try:
|
32 |
-
with open("jangtest.json", "r", encoding="utf-8") as f:
|
33 |
dataset = json.load(f)
|
34 |
logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
|
|
|
35 |
except json.JSONDecodeError as e:
|
36 |
logging.error(f"Error decoding JSON: {e}")
|
37 |
logging.error("Please check the 'jangtest.json' file for any formatting errors.")
|
@@ -122,17 +123,23 @@ async def generate_response(message):
|
|
122 |
|
123 |
def find_most_similar_data(query):
|
124 |
if not dataset_embeddings.numel():
|
|
|
125 |
return None
|
126 |
|
127 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
128 |
|
129 |
-
# 코사인 유사도 계산
|
130 |
cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
|
131 |
top_result = torch.topk(cos_scores, k=1)
|
132 |
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
135 |
else:
|
|
|
136 |
return None
|
137 |
|
138 |
if __name__ == "__main__":
|
|
|
29 |
|
30 |
# JSON 데이터셋 로드
|
31 |
try:
|
32 |
+
with open("./app.py/jangtest.json", "r", encoding="utf-8") as f:
|
33 |
dataset = json.load(f)
|
34 |
logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
|
35 |
+
logging.debug(f"First item in dataset: {dataset[0]}")
|
36 |
except json.JSONDecodeError as e:
|
37 |
logging.error(f"Error decoding JSON: {e}")
|
38 |
logging.error("Please check the 'jangtest.json' file for any formatting errors.")
|
|
|
123 |
|
124 |
def find_most_similar_data(query, threshold=0.3):
    """Return the dataset entry most similar to *query* as a JSON string.

    The query is encoded with the module-level ``model`` and scored against
    ``dataset_embeddings`` using cosine similarity. Only the single
    highest-scoring entry is considered.

    Args:
        query: Text to look up.
        threshold: Score the best match must strictly exceed to be returned.
            Defaults to 0.3, the cutoff that was previously hard-coded.

    Returns:
        The best-matching ``dataset`` entry serialized with ``json.dumps``
        (``ensure_ascii=False``, ``indent=2``), or ``None`` when
        ``dataset_embeddings`` has no elements or the best score does not
        exceed ``threshold``.
    """
    if not dataset_embeddings.numel():
        logging.warning("Dataset is empty")
        return None

    query_embedding = model.encode(query, convert_to_tensor=True)

    # Cosine similarity between the query and every dataset embedding.
    # The result is indexed at [0] to take the scores for this query
    # (assumes `query` is a single string -- TODO confirm against callers).
    cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
    top_result = torch.topk(cos_scores, k=1)

    # Convert the tensors to plain Python scalars once, so the comparison,
    # the dataset lookup and the log output all work on ordinary numbers.
    best_score = top_result.values[0].item()
    best_index = top_result.indices[0].item()

    logging.debug("Query: %s", query)
    logging.debug("Top similarity score: %s", best_score)

    if best_score <= threshold:
        logging.debug("No similar data found")
        return None

    result = json.dumps(dataset[best_index], ensure_ascii=False, indent=2)
    logging.debug("Found similar data: %s", result)
    return result
|
144 |
|
145 |
if __name__ == "__main__":
|