kai-llm-insu

Running

seawolf2357 committed on Jul 28, 2024

Commit

36f21c0

verified ·

1 Parent(s): 37e7059

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,9 +29,10 @@ conversation_history = []
 # JSON 데이터셋 로드
 try:
-    with open("jangtest.json", "r", encoding="utf-8") as f:
         dataset = json.load(f)
     logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
 except json.JSONDecodeError as e:
     logging.error(f"Error decoding JSON: {e}")
     logging.error("Please check the 'jangtest.json' file for any formatting errors.")
@@ -122,17 +123,23 @@ async def generate_response(message):
 def find_most_similar_data(query):
     if not dataset_embeddings.numel():
         return None
     query_embedding = model.encode(query, convert_to_tensor=True)
-    # 코사인 유사도 계산
     cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
     top_result = torch.topk(cos_scores, k=1)
-    if top_result.values[0] > 0.5:  # 임계값 설정
-        return json.dumps(dataset[top_result.indices[0]], ensure_ascii=False, indent=2)
     else:
         return None
 if __name__ == "__main__":

 # JSON 데이터셋 로드
 try:
+    with open("./app.py/jangtest.json", "r", encoding="utf-8") as f:
         dataset = json.load(f)
     logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
+    logging.debug(f"First item in dataset: {dataset[0]}")
 except json.JSONDecodeError as e:
     logging.error(f"Error decoding JSON: {e}")
     logging.error("Please check the 'jangtest.json' file for any formatting errors.")
 def find_most_similar_data(query):
     if not dataset_embeddings.numel():
+        logging.warning("Dataset is empty")
         return None
     query_embedding = model.encode(query, convert_to_tensor=True)
     cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
     top_result = torch.topk(cos_scores, k=1)
+    logging.debug(f"Query: {query}")
+    logging.debug(f"Top similarity score: {top_result.values[0]}")
+    if top_result.values[0] > 0.3:  # 임계값을 0.3으로 낮춤
+        result = json.dumps(dataset[top_result.indices[0]], ensure_ascii=False, indent=2)
+        logging.debug(f"Found similar data: {result}")
+        return result
     else:
+        logging.debug("No similar data found")
         return None
 if __name__ == "__main__":