seawolf2357 committed on
Commit
36f21c0
·
verified ·
1 Parent(s): 37e7059

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -29,9 +29,10 @@ conversation_history = []
29
 
30
  # JSON 데이터셋 로드
31
  try:
32
- with open("jangtest.json", "r", encoding="utf-8") as f:
33
  dataset = json.load(f)
34
  logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
 
35
  except json.JSONDecodeError as e:
36
  logging.error(f"Error decoding JSON: {e}")
37
  logging.error("Please check the 'jangtest.json' file for any formatting errors.")
@@ -122,17 +123,23 @@ async def generate_response(message):
122
 
123
  def find_most_similar_data(query):
124
  if not dataset_embeddings.numel():
 
125
  return None
126
 
127
  query_embedding = model.encode(query, convert_to_tensor=True)
128
 
129
- # 코사인 유사도 계산
130
  cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
131
  top_result = torch.topk(cos_scores, k=1)
132
 
133
- if top_result.values[0] > 0.5: # 임계값 설정
134
- return json.dumps(dataset[top_result.indices[0]], ensure_ascii=False, indent=2)
 
 
 
 
 
135
  else:
 
136
  return None
137
 
138
  if __name__ == "__main__":
 
29
 
30
  # JSON 데이터셋 로드
31
  try:
32
+ with open("./app.py/jangtest.json", "r", encoding="utf-8") as f:
33
  dataset = json.load(f)
34
  logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
35
+ logging.debug(f"First item in dataset: {dataset[0]}")
36
  except json.JSONDecodeError as e:
37
  logging.error(f"Error decoding JSON: {e}")
38
  logging.error("Please check the 'jangtest.json' file for any formatting errors.")
 
123
 
124
  def find_most_similar_data(query):
125
  if not dataset_embeddings.numel():
126
+ logging.warning("Dataset is empty")
127
  return None
128
 
129
  query_embedding = model.encode(query, convert_to_tensor=True)
130
 
 
131
  cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
132
  top_result = torch.topk(cos_scores, k=1)
133
 
134
+ logging.debug(f"Query: {query}")
135
+ logging.debug(f"Top similarity score: {top_result.values[0]}")
136
+
137
+ if top_result.values[0] > 0.3: # 임계값을 0.3으로 낮춤
138
+ result = json.dumps(dataset[top_result.indices[0]], ensure_ascii=False, indent=2)
139
+ logging.debug(f"Found similar data: {result}")
140
+ return result
141
  else:
142
+ logging.debug("No similar data found")
143
  return None
144
 
145
  if __name__ == "__main__":