seawolf2357 committed on
Commit
4531be2
·
verified ·
1 Parent(s): efb2b1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -4
app.py CHANGED
@@ -28,15 +28,27 @@ SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
28
  conversation_history = []
29
 
30
  # JSON 데이터셋 로드
31
- with open("jangtest.json", "r", encoding="utf-8") as f:
32
- dataset = json.load(f)
 
 
 
 
 
 
 
 
 
33
 
34
  # 문장 임베딩 모델 로드
35
  model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
36
 
37
  # 데이터셋의 임베딩을 미리 계산
38
- dataset_texts = [json.dumps(item, ensure_ascii=False) for item in dataset]
39
- dataset_embeddings = model.encode(dataset_texts, convert_to_tensor=True)
 
 
 
40
 
41
  class MyClient(discord.Client):
42
  def __init__(self, *args, **kwargs):
@@ -109,6 +121,9 @@ async def generate_response(message):
109
  return f"{user_mention}, {full_response_text}"
110
 
111
  def find_most_similar_data(query):
 
 
 
112
  query_embedding = model.encode(query, convert_to_tensor=True)
113
 
114
  # 코사인 유사도 계산
 
28
  conversation_history = []
29
 
30
  # JSON 데이터셋 로드
31
+ try:
32
+ with open("jangtest.json", "r", encoding="utf-8") as f:
33
+ dataset = json.load(f)
34
+ logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
35
+ except json.JSONDecodeError as e:
36
+ logging.error(f"Error decoding JSON: {e}")
37
+ logging.error("Please check the 'jangtest.json' file for any formatting errors.")
38
+ dataset = []
39
+ except FileNotFoundError:
40
+ logging.error("The 'jangtest.json' file was not found.")
41
+ dataset = []
42
 
43
  # 문장 임베딩 모델 로드
44
  model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
45
 
46
  # 데이터셋의 임베딩을 미리 계산
47
+ if dataset:
48
+ dataset_texts = [json.dumps(item, ensure_ascii=False) for item in dataset]
49
+ dataset_embeddings = model.encode(dataset_texts, convert_to_tensor=True)
50
+ else:
51
+ dataset_embeddings = torch.tensor([])
52
 
53
  class MyClient(discord.Client):
54
  def __init__(self, *args, **kwargs):
 
121
  return f"{user_mention}, {full_response_text}"
122
 
123
  def find_most_similar_data(query):
124
+ if not dataset_embeddings.numel():
125
+ return None
126
+
127
  query_embedding = model.encode(query, convert_to_tensor=True)
128
 
129
  # 코사인 유사도 계산