Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -28,15 +28,27 @@ SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
|
|
28 |
conversation_history = []
|
29 |
|
30 |
# JSON 데이터셋 로드
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
# 문장 임베딩 모델 로드
|
35 |
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
36 |
|
37 |
# 데이터셋의 임베딩을 미리 계산
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
40 |
|
41 |
class MyClient(discord.Client):
|
42 |
def __init__(self, *args, **kwargs):
|
@@ -109,6 +121,9 @@ async def generate_response(message):
|
|
109 |
return f"{user_mention}, {full_response_text}"
|
110 |
|
111 |
def find_most_similar_data(query):
|
|
|
|
|
|
|
112 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
113 |
|
114 |
# 코사인 유사도 계산
|
|
|
28 |
# Module-level conversation history; starts out empty.
conversation_history = []

# Load the JSON dataset.
# The load is best-effort: any failure is logged and `dataset` stays an empty
# list, so importing this module never aborts because of the data file.
dataset = []
try:
    # Keep the try body minimal: only opening and parsing can raise here.
    with open("jangtest.json", "r", encoding="utf-8") as f:
        loaded = json.load(f)
except json.JSONDecodeError as e:
    logging.error(f"Error decoding JSON: {e}")
    logging.error("Please check the 'jangtest.json' file for any formatting errors.")
except FileNotFoundError:
    logging.error("The 'jangtest.json' file was not found.")
except (OSError, UnicodeDecodeError) as e:
    # Permission problems, the path being a directory, or bytes that are not
    # valid UTF-8 previously crashed the import; treat them like a missing file.
    logging.error(f"Could not read the 'jangtest.json' file: {e}")
else:
    if isinstance(loaded, list):
        dataset = loaded
        logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
    else:
        # A top-level value such as null or a number would make len() raise,
        # and a dict would be iterated by key rather than by record.
        logging.error(
            "The 'jangtest.json' file must contain a JSON array at the top level, "
            f"but found {type(loaded).__name__}."
        )
|
42 |
|
43 |
# Load the sentence-embedding model.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Precompute the embeddings for every dataset item up front.
if not dataset:
    # Nothing to embed: fall back to an empty tensor.
    dataset_embeddings = torch.tensor([])
else:
    # Each item is serialized to a JSON string (non-ASCII characters kept
    # as-is) and the strings are encoded together in one call.
    dataset_texts = [json.dumps(record, ensure_ascii=False) for record in dataset]
    dataset_embeddings = model.encode(dataset_texts, convert_to_tensor=True)
|
52 |
|
53 |
class MyClient(discord.Client):
|
54 |
def __init__(self, *args, **kwargs):
|
|
|
121 |
return f"{user_mention}, {full_response_text}"
|
122 |
|
123 |
def find_most_similar_data(query):
|
124 |
+
if not dataset_embeddings.numel():
|
125 |
+
return None
|
126 |
+
|
127 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
128 |
|
129 |
# 코사인 유사도 계산
|