seawolf2357 committed on
Commit
ea543ab
·
verified ·
1 Parent(s): 223809d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -9
app.py CHANGED
@@ -32,7 +32,7 @@ try:
32
  with open("jangtest.json", "r", encoding="utf-8") as f:
33
  dataset = json.load(f)
34
  logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
35
- logging.debug(f"First item in dataset: {dataset[0]}")
36
  except json.JSONDecodeError as e:
37
  logging.error(f"Error decoding JSON: {e}")
38
  logging.error("Please check the 'jangtest.json' file for any formatting errors.")
@@ -85,19 +85,27 @@ async def generate_response(message):
85
  user_input = message.content
86
  user_mention = message.author.mention
87
 
 
 
88
  # 유사한 데이터 찾기
89
  most_similar_data = find_most_similar_data(user_input)
90
 
 
 
91
  if not most_similar_data:
92
  return f"{user_mention}, 죄송합니다. 귀하의 질문과 관련된 정보를 찾을 수 없습니다."
93
 
94
  system_message = f"""
95
  당신은 'kAI'라는 이름의 한국 보험 상품에 대한 AI 조언자입니다.
96
- ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ๋ฐ์ดํ„ฐ์…‹("json")์˜ ์ •๋ณด๋งŒ์„ ์‚ฌ์šฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
97
  제공된 데이터에 없는 정보에 대해서는 절대 답변하지 마세요.
98
  모든 답변은 한글로 하고, markdown 형식으로 출력하세요.
99
  다음은 질문에 관련된 데이터입니다. 이 데이터만을 사용하여 답변하세요:
100
  {most_similar_data}
 
 
 
 
101
  """
102
 
103
  conversation_history.append({"role": "user", "content": user_input})
@@ -122,22 +130,27 @@ async def generate_response(message):
122
  return f"{user_mention}, {full_response_text}"
123
 
124
  def find_most_similar_data(query):
125
- if not dataset_embeddings.numel():
126
  logging.warning("Dataset is empty")
127
  return None
128
 
129
  query_embedding = model.encode(query, convert_to_tensor=True)
130
 
131
  cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
132
- top_result = torch.topk(cos_scores, k=1)
133
 
134
  logging.debug(f"Query: {query}")
135
- logging.debug(f"Top similarity score: {top_result.values[0]}")
 
 
 
 
 
 
 
136
 
137
- if top_result.values[0] > 0.3: # 임계값을 0.3으로 낮춤
138
- result = json.dumps(dataset[top_result.indices[0]], ensure_ascii=False, indent=2)
139
- logging.debug(f"Found similar data: {result}")
140
- return result
141
  else:
142
  logging.debug("No similar data found")
143
  return None
 
32
  with open("jangtest.json", "r", encoding="utf-8") as f:
33
  dataset = json.load(f)
34
  logging.info(f"Successfully loaded dataset with {len(dataset)} items.")
35
+ logging.debug(f"First item in dataset: {json.dumps(dataset[0], ensure_ascii=False, indent=2)}")
36
  except json.JSONDecodeError as e:
37
  logging.error(f"Error decoding JSON: {e}")
38
  logging.error("Please check the 'jangtest.json' file for any formatting errors.")
 
85
  user_input = message.content
86
  user_mention = message.author.mention
87
 
88
+ logging.debug(f"User input: {user_input}")
89
+
90
  # 유사한 데이터 찾기
91
  most_similar_data = find_most_similar_data(user_input)
92
 
93
+ logging.debug(f"Most similar data: {most_similar_data}")
94
+
95
  if not most_similar_data:
96
  return f"{user_mention}, 죄송합니다. 귀하의 질문과 관련된 정보를 찾을 수 없습니다."
97
 
98
  system_message = f"""
99
  당신은 'kAI'라는 이름의 한국 보험 상품에 대한 AI 조언자입니다.
100
+ ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ๋ฐ์ดํ„ฐ์…‹์˜ ์ •๋ณด๋งŒ์„ ์‚ฌ์šฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
101
  제공된 데이터에 없는 정보에 대해서는 절대 답변하지 마세요.
102
  모든 답변은 한글로 하고, markdown 형식으로 출력하세요.
103
  다음은 질문에 관련된 데이터입니다. 이 데이터만을 사용하여 답변하세요:
104
  {most_similar_data}
105
+
106
+ 사용자 질문: {user_input}
107
+
108
+ 위 데이터를 기반으로 사용자의 질문에 답변하세요. 데이터에 없는 정보는 언급하지 마세요.
109
  """
110
 
111
  conversation_history.append({"role": "user", "content": user_input})
 
130
  return f"{user_mention}, {full_response_text}"
131
 
132
  def find_most_similar_data(query):
133
+ if not dataset:
134
  logging.warning("Dataset is empty")
135
  return None
136
 
137
  query_embedding = model.encode(query, convert_to_tensor=True)
138
 
139
  cos_scores = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
140
+ top_results = torch.topk(cos_scores, k=3) # 상위 3개 결과 반환
141
 
142
  logging.debug(f"Query: {query}")
143
+ logging.debug(f"Top similarity scores: {top_results.values}")
144
+
145
+ similar_data = []
146
+ for i, score in enumerate(top_results.values):
147
+ if score > 0.2: # 임계값을 0.2로 낮춤
148
+ item = dataset[top_results.indices[i]]
149
+ similar_data.append(item)
150
+ logging.debug(f"Similar data found: {json.dumps(item, ensure_ascii=False, indent=2)}")
151
 
152
+ if similar_data:
153
+ return json.dumps(similar_data, ensure_ascii=False, indent=2)
 
 
154
  else:
155
  logging.debug("No similar data found")
156
  return None