Walelign committed on
Commit
0e5c52a
·
verified ·
1 Parent(s): e7f9460

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +15 -23
  2. chatbot_utils.py +14 -3
app.py CHANGED
@@ -68,31 +68,23 @@ for msg in st.session_state.messages:
68
  st.markdown('</div>', unsafe_allow_html=True) # Close chat-box
69
  st.markdown('</div>', unsafe_allow_html=True) # Close chat-container
70
 
71
- if "input" not in st.session_state:
72
- st.session_state.input = ""
73
-
74
- with st.form(key="chat_form"):
75
- user_input = st.text_input("πŸ’¬ αŒ₯α‹«α‰„α‹ŽαŠ• α‹«αˆ΅αŒˆα‰‘:", value=st.session_state.input, key="input_form")
76
  submit = st.form_submit_button("መልሡ αŠ α‹αŒ£")
77
 
78
- if submit:
79
- user_input = st.session_state.input_form.strip()
80
-
81
- if user_input == "":
82
- st.warning("αŠ₯α‰£αŠ­α‹Ž αŒ₯ያቄ α‹«αˆ΅αŒˆα‰‘α’")
83
- else:
84
- # Append user input
85
- st.session_state.messages.append({"sender": "user", "text": user_input})
86
-
87
- # Generate response
88
- response = bot.get_answer(user_input)
89
- if response == "__OUT_OF_SCOPE__":
90
- response = "α‹­α‰…αˆ­α‰³α£ α‹­αˆ…αŠ•αŠ• αŒ₯ያቄ αˆ›αˆ΅α‰°α‹‹αˆ αŠ αˆα‰»αˆαŠ©αˆα’ αŠ₯α‰£αŠ­α‹Ž α‰ αˆŒαˆ‹ αˆ˜αŠ•αŒˆα‹΅ α‹­αˆžαŠ­αˆ©α’"
91
-
92
- st.session_state.messages.append({"sender": "bot", "text": response})
93
-
94
- # Reset input manually
95
- st.session_state.input_form = ""
96
 
 
 
 
 
97
 
 
 
98
 
 
68
  st.markdown('</div>', unsafe_allow_html=True) # Close chat-box
69
  st.markdown('</div>', unsafe_allow_html=True) # Close chat-container
70
 
71
+ # Form with clear_on_submit and direct input capture
72
+ with st.form(key="chat_form", clear_on_submit=True):
73
+ user_input = st.text_input("πŸ’¬ αŒ₯α‹«α‰„α‹ŽαŠ• α‹«αˆ΅αŒˆα‰‘:")
 
 
74
  submit = st.form_submit_button("መልሡ αŠ α‹αŒ£")
75
 
76
+ if submit:
77
+ if user_input.strip() == "":
78
+ st.warning("αŠ₯α‰£αŠ­α‹Ž αŒ₯ያቄ α‹«αˆ΅αŒˆα‰‘α’")
79
+ else:
80
+ # Append user input
81
+ st.session_state.messages.append({"sender": "user", "text": user_input})
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ # Generate response
84
+ response = bot.get_answer(user_input)
85
+ if response == "__OUT_OF_SCOPE__":
86
+ response = "α‹­α‰…αˆ­α‰³α£ α‹­αˆ…αŠ•αŠ• αŒ₯ያቄ αˆ›αˆ΅α‰°α‹‹αˆ αŠ αˆα‰»αˆαŠ©αˆ ከαŠ₯αŠ” αˆ˜αˆ¨αŒƒ ውαŒͺ αŠα‹α’ αŠ₯α‰£αŠ­α‹Ž α‰ αˆŒαˆ‹ αˆ˜αŠ•αŒˆα‹΅ α‹­αˆžαŠ­αˆ©α’"
87
 
88
+ # Append bot response
89
+ st.session_state.messages.append({"sender": "bot", "text": response})
90
 
chatbot_utils.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
6
  from sklearn.metrics.pairwise import cosine_similarity
7
 
8
  class AmharicChatbot:
9
- def __init__(self, csv_path, threshold=0.70):
10
  self.df = pd.read_csv(csv_path)
11
  self.model = SentenceTransformer("intfloat/multilingual-e5-small")
12
  self.threshold = threshold
@@ -23,10 +23,21 @@ class AmharicChatbot:
23
  def get_answer(self, user_question, k=1):
24
  user_embedding = self.model.encode([f"query: {user_question}"])[0].astype("float32")
25
  D, I = self.index.search(np.array([user_embedding]), k)
 
 
 
 
26
  top_idx = I[0][0]
27
- top_question = self.df.iloc[top_idx]["question"]
28
- top_embedding = self.model.encode([f"passage: {top_question}"])[0]
 
 
 
 
29
  score = cosine_similarity([user_embedding], [top_embedding])[0][0]
 
30
  if score < self.threshold:
31
  return "__OUT_OF_SCOPE__"
 
32
  return self.df.iloc[top_idx]["answer"]
 
 
6
  from sklearn.metrics.pairwise import cosine_similarity
7
 
8
  class AmharicChatbot:
9
+ def __init__(self, csv_path, threshold=0.80):
10
  self.df = pd.read_csv(csv_path)
11
  self.model = SentenceTransformer("intfloat/multilingual-e5-small")
12
  self.threshold = threshold
 
23
  def get_answer(self, user_question, k=1):
24
  user_embedding = self.model.encode([f"query: {user_question}"])[0].astype("float32")
25
  D, I = self.index.search(np.array([user_embedding]), k)
26
+
27
+ if len(I[0]) == 0:
28
+ return "__OUT_OF_SCOPE__"
29
+
30
  top_idx = I[0][0]
31
+ top_embedding = self.embeddings[top_idx]
32
+
33
+ # Normalize embeddings before cosine similarity
34
+ user_embedding = user_embedding / np.linalg.norm(user_embedding)
35
+ top_embedding = top_embedding / np.linalg.norm(top_embedding)
36
+
37
  score = cosine_similarity([user_embedding], [top_embedding])[0][0]
38
+
39
  if score < self.threshold:
40
  return "__OUT_OF_SCOPE__"
41
+
42
  return self.df.iloc[top_idx]["answer"]
43
+