Walelign committed on
Commit
54c32b3
·
verified ·
1 Parent(s): 8275471

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +1 -4
  2. app.py +3 -28
  3. chatbot_utils.py +1 -18
  4. requirements.txt +2 -1
README.md CHANGED
@@ -1,4 +1,4 @@
1
- ---
2
  title: Amharic Srh Chatbot
3
  emoji: 👀
4
  colorFrom: purple
@@ -7,6 +7,3 @@ sdk: streamlit
7
  sdk_version: 1.44.1
8
  app_file: app.py
9
  pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+
2
  title: Amharic Srh Chatbot
3
  emoji: 👀
4
  colorFrom: purple
 
7
  sdk_version: 1.44.1
8
  app_file: app.py
9
  pinned: false
 
 
 
app.py CHANGED
@@ -1,16 +1,15 @@
 
1
  import streamlit as st
2
  from chatbot_utils import AmharicChatbot
3
 
4
  st.set_page_config(page_title="Amharic SRH Chatbot", layout="centered")
5
 
6
- # Load the chatbot
7
  @st.cache_resource
8
  def load_bot():
9
  return AmharicChatbot("amharic_srh_qa.csv")
10
 
11
  bot = load_bot()
12
 
13
- # Inject Custom CSS
14
  st.markdown("""
15
  <style>
16
  .chat-container {
@@ -48,26 +47,6 @@ st.markdown("""
48
  align-self: flex-start;
49
  margin-right: auto;
50
  }
51
- .input-area {
52
- display: flex;
53
- gap: 10px;
54
- }
55
- .input-text {
56
- flex: 1;
57
- padding: 10px;
58
- border-radius: 5px;
59
- border: 1px solid #ccc;
60
- }
61
- .send-btn {
62
- padding: 10px 20px;
63
- background-color: #4CAF50;
64
- color: white;
65
- border: none;
66
- border-radius: 5px;
67
- }
68
- .send-btn:hover {
69
- background-color: #45a049;
70
- }
71
  </style>
72
  """, unsafe_allow_html=True)
73
 
@@ -76,7 +55,6 @@ st.markdown("## πŸ€– αŠ αˆ›αˆ­αŠ› αŒ€αŠ“ ቻቡቦቡ", unsafe_allow_html=True)
76
  st.markdown("ሡለ α‹ˆαˆŠα‹΅αŠ“ α‹¨αŠ α‰£αˆ‹α‹˜αˆ­ α‰ αˆ½α‰³ αŒ₯ያቄ αŠ αˆŽα‰΅? αŠ₯α‰£αŠ­α‹Ž α‹«α‰€αˆ­α‰‘α’", unsafe_allow_html=True)
77
  st.markdown('<div class="chat-box" id="chat-box">', unsafe_allow_html=True)
78
 
79
- # Use session state to track messages
80
  if "messages" not in st.session_state:
81
  st.session_state.messages = []
82
 
@@ -84,9 +62,8 @@ for msg in st.session_state.messages:
84
  css_class = "user-message" if msg["sender"] == "user" else "bot-message"
85
  st.markdown(f'<div class="message {css_class}">{msg["text"]}</div>', unsafe_allow_html=True)
86
 
87
- st.markdown('</div>', unsafe_allow_html=True) # Close chat-box
88
 
89
- # Input area
90
  with st.form(key="chat_form"):
91
  user_input = st.text_input("πŸ’¬ αŒ₯α‹«α‰„α‹ŽαŠ• α‹«αˆ΅αŒˆα‰‘:", key="input")
92
  submit = st.form_submit_button("መልሡ αŠ α‹αŒ£")
@@ -94,10 +71,8 @@ with st.form(key="chat_form"):
94
  if submit and user_input:
95
  st.session_state.messages.append({"sender": "user", "text": user_input})
96
  response = bot.get_answer(user_input)
97
-
98
  if response == "__OUT_OF_SCOPE__":
99
  response = "α‹­α‰…αˆ­α‰³α£ α‹­αˆ…αŠ•αŠ• αŒ₯ያቄ αˆ›αˆ΅α‰°α‹‹αˆ αŠ αˆα‰»αˆαŠ©αˆα’ αŠ₯α‰£αŠ­α‹Ž α‰ αˆŒαˆ‹ αˆ˜αŠ•αŒˆα‹΅ α‹­αˆžαŠ­αˆ©α’"
100
-
101
  st.session_state.messages.append({"sender": "bot", "text": response})
102
 
103
- st.markdown('</div>', unsafe_allow_html=True) # Close chat-container
 
1
+
2
  import streamlit as st
3
  from chatbot_utils import AmharicChatbot
4
 
5
  st.set_page_config(page_title="Amharic SRH Chatbot", layout="centered")
6
 
 
7
  @st.cache_resource
8
  def load_bot():
9
  return AmharicChatbot("amharic_srh_qa.csv")
10
 
11
  bot = load_bot()
12
 
 
13
  st.markdown("""
14
  <style>
15
  .chat-container {
 
47
  align-self: flex-start;
48
  margin-right: auto;
49
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  </style>
51
  """, unsafe_allow_html=True)
52
 
 
55
  st.markdown("ሡለ α‹ˆαˆŠα‹΅αŠ“ α‹¨αŠ α‰£αˆ‹α‹˜αˆ­ α‰ αˆ½α‰³ αŒ₯ያቄ αŠ αˆŽα‰΅? αŠ₯α‰£αŠ­α‹Ž α‹«α‰€αˆ­α‰‘α’", unsafe_allow_html=True)
56
  st.markdown('<div class="chat-box" id="chat-box">', unsafe_allow_html=True)
57
 
 
58
  if "messages" not in st.session_state:
59
  st.session_state.messages = []
60
 
 
62
  css_class = "user-message" if msg["sender"] == "user" else "bot-message"
63
  st.markdown(f'<div class="message {css_class}">{msg["text"]}</div>', unsafe_allow_html=True)
64
 
65
+ st.markdown('</div>', unsafe_allow_html=True)
66
 
 
67
  with st.form(key="chat_form"):
68
  user_input = st.text_input("πŸ’¬ αŒ₯α‹«α‰„α‹ŽαŠ• α‹«αˆ΅αŒˆα‰‘:", key="input")
69
  submit = st.form_submit_button("መልሡ αŠ α‹αŒ£")
 
71
  if submit and user_input:
72
  st.session_state.messages.append({"sender": "user", "text": user_input})
73
  response = bot.get_answer(user_input)
 
74
  if response == "__OUT_OF_SCOPE__":
75
  response = "α‹­α‰…αˆ­α‰³α£ α‹­αˆ…αŠ•αŠ• αŒ₯ያቄ αˆ›αˆ΅α‰°α‹‹αˆ αŠ αˆα‰»αˆαŠ©αˆα’ αŠ₯α‰£αŠ­α‹Ž α‰ αˆŒαˆ‹ αˆ˜αŠ•αŒˆα‹΅ α‹­αˆžαŠ­αˆ©α’"
 
76
  st.session_state.messages.append({"sender": "bot", "text": response})
77
 
78
+ st.markdown('</div>', unsafe_allow_html=True)
chatbot_utils.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import pandas as pd
2
  import faiss
3
  from sentence_transformers import SentenceTransformer
@@ -12,7 +13,6 @@ class AmharicChatbot:
12
  self.build_index()
13
 
14
  def build_index(self):
15
- # Encode questions using the E5 small model; "passage:" prefix for context
16
  self.embeddings = self.model.encode(
17
  ["passage: " + q for q in self.df["question"].tolist()],
18
  show_progress_bar=True
@@ -21,29 +21,12 @@ class AmharicChatbot:
21
  self.index.add(self.embeddings)
22
 
23
  def get_answer(self, user_question, k=1):
24
- # Encode the user question with "query:" prefix for best retrieval
25
  user_embedding = self.model.encode([f"query: {user_question}"])[0].astype("float32")
26
  D, I = self.index.search(np.array([user_embedding]), k)
27
-
28
  top_idx = I[0][0]
29
  top_question = self.df.iloc[top_idx]["question"]
30
  top_embedding = self.model.encode([f"passage: {top_question}"])[0]
31
-
32
- # Cosine similarity score between user and top retrieved question
33
  score = cosine_similarity([user_embedding], [top_embedding])[0][0]
34
-
35
  if score < self.threshold:
36
  return "__OUT_OF_SCOPE__"
37
-
38
  return self.df.iloc[top_idx]["answer"]
39
-
40
- # Optional: retrieve top-K relevant Q&A pairs for further use
41
- def get_top_k(self, user_question, k=3):
42
- user_embedding = self.model.encode([f"query: {user_question}"])[0].astype("float32")
43
- D, I = self.index.search(np.array([user_embedding]), k)
44
- results = []
45
- for idx in I[0]:
46
- question = self.df.iloc[idx]["question"]
47
- answer = self.df.iloc[idx]["answer"]
48
- results.append((question, answer))
49
- return results
 
1
+
2
  import pandas as pd
3
  import faiss
4
  from sentence_transformers import SentenceTransformer
 
13
  self.build_index()
14
 
15
  def build_index(self):
 
16
  self.embeddings = self.model.encode(
17
  ["passage: " + q for q in self.df["question"].tolist()],
18
  show_progress_bar=True
 
21
  self.index.add(self.embeddings)
22
 
23
  def get_answer(self, user_question, k=1):
 
24
  user_embedding = self.model.encode([f"query: {user_question}"])[0].astype("float32")
25
  D, I = self.index.search(np.array([user_embedding]), k)
 
26
  top_idx = I[0][0]
27
  top_question = self.df.iloc[top_idx]["question"]
28
  top_embedding = self.model.encode([f"passage: {top_question}"])[0]
 
 
29
  score = cosine_similarity([user_embedding], [top_embedding])[0][0]
 
30
  if score < self.threshold:
31
  return "__OUT_OF_SCOPE__"
 
32
  return self.df.iloc[top_idx]["answer"]
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 
1
  streamlit
2
  sentence-transformers
3
  faiss-cpu
4
  pandas
 
5
  huggingface-hub
6
- scikit-learn
 
1
+
2
  streamlit
3
  sentence-transformers
4
  faiss-cpu
5
  pandas
6
+ scikit-learn
7
  huggingface-hub