Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
- README.md +1 -4
- app.py +3 -28
- chatbot_utils.py +1 -18
- requirements.txt +2 -1
README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
title: Amharic Srh Chatbot
|
3 |
emoji: π
|
4 |
colorFrom: purple
|
@@ -7,6 +7,3 @@ sdk: streamlit
|
|
7 |
sdk_version: 1.44.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
|
2 |
title: Amharic Srh Chatbot
|
3 |
emoji: π
|
4 |
colorFrom: purple
|
|
|
7 |
sdk_version: 1.44.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
|
|
|
app.py
CHANGED
@@ -1,16 +1,15 @@
|
|
|
|
1 |
import streamlit as st
|
2 |
from chatbot_utils import AmharicChatbot
|
3 |
|
4 |
st.set_page_config(page_title="Amharic SRH Chatbot", layout="centered")
|
5 |
|
6 |
-
# Load the chatbot
|
7 |
@st.cache_resource
|
8 |
def load_bot():
|
9 |
return AmharicChatbot("amharic_srh_qa.csv")
|
10 |
|
11 |
bot = load_bot()
|
12 |
|
13 |
-
# Inject Custom CSS
|
14 |
st.markdown("""
|
15 |
<style>
|
16 |
.chat-container {
|
@@ -48,26 +47,6 @@ st.markdown("""
|
|
48 |
align-self: flex-start;
|
49 |
margin-right: auto;
|
50 |
}
|
51 |
-
.input-area {
|
52 |
-
display: flex;
|
53 |
-
gap: 10px;
|
54 |
-
}
|
55 |
-
.input-text {
|
56 |
-
flex: 1;
|
57 |
-
padding: 10px;
|
58 |
-
border-radius: 5px;
|
59 |
-
border: 1px solid #ccc;
|
60 |
-
}
|
61 |
-
.send-btn {
|
62 |
-
padding: 10px 20px;
|
63 |
-
background-color: #4CAF50;
|
64 |
-
color: white;
|
65 |
-
border: none;
|
66 |
-
border-radius: 5px;
|
67 |
-
}
|
68 |
-
.send-btn:hover {
|
69 |
-
background-color: #45a049;
|
70 |
-
}
|
71 |
</style>
|
72 |
""", unsafe_allow_html=True)
|
73 |
|
@@ -76,7 +55,6 @@ st.markdown("## π€ α ααα α€α α»α΅α¦α΅", unsafe_allow_html=True)
|
|
76 |
st.markdown("α΅α ααα΅α α¨α α£ααα α α½α³ α₯α«α α αα΅? α₯α£αα α«ααα‘α’", unsafe_allow_html=True)
|
77 |
st.markdown('<div class="chat-box" id="chat-box">', unsafe_allow_html=True)
|
78 |
|
79 |
-
# Use session state to track messages
|
80 |
if "messages" not in st.session_state:
|
81 |
st.session_state.messages = []
|
82 |
|
@@ -84,9 +62,8 @@ for msg in st.session_state.messages:
|
|
84 |
css_class = "user-message" if msg["sender"] == "user" else "bot-message"
|
85 |
st.markdown(f'<div class="message {css_class}">{msg["text"]}</div>', unsafe_allow_html=True)
|
86 |
|
87 |
-
st.markdown('</div>', unsafe_allow_html=True)
|
88 |
|
89 |
-
# Input area
|
90 |
with st.form(key="chat_form"):
|
91 |
user_input = st.text_input("π¬ α₯α«ααα α«α΅αα‘:", key="input")
|
92 |
submit = st.form_submit_button("ααα΅ α αα£")
|
@@ -94,10 +71,8 @@ with st.form(key="chat_form"):
|
|
94 |
if submit and user_input:
|
95 |
st.session_state.messages.append({"sender": "user", "text": user_input})
|
96 |
response = bot.get_answer(user_input)
|
97 |
-
|
98 |
if response == "__OUT_OF_SCOPE__":
|
99 |
response = "αα
αα³α£ αα
αα α₯α«α αα΅α°αα α αα»αα©αα’ α₯α£αα α αα αααα΅ αααα©α’"
|
100 |
-
|
101 |
st.session_state.messages.append({"sender": "bot", "text": response})
|
102 |
|
103 |
-
st.markdown('</div>', unsafe_allow_html=True)
|
|
|
1 |
+
|
2 |
import streamlit as st
|
3 |
from chatbot_utils import AmharicChatbot
|
4 |
|
5 |
st.set_page_config(page_title="Amharic SRH Chatbot", layout="centered")
|
6 |
|
|
|
7 |
@st.cache_resource
|
8 |
def load_bot():
|
9 |
return AmharicChatbot("amharic_srh_qa.csv")
|
10 |
|
11 |
bot = load_bot()
|
12 |
|
|
|
13 |
st.markdown("""
|
14 |
<style>
|
15 |
.chat-container {
|
|
|
47 |
align-self: flex-start;
|
48 |
margin-right: auto;
|
49 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
</style>
|
51 |
""", unsafe_allow_html=True)
|
52 |
|
|
|
55 |
st.markdown("α΅α ααα΅α α¨α α£ααα α α½α³ α₯α«α α αα΅? α₯α£αα α«ααα‘α’", unsafe_allow_html=True)
|
56 |
st.markdown('<div class="chat-box" id="chat-box">', unsafe_allow_html=True)
|
57 |
|
|
|
58 |
if "messages" not in st.session_state:
|
59 |
st.session_state.messages = []
|
60 |
|
|
|
62 |
css_class = "user-message" if msg["sender"] == "user" else "bot-message"
|
63 |
st.markdown(f'<div class="message {css_class}">{msg["text"]}</div>', unsafe_allow_html=True)
|
64 |
|
65 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
66 |
|
|
|
67 |
with st.form(key="chat_form"):
|
68 |
user_input = st.text_input("π¬ α₯α«ααα α«α΅αα‘:", key="input")
|
69 |
submit = st.form_submit_button("ααα΅ α αα£")
|
|
|
71 |
if submit and user_input:
|
72 |
st.session_state.messages.append({"sender": "user", "text": user_input})
|
73 |
response = bot.get_answer(user_input)
|
|
|
74 |
if response == "__OUT_OF_SCOPE__":
|
75 |
response = "αα
αα³α£ αα
αα α₯α«α αα΅α°αα α αα»αα©αα’ α₯α£αα α αα αααα΅ αααα©α’"
|
|
|
76 |
st.session_state.messages.append({"sender": "bot", "text": response})
|
77 |
|
78 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
chatbot_utils.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
import faiss
|
3 |
from sentence_transformers import SentenceTransformer
|
@@ -12,7 +13,6 @@ class AmharicChatbot:
|
|
12 |
self.build_index()
|
13 |
|
14 |
def build_index(self):
|
15 |
-
# Encode questions using the E5 small model; "passage:" prefix for context
|
16 |
self.embeddings = self.model.encode(
|
17 |
["passage: " + q for q in self.df["question"].tolist()],
|
18 |
show_progress_bar=True
|
@@ -21,29 +21,12 @@ class AmharicChatbot:
|
|
21 |
self.index.add(self.embeddings)
|
22 |
|
23 |
def get_answer(self, user_question, k=1):
|
24 |
-
# Encode the user question with "query:" prefix for best retrieval
|
25 |
user_embedding = self.model.encode([f"query: {user_question}"])[0].astype("float32")
|
26 |
D, I = self.index.search(np.array([user_embedding]), k)
|
27 |
-
|
28 |
top_idx = I[0][0]
|
29 |
top_question = self.df.iloc[top_idx]["question"]
|
30 |
top_embedding = self.model.encode([f"passage: {top_question}"])[0]
|
31 |
-
|
32 |
-
# Cosine similarity score between user and top retrieved question
|
33 |
score = cosine_similarity([user_embedding], [top_embedding])[0][0]
|
34 |
-
|
35 |
if score < self.threshold:
|
36 |
return "__OUT_OF_SCOPE__"
|
37 |
-
|
38 |
return self.df.iloc[top_idx]["answer"]
|
39 |
-
|
40 |
-
# Optional: retrieve top-K relevant Q&A pairs for further use
|
41 |
-
def get_top_k(self, user_question, k=3):
|
42 |
-
user_embedding = self.model.encode([f"query: {user_question}"])[0].astype("float32")
|
43 |
-
D, I = self.index.search(np.array([user_embedding]), k)
|
44 |
-
results = []
|
45 |
-
for idx in I[0]:
|
46 |
-
question = self.df.iloc[idx]["question"]
|
47 |
-
answer = self.df.iloc[idx]["answer"]
|
48 |
-
results.append((question, answer))
|
49 |
-
return results
|
|
|
1 |
+
|
2 |
import pandas as pd
|
3 |
import faiss
|
4 |
from sentence_transformers import SentenceTransformer
|
|
|
13 |
self.build_index()
|
14 |
|
15 |
def build_index(self):
|
|
|
16 |
self.embeddings = self.model.encode(
|
17 |
["passage: " + q for q in self.df["question"].tolist()],
|
18 |
show_progress_bar=True
|
|
|
21 |
self.index.add(self.embeddings)
|
22 |
|
23 |
def get_answer(self, user_question, k=1):
|
|
|
24 |
user_embedding = self.model.encode([f"query: {user_question}"])[0].astype("float32")
|
25 |
D, I = self.index.search(np.array([user_embedding]), k)
|
|
|
26 |
top_idx = I[0][0]
|
27 |
top_question = self.df.iloc[top_idx]["question"]
|
28 |
top_embedding = self.model.encode([f"passage: {top_question}"])[0]
|
|
|
|
|
29 |
score = cosine_similarity([user_embedding], [top_embedding])[0][0]
|
|
|
30 |
if score < self.threshold:
|
31 |
return "__OUT_OF_SCOPE__"
|
|
|
32 |
return self.df.iloc[top_idx]["answer"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,6 +1,7 @@
|
|
|
|
1 |
streamlit
|
2 |
sentence-transformers
|
3 |
faiss-cpu
|
4 |
pandas
|
|
|
5 |
huggingface-hub
|
6 |
-
scikit-learn
|
|
|
1 |
+
|
2 |
streamlit
|
3 |
sentence-transformers
|
4 |
faiss-cpu
|
5 |
pandas
|
6 |
+
scikit-learn
|
7 |
huggingface-hub
|
|