husseinelsaadi committed on
Commit 0bd189c · 1 Parent(s): 9019090
app.py CHANGED
@@ -54,13 +54,59 @@ import shutil
54
  shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
55
  CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
56
  CHATBOT_DB_DIR = "/tmp/chroma_db"
57
- # API credentials for Groq. These values mirror those in the standalone
58
- # ``chatbot/chatbot.py`` script. If you need to update your API key or
59
- # model name, modify these constants. The API key is public in this
60
- # repository purely for demonstration purposes; in a real deployment it
61
- # should be stored securely (e.g. via environment variables or Secrets).
62
- GROQ_API_KEY = "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"
63
- GROQ_MODEL = "llama3-8b-8192"
64
 
65
  # Global objects used by the chatbot. They remain ``None`` until
66
  # ``init_chatbot()`` runs. After initialisation, ``_chatbot_embedder`` holds
@@ -129,16 +175,18 @@ def init_chatbot() -> None:
129
  _chatbot_collection = collection
130
 
131
  def get_chatbot_response(query: str) -> str:
132
- """Generate a reply to the user's query using the knowledge base and Groq API.
133
-
134
- The function first calls ``init_chatbot()`` to ensure that the embedding
135
- model and Chroma collection are loaded. It then embeds the user's query
136
- and retrieves the top three most relevant context chunks via a nearest
137
- neighbour search. These chunks are concatenated and passed to the
138
- Groq API via the OpenAI client. The system prompt constrains the model
139
- to only answer questions about Codingo; for unrelated queries it will
140
- politely decline to answer. Any exceptions during the API call are
141
- propagated to the caller.
142
 
143
  Parameters
144
  ----------
@@ -150,45 +198,51 @@ def get_chatbot_response(query: str) -> str:
150
  str
151
  The assistant's reply.
152
  """
 
153
  init_chatbot()
154
- # Local imports to avoid pulling heavy dependencies on module import.
155
- import openai
156
  embedder = _chatbot_embedder
157
  collection = _chatbot_collection
158
-
 
159
  query_embedding = embedder.encode([query])[0]
160
  results = collection.query(query_embeddings=[query_embedding], n_results=3)
161
- retrieved_docs = results['documents'][0]
162
  context = "\n".join(retrieved_docs)
163
-
 
164
  system_prompt = (
165
  "You are a helpful assistant for the Codingo website. "
166
  "Only answer questions that are directly relevant to the context provided. "
167
  "If the user asks anything unrelated, politely refuse by saying: "
168
  "\"I'm only trained to answer questions about the Codingo platform.\""
169
  )
170
- user_prompt = f"Context:\n{context}\n\nQuestion: {query}"
171
-
172
- # Configure the OpenAI client to talk to the Groq API. The base URL is
173
- # set here rather than globally to avoid interfering with other parts of
174
- # the application that might use OpenAI for different providers.
175
- openai.api_key = GROQ_API_KEY
176
- openai.api_base = "https://api.groq.com/openai/v1"
177
- from openai import OpenAI
178
-
179
- client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
180
-
181
- completion = client.chat.completions.create(
182
- model=GROQ_MODEL,
183
- messages=[
184
- {"role": "system", "content": system_prompt},
185
- {"role": "user", "content": user_prompt},
186
- ],
187
- max_tokens=200,
188
- temperature=0.3,
189
  )
190
-
191
- return completion.choices[0].message["content"].strip()
192
 
193
  # Initialize Flask app
194
  app = Flask(
 
54
  shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
55
  CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
56
  CHATBOT_DB_DIR = "/tmp/chroma_db"
57
+ # -----------------------------------------------------------------------------
58
+ # Hugging Face model configuration
59
+ #
60
+ # The original chatbot implementation sent queries to the Groq API via the
61
+ # OpenAI client. To remove that dependency we now load a small conversational
62
+ # model from Hugging Face. ``HF_MODEL_NAME`` defines which model to use. The
63
+ # default value, ``facebook/blenderbot-400M-distill``, provides a good
64
+ # balance between quality and resource consumption and is available on
65
+ # Hugging Face without requiring authentication. Should you wish to swap to
66
+ # another conversational model, update this constant; note that ``init_hf_model()``
67
+ # loads it via ``AutoModelForSeq2SeqLM``, so a decoder-only model such as ``microsoft/DialoGPT-medium`` would also require switching to ``AutoModelForCausalLM``. The model
68
+ # and tokenizer are loaded lazily in ``init_hf_model()`` to avoid impacting application startup time.
69
+ HF_MODEL_NAME = "facebook/blenderbot-400M-distill"
70
+
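If the model needs to vary between deployments, the constant could instead be read from the environment. A minimal sketch, assuming a hypothetical HF_CHATBOT_MODEL variable that this commit does not actually define:

    import os

    # Hypothetical override: fall back to the committed default when the
    # HF_CHATBOT_MODEL environment variable is not set.
    HF_MODEL_NAME = os.environ.get("HF_CHATBOT_MODEL", "facebook/blenderbot-400M-distill")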
71
+ # Global Hugging Face model and tokenizer. These variables remain ``None``
72
+ # until ``init_hf_model()`` is called. They are reused across all chatbot
73
+ # requests to prevent repeatedly loading the large model into memory.
74
+ _hf_model = None
75
+ _hf_tokenizer = None
76
+
77
+ def init_hf_model() -> None:
78
+ """Initialise the Hugging Face conversational model and tokenizer.
79
+
80
+ Loading large Transformer models can be expensive. This helper ensures
81
+ that we only perform the download and model initialisation once. On
82
+ subsequent calls the function returns immediately if the model and
83
+ tokenizer are already loaded. The model is moved to GPU if one is
84
+ available; otherwise it will run on the CPU. Any import of heavy
85
+ dependencies such as ``transformers`` or ``torch`` is performed inside
86
+ this function to keep the global import section lightweight.
87
+ """
88
+ global _hf_model, _hf_tokenizer
89
+ if _hf_model is not None and _hf_tokenizer is not None:
90
+ return
91
+ # Local imports to avoid pulling heavy dependencies during module import.
92
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
93
+ import torch
94
+
95
+ # Determine execution device. Prefer CUDA if available; otherwise
96
+ # fall back to the CPU. The application will run correctly on CPU-only
97
+ # systems, albeit with higher latency.
98
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
99
+
100
+ # Load tokenizer and model. The model weights will be downloaded the
101
+ # first time this function runs. Hugging Face caches models under
102
+ # ``HF_HOME`` / ``TRANSFORMERS_CACHE`` which are set at the top of
103
+ # this file to a writable temporary directory.
104
+ tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_NAME)
105
+ model = AutoModelForSeq2SeqLM.from_pretrained(HF_MODEL_NAME)
106
+ model.to(device)
107
+
108
+ _hf_model = model
109
+ _hf_tokenizer = tokenizer
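The lazy-initialisation contract above can be summarised in a short sketch, assuming the module-level names defined in this file: only the first call pays the download and load cost, and later calls return immediately.

    init_hf_model()   # first call: downloads weights, loads model and tokenizer
    init_hf_model()   # second call: returns immediately, globals already set
    assert _hf_model is not None and _hf_tokenizer is not None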
110
 
111
  # Global objects used by the chatbot. They remain ``None`` until
112
  # ``init_chatbot()`` runs. After initialisation, ``_chatbot_embedder`` holds
 
175
  _chatbot_collection = collection
176
 
177
  def get_chatbot_response(query: str) -> str:
178
+ """Generate a reply to the user's query using the knowledge base and a Hugging Face model.
179
+
180
+ This function performs a two‑stage process to answer user questions. First
181
+ it ensures that the vector store and embedder are available via
182
+ ``init_chatbot()``, then embeds the query to retrieve the most relevant
183
+ context chunks from ``chatbot.txt`` using Chroma. Second, it calls
184
+ ``init_hf_model()`` to lazily load a conversational model from Hugging
185
+ Face. The retrieved context, together with a system instruction,
186
+ constitutes the prompt for the model. The model is then run to
187
+ generate an answer. If the user asks a question unrelated to the
188
+ Codingo platform, the system prompt instructs the model to refuse
189
+ politely.
190
 
191
  Parameters
192
  ----------
 
198
  str
199
  The assistant's reply.
200
  """
201
+ # Ensure the embedding model and vector store are ready.
202
  init_chatbot()
203
+ init_hf_model()
 
204
  embedder = _chatbot_embedder
205
  collection = _chatbot_collection
206
+ # Compute embedding for the query and retrieve the top three matching
207
+ # context chunks. Chroma returns a list of documents for each query.
208
  query_embedding = embedder.encode([query])[0]
209
  results = collection.query(query_embeddings=[query_embedding], n_results=3)
210
+ retrieved_docs = results.get('documents', [[]])[0] if results else []
211
  context = "\n".join(retrieved_docs)
212
+ # Construct the system prompt. This instruction encourages the model to
213
+ # answer only questions related to the context and to decline otherwise.
214
  system_prompt = (
215
  "You are a helpful assistant for the Codingo website. "
216
  "Only answer questions that are directly relevant to the context provided. "
217
  "If the user asks anything unrelated, politely refuse by saying: "
218
  "\"I'm only trained to answer questions about the Codingo platform.\""
219
  )
220
+ # Compose the complete prompt with context and user question. Including
221
+ # the system prompt inline helps guide smaller conversational models.
222
+ prompt = f"{system_prompt}\n\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"
223
+ # Generate a response using the Hugging Face model. The global model
224
+ # variables are guaranteed to be initialised by ``init_hf_model()``.
225
+ model = _hf_model
226
+ tokenizer = _hf_tokenizer
227
+ device = model.device
228
+ # Encode the prompt, truncating to the model's maximum input length so a
229
+ # long retrieved context cannot overflow the position embeddings, then
230
+ # generate. The output is capped at 200 tokens to keep answers concise.
231
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
232
+ output_ids = model.generate(
233
+ **inputs,
234
+ max_length=200,
235
+ num_beams=1,
236
+ do_sample=False,
237
+ early_stopping=True
238
  )
239
+ reply = tokenizer.decode(output_ids[0], skip_special_tokens=True)
240
+ # The reply may include the prompt prefix; extract the generated answer
241
+ # following the original prompt. If the model echoes the prompt, we
242
+ # remove the prompt part to return only the answer.
243
+ if reply.startswith(prompt):
244
+ reply = reply[len(prompt):]
245
+ return reply.strip()
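Since ``get_chatbot_response()`` is synchronous and returns plain text, a route handler can call it directly. A minimal sketch of a wrapping endpoint, assuming a hypothetical /chat route and JSON field names that are not part of this commit:

    from flask import request, jsonify

    @app.route("/chat", methods=["POST"])
    def chat():
        # "message" is an assumed field name carrying the user's query.
        query = request.get_json().get("message", "")
        return jsonify({"reply": get_chatbot_response(query)})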
246
 
247
  # Initialize Flask app
248
  app = Flask(
backend/templates/base.html CHANGED
@@ -804,6 +804,7 @@
804
  position: fixed;
805
  bottom: 80px;
806
  right: 20px;
 
807
  width: 300px;
808
  height: 400px;
809
  background: white;
@@ -829,6 +830,26 @@
829
  max-height: 300px;
830
  }
831
 
832
  #chat-input {
833
  border: none;
834
  border-top: 1px solid #ccc;
 
804
  position: fixed;
805
  bottom: 80px;
806
  right: 20px;
807
+ /* Default dimensions for larger screens */
808
  width: 300px;
809
  height: 400px;
810
  background: white;
 
830
  max-height: 300px;
831
  }
832
 
833
+ /* Responsive adjustments for small screens */
834
+ @media (max-width: 600px) {
835
+ #chatbot-box {
836
+ width: 90vw;
837
+ height: 60vh;
838
+ bottom: 70px;
839
+ right: 5vw;
840
+ }
841
+ #chat-messages {
842
+ max-height: calc(60vh - 100px);
843
+ }
844
+ }
845
+ @media (max-width: 400px) {
846
+ #chatbot-toggle {
847
+ bottom: 10px;
848
+ right: 10px;
849
+ padding: 10px 12px;
850
+ }
851
+ }
852
+
853
  #chat-input {
854
  border: none;
855
  border-top: 1px solid #ccc;
chatbot/requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
  flask
2
- flask-cors
3
- groq
 
1
  flask
2
+ flask-cors
 
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
 
2
 
 
  flask
4
  flask_login
5
  flask_sqlalchemy
@@ -48,15 +49,13 @@ gunicorn
48
  python-dotenv
49
 
50
  # --- Chatbot Dependencies ---
51
- # The chatbot feature relies on a vector database and external API calls via the
52
- # OpenAI client. ``chromadb`` provides a simple embedding store for semantic
53
- # search over the knowledge base stored in ``chatbot/chatbot.txt``. ``openai``
54
- # is required to communicate with the Groq API endpoint (which is compatible
55
- # with the OpenAI client). ``flask-cors`` allows cross‑origin requests if we
56
- # decide to decouple the chat interface in the future.
57
  chromadb>=0.4.0
58
- # openai>=1.8.0
59
- openai==0.28
60
  flask-cors>=4.0.0
61
 
62
  # Audio format conversion (critical for WebM/WAV handling)
 
1
 
2
 
3
+
4
  flask
5
  flask_login
6
  flask_sqlalchemy
 
49
  python-dotenv
50
 
51
  # --- Chatbot Dependencies ---
52
+ # The chatbot feature relies on a vector database for semantic search over
53
+ # the knowledge base stored in ``chatbot/chatbot.txt``. ``chromadb`` provides
54
+ # this capability. We removed the OpenAI dependency in favour of a local
55
+ # Hugging Face model, so no openai package is required. ``flask-cors`` is
56
+ # retained to allow cross‑origin requests should the chat UI be decoupled in
57
+ # the future.
58
  chromadb>=0.4.0
59
  flask-cors>=4.0.0
60
 
61
  # Audio format conversion (critical for WebM/WAV handling)