husseinelsaadi committed on
Commit 72f831c · 1 Parent(s): bef6630
Files changed (1)
  1. chatbot/chatbot.py +35 -143
chatbot/chatbot.py CHANGED
@@ -1,121 +1,65 @@
- """
- Chatbot module for Codingo
- ==========================
-
- This module encapsulates all functionality required to serve answers to
- questions about the Codingo platform. It loads a small conversational
- model from Hugging Face and a lightweight vector database populated from
- ``chatbot.txt``. When a user asks a question, the module retrieves
- relevant snippets from the knowledge base and feeds them into the
- language model to generate a friendly response.
-
- Key features:
-
- * Completely self‑contained: there are no OpenAI or external API
-   dependencies. Only free, locally hosted Hugging Face models are used.
- * Lazy initialisation: the model and vector store are loaded on the
-   first call to ``get_chatbot_response``. Subsequent calls reuse
-   existing objects, avoiding expensive reloads.
- * GPU support: if a CUDA device is available, the model is automatically
-   moved onto the GPU for faster inference.
-
- This file lives inside ``codingo/chatbot`` alongside ``chatbot.txt``.
- ``chatbot.txt`` should contain a plain‑text knowledge base of
- Codingo‑specific information and FAQs. Feel free to update the
- contents of that file without touching any code here.
- """
-
- from __future__ import annotations

  import os
  import shutil
  from typing import List

- # -----------------------------------------------------------------------------
- # Environment configuration
- #
- # We set a few environment variables to force Hugging Face to store model
- # weights and tokeniser files inside ``/tmp``. Hugging Face Spaces
- # provisions a read‑only file system outside of ``/tmp``, so without these
- # settings the transformers library might attempt to write into
- # unwritable locations. These variables have no effect if the same
- # variables are already set by the hosting environment.
-
  os.environ.setdefault("HF_HOME", "/tmp/huggingface")
  os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
  os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")

- # -----------------------------------------------------------------------------
- # Module‑level state
- _hf_model = None  # type: ignore[assignment]
- _hf_tokenizer = None  # type: ignore[assignment]
- _chatbot_embedder = None  # type: ignore[assignment]
- _chatbot_collection = None  # type: ignore[assignment]

- # Paths
  _current_dir = os.path.dirname(os.path.abspath(__file__))
  _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
  _chroma_db_dir = "/tmp/chroma_db"

- # Default Hugging Face model for FAQ‑style Q&A. You can override this
- # behaviour at deployment time by setting the ``HF_CHATBOT_MODEL``
- # environment variable. DialoGPT is a lightweight conversational model
- # suitable for generating coherent short answers. If you need more
- # open‑domain capability, consider ``facebook/blenderbot-400M-distill``.
- DEFAULT_MODEL_NAME = "microsoft/DialoGPT-medium"
-

  def _init_hf_model() -> None:
-     """Load the Hugging Face model and tokenizer if not already loaded."""
      global _hf_model, _hf_tokenizer
      if _hf_model is not None and _hf_tokenizer is not None:
          return

-     from transformers import AutoModelForCausalLM, AutoTokenizer  # slow import
-     import torch
-
      model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
-     # Choose GPU if available; otherwise CPU
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-     # Download and load tokenizer and model. They will be cached under
-     # the directories specified above. If running for the first time on
-     # Hugging Face Spaces, model download may take a while.
      tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-
-     # Ensure the pad token is defined. Many casual conversation models
-     # reuse the end‑of‑sentence token for padding.
      if tokenizer.pad_token is None:
          tokenizer.pad_token = tokenizer.eos_token

      _hf_model = model
      _hf_tokenizer = tokenizer

-
  def _init_vector_store() -> None:
-     """Initialise the Chroma vector store from ``chatbot.txt`` if needed."""
      global _chatbot_embedder, _chatbot_collection
      if _chatbot_embedder is not None and _chatbot_collection is not None:
          return

-     # Import heavy dependencies lazily to reduce module import time
      from langchain.text_splitter import RecursiveCharacterTextSplitter
      from sentence_transformers import SentenceTransformer
      import chromadb
      from chromadb.config import Settings

-     # Clear out any legacy database path that might be unwritable. Previous
-     # versions of this project wrote under ``/app/chatbot/chroma_db`` which
-     # fails on Hugging Face Spaces. The ``ignore_errors=True`` flag
-     # suppresses FileNotFoundError.
      shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
-
      os.makedirs(_chroma_db_dir, exist_ok=True)
-
-     # Read the knowledge base file. If the file is missing, fall back to a
-     # minimal description of Codingo so the chatbot can still respond.
      try:
          with open(_knowledge_base_path, encoding="utf-8") as f:
              raw_text = f.read()
@@ -127,27 +71,16 @@ def _init_vector_store() -> None:
              "and intelligent recommendations."
          )

-     # Split the knowledge base into overlapping chunks for semantic search.
      splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
      docs: List[str] = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
-
-     # Embed the chunks using a small sentence transformer. This model is
-     # lightweight (~80 MB) and works well for semantic similarity tasks.
      embedder = SentenceTransformer("all-MiniLM-L6-v2")
      embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
-
-     # Initialise a persistent Chroma client. We disable anonymous telemetry
-     # because the environment has no outbound internet access.
      client = chromadb.Client(Settings(
          persist_directory=_chroma_db_dir,
          anonymized_telemetry=False,
          is_persistent=True,
      ))
-
-     # Create or retrieve the "chatbot" collection within the database.
      collection = client.get_or_create_collection("chatbot")
-
-     # If no documents are present, populate the collection with our chunks.
      try:
          existing = collection.get(limit=1)
          if not existing.get("documents"):
@@ -159,77 +92,39 @@ def _init_vector_store() -> None:
      _chatbot_embedder = embedder
      _chatbot_collection = collection

-
  def get_chatbot_response(query: str) -> str:
-     """
-     Generate a chatbot reply to the given user query.
-
-     The response is generated by retrieving up to three relevant snippets
-     from the knowledge base using the MiniLM embeddings and then feeding
-     those snippets together with the user question into the conversational
-     model. If no relevant information is found or the model generates
-     an empty response, a helpful fallback message is returned.
-
-     Parameters
-     ----------
-     query : str
-         The user's message. Should be non‑empty and related to the
-         Codingo platform.
-
-     Returns
-     -------
-     str
-         The chatbot's reply, always a string.
-     """
-     # Basic validation of the query string
      if not query or not query.strip():
          return "Please type a question about the Codingo platform."

-     # Lazy initialisation of the vector store and Hugging Face model
      _init_vector_store()
      _init_hf_model()
-
-     # Unpack state
-     embedder = _chatbot_embedder  # type: ignore[assignment]
-     collection = _chatbot_collection  # type: ignore[assignment]
-     model = _hf_model  # type: ignore[assignment]
-     tokenizer = _hf_tokenizer  # type: ignore[assignment]

      import torch

-     # Embed the incoming query using the same sentence transformer
-     query_embedding = embedder.encode([query])[0]  # type: ignore[operator]
-     # Retrieve the three most similar documents from the vector store
      results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
      retrieved_docs = results.get("documents", [[]])[0] if results else []
-
-     # Build a context string from the retrieved documents
      context = "\n".join(retrieved_docs[:3])

-     # Compose the system instruction. The model is prompted as a
-     # persona called LUNA AI. Keep responses concise and friendly, and
-     # redirect politely on irrelevant questions.
      system_instruction = (
          "You are LUNA AI, a helpful assistant for the Codingo recruitment "
          "platform. Use the provided context to answer questions about "
          "Codingo. If the question is not related to Codingo, politely "
          "redirect the conversation. Keep responses concise and friendly."
      )
-
-     prompt = (
-         f"{system_instruction}\n\nContext:\n{context}\n\n"
-         f"User: {query}\nLUNA AI:"
-     )
-
-     # Tokenise the prompt and truncate to the maximum input length supported
-     inputs = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=512, padding=True)
-     inputs = inputs.to(model.device)
-
-     # Generate a continuation from the model
      with torch.no_grad():
          output_ids = model.generate(
              inputs,
-             max_length=inputs.shape[1] + 150,
              num_beams=3,
              do_sample=True,
              temperature=0.7,
@@ -238,17 +133,14 @@ def get_chatbot_response(query: str) -> str:
              early_stopping=True,
          )

-     # Decode the output and strip the prompt from the beginning
      response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
      if "LUNA AI:" in response:
          response = response.split("LUNA AI:")[-1].strip()
      elif prompt in response:
          response = response.replace(prompt, "").strip()

-     # Fallback if the model didn't return anything useful
-     if not response:
-         return (
-             "I'm here to help you with questions about the Codingo platform. "
-             "What would you like to know?"
-         )
-     return response
 
+ # codingo/chatbot/chatbot.py
+ """Chatbot module for Codingo
+
+ Default model changed to blenderbot-400M-distill; generation uses max_new_tokens; fallback between causal and seq2seq models."""

  import os
  import shutil
  from typing import List

  os.environ.setdefault("HF_HOME", "/tmp/huggingface")
  os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
  os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")

+ _hf_model = None
+ _hf_tokenizer = None
+ _chatbot_embedder = None
+ _chatbot_collection = None

  _current_dir = os.path.dirname(os.path.abspath(__file__))
  _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
  _chroma_db_dir = "/tmp/chroma_db"

+ DEFAULT_MODEL_NAME = "facebook/blenderbot-400M-distill"

  def _init_hf_model() -> None:
+     from transformers import (
+         AutoModelForCausalLM,
+         AutoModelForSeq2SeqLM,
+         AutoTokenizer,
+     )
+     import torch
+
      global _hf_model, _hf_tokenizer
      if _hf_model is not None and _hf_tokenizer is not None:
          return

      model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      tokenizer = AutoTokenizer.from_pretrained(model_name)
+     try:
+         model = AutoModelForCausalLM.from_pretrained(model_name)
+     except Exception:
+         model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+     model = model.to(device)
      if tokenizer.pad_token is None:
          tokenizer.pad_token = tokenizer.eos_token

      _hf_model = model
      _hf_tokenizer = tokenizer

  def _init_vector_store() -> None:
      global _chatbot_embedder, _chatbot_collection
      if _chatbot_embedder is not None and _chatbot_collection is not None:
          return

      from langchain.text_splitter import RecursiveCharacterTextSplitter
      from sentence_transformers import SentenceTransformer
      import chromadb
      from chromadb.config import Settings

      shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
      os.makedirs(_chroma_db_dir, exist_ok=True)
      try:
          with open(_knowledge_base_path, encoding="utf-8") as f:
              raw_text = f.read()

              "and intelligent recommendations."
          )

      splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
      docs: List[str] = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
      embedder = SentenceTransformer("all-MiniLM-L6-v2")
      embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
      client = chromadb.Client(Settings(
          persist_directory=_chroma_db_dir,
          anonymized_telemetry=False,
          is_persistent=True,
      ))
      collection = client.get_or_create_collection("chatbot")
      try:
          existing = collection.get(limit=1)
          if not existing.get("documents"):

      _chatbot_embedder = embedder
      _chatbot_collection = collection

  def get_chatbot_response(query: str) -> str:
      if not query or not query.strip():
          return "Please type a question about the Codingo platform."

      _init_vector_store()
      _init_hf_model()
+     embedder = _chatbot_embedder
+     collection = _chatbot_collection
+     model = _hf_model
+     tokenizer = _hf_tokenizer

      import torch

+     query_embedding = embedder.encode([query])[0]
      results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
      retrieved_docs = results.get("documents", [[]])[0] if results else []
      context = "\n".join(retrieved_docs[:3])

      system_instruction = (
          "You are LUNA AI, a helpful assistant for the Codingo recruitment "
          "platform. Use the provided context to answer questions about "
          "Codingo. If the question is not related to Codingo, politely "
          "redirect the conversation. Keep responses concise and friendly."
      )
+     prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser: {query}\nLUNA AI:"
+     inputs = tokenizer.encode(
+         prompt, return_tensors="pt", truncation=True, max_length=512, padding=True
+     ).to(model.device)

      with torch.no_grad():
          output_ids = model.generate(
              inputs,
+             max_new_tokens=150,
              num_beams=3,
              do_sample=True,
              temperature=0.7,

              early_stopping=True,
          )

      response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
      if "LUNA AI:" in response:
          response = response.split("LUNA AI:")[-1].strip()
      elif prompt in response:
          response = response.replace(prompt, "").strip()

+     return (
+         response
+         if response
+         else "I'm here to help you with questions about the Codingo platform. What would you like to know?"
+     )
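
A minimal usage sketch of the module after this commit (not part of the diff), assuming the repository root is on the Python path so the file is importable as chatbot.chatbot and that chatbot.txt sits next to it; the HF_CHATBOT_MODEL override and the lazy first-call initialisation follow from the code above.

import os

# Optional override; if unset, DEFAULT_MODEL_NAME ("facebook/blenderbot-400M-distill") is used.
# It must be set before the first call, because _init_hf_model() reads it lazily.
os.environ["HF_CHATBOT_MODEL"] = "facebook/blenderbot-400M-distill"

from chatbot.chatbot import get_chatbot_response

# The first call builds the Chroma collection from chatbot.txt and loads the model;
# later calls reuse the cached module-level objects.
print(get_chatbot_response("What is Codingo and how do I apply for a job?"))
print(get_chatbot_response(""))  # empty input returns the validation message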