"""
codingo_chatbot.py
===================
This module encapsulates the logic for Codingo's website chatbot. It
loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector
database using Chroma and SentenceTransformers, and uses the shared
Groq language model (imported from ``backend.services.interview_engine``)
to generate answers constrained to the retrieved context. If a Groq API
key is not configured, a lightweight dummy model will be used as a
fallback. TinyLlama and other local models are no longer used in this
module.
"""
from __future__ import annotations
import os
import threading
from typing import List
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
# Import the shared Groq LLM instance from the interview engine. This ensures
# that the chatbot uses the exact same language model as the interview API.
from backend.services.interview_engine import groq_llm
# The llama_cpp dependency is no longer used for the chatbot. We keep the
# import guarded to avoid breaking environments where llama_cpp is not
# installed, but it is no longer required for generating responses.
try:
from llama_cpp import Llama # type: ignore # noqa: F401
except Exception:
# We don't raise here because the Groq LLM will be used instead. If
# llama_cpp is unavailable, it won't affect chatbot functionality.
Llama = None # type: ignore
# Configuration
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")
# Legacy generation parameters. These are read from the same environment
# variables as the previous TinyLlama setup so existing deployments keep
# working, but they are not currently passed to the Groq API, which manages
# its own sampling behaviour (temperature, top-p, etc.).
MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512")) # kept for compatibility
TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3")) # unused but retained
TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9")) # unused but retained
REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1")) # unused
# Thread lock and globals
_init_lock = threading.Lock()
_embedder: SentenceTransformer | None = None
_collection: chromadb.Collection | None = None
_llm = None # This will be set to the shared Groq LLM instance
def _load_chatbot_text() -> str:
"""Read the chatbot knowledge base from disk."""
try:
with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
content = f.read()
# Clean up the content to avoid meta-descriptions
# Remove any lines that look like instructions about the chatbot
lines = content.split('\n')
cleaned_lines = []
for line in lines:
# Skip lines that describe what the chatbot does
if any(phrase in line.lower() for phrase in [
'the chatbot', 'this bot', 'the bot provides',
'chatbot provides', 'chatbot is used for',
'official chatbot of'
]):
continue
cleaned_lines.append(line)
return '\n'.join(cleaned_lines)
except FileNotFoundError:
return (
"Codingo is an AI‑powered recruitment platform designed to "
"streamline job applications, candidate screening and hiring. "
"We make hiring smarter, faster and fairer through automation "
"and intelligent recommendations."
)
def init_embedder_and_db() -> None:
"""Initialize the SentenceTransformer embedder and Chroma vector DB."""
global _embedder, _collection
if _embedder is not None and _collection is not None:
return
with _init_lock:
if _embedder is not None and _collection is not None:
return
os.makedirs(CHROMA_DB_DIR, exist_ok=True)
text = _load_chatbot_text()
# Split into chunks
splitter = RecursiveCharacterTextSplitter(
chunk_size=500, # Increased for better context
chunk_overlap=100,
separators=["\n\n", "\n", ". ", " "],
)
docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
# Initialize embedder
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
# Initialize Chroma
client = chromadb.Client(
Settings(
persist_directory=CHROMA_DB_DIR,
anonymized_telemetry=False,
is_persistent=True,
)
)
# Create or get collection
collection = client.get_or_create_collection("codingo_chatbot")
# Populate if empty
need_populate = False
try:
existing = collection.get(limit=1)
if not existing or not existing.get("documents"):
need_populate = True
except Exception:
need_populate = True
if need_populate:
ids = [f"doc_{i}" for i in range(len(docs))]
collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
_embedder = embedder
_collection = collection
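# Note (illustrative numbers, not measured): with chunk_size=500 and
# chunk_overlap=100, a knowledge base of roughly 10 KB splits into a few dozen
# chunks, each encoded by all-MiniLM-L6-v2 into a 384-dimensional vector
# before being stored in the Chroma collection.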
def init_llm() -> None:
"""
Initialize the chatbot's language model. This function now assigns
the globally shared Groq LLM instance imported from the interview
engine. If the Groq API key is unavailable, the fallback dummy
model defined in the interview engine will be used automatically.
"""
global _llm
if _llm is not None:
return
with _init_lock:
if _llm is not None:
return
# Assign the shared Groq LLM instance. This may be a DummyGroq when
# no API key is provided. We avoid loading any local GGUF models.
_llm = groq_llm
def _build_prompt(query: str, context: str) -> str:
"""
Construct a prompt for the Groq LLM. The prompt instructs the model to
behave as LUNA, Codingo's friendly assistant. It emphasises using only
information from the provided context to answer the question and
encourages the model to admit when the answer is unknown. This plain
format works well with ChatGroq's ``invoke`` API.
Args:
query: The user's question.
context: Concatenated snippets from the knowledge base deemed
relevant to the query.
Returns:
A formatted string prompt ready for submission to the Groq LLM.
"""
system_prompt = """
You are LUNA, the official AI assistant of Codingo, an AI-powered recruitment platform.
You must:
- Answer questions using ONLY the provided context.
- Be concise, clear, and professional.
- If the context does not have the answer, politely say you do not know.
- Never make up features or information not in the context.
- Always focus on Codingo’s platform, services, and functionality.
"""
if context:
return (
f"{system_prompt}\n\n"
f"Context:\n{context}\n\n"
f"Question: {query}\n"
f"Answer:"
)
else:
# When no context is available, still pass an empty context so the
# model knows there is no supporting information.
return (
f"{system_prompt}\n\n"
"Context:\n\n"
f"Question: {query}\n"
f"Answer:"
)
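# Illustrative only: for a query like "What is Codingo?" with one retrieved
# snippet, _build_prompt returns roughly the following (whitespace condensed):
#
#   You are LUNA, the official AI assistant of Codingo, ...
#
#   Context:
#   Codingo is an AI-powered recruitment platform designed to ...
#
#   Question: What is Codingo?
#   Answer: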
def get_response(query: str, k: int = 3, score_threshold: float = 2.0) -> str:
"""
Generate a response to the user's query using the shared Groq LLM and the
chatbot's knowledge base. The function retrieves relevant context
passages from the vector store, constructs a prompt instructing the
    model to answer as LUNA using only that context, and returns the
    resulting answer. If no passage clears the relevance threshold, the full
    knowledge-base text is used as the context instead; a polite fallback
    message is returned without calling the LLM only if that file is missing.
Args:
query: The user's question or statement.
k: Number of nearest neighbour documents to retrieve from the
knowledge base (default 3).
score_threshold: Maximum distance for a document to be considered
relevant (smaller means more similar).
Returns:
A string response appropriate for the chatbot UI.
"""
# Handle empty queries gracefully
if not query or not query.strip():
return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"
# Initialise embedder, vector DB and LLM if necessary
init_embedder_and_db()
init_llm()
# If embedder or collection or LLM didn't initialise, provide a safe fallback
if _embedder is None or _collection is None or _llm is None:
return "I'm sorry, I'm unable to process your request right now. Please try again later."
    # Short-circuit simple greetings without calling the LLM
greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
if query.lower().strip() in greetings:
return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"
# Embed query and search for relevant documents
query_vector = _embedder.encode([query])[0]
results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)
docs = results.get("documents", [[]])[0] if results else []
distances = results.get("distances", [[]])[0] if results else []
# Filter by distance threshold
relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]
    # If no relevant passages are found, fall back to the full knowledge base;
    # only admit ignorance if that file cannot be read
if not relevant:
try:
with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
full_context = f.read()
context = full_context
except FileNotFoundError:
return (
"I'm sorry, I don't know the answer to that question based on my knowledge. "
"Could you ask something else about Codingo or its services?"
)
    else:
        # Concatenate the most relevant passages for context (use top 2)
        context = "\n\n".join(relevant[:2])
prompt = _build_prompt(query, context)
try:
# Invoke the Groq LLM. The ``invoke`` method may return an object
# with a ``content`` attribute or a plain string, depending on the
# backend. We handle both cases transparently.
response = _llm.invoke(prompt)
except Exception:
# If invocation fails, return a generic error message
return "I'm sorry, I encountered an error while generating a response. Please try again later."
# Extract text from the LLM response
if hasattr(response, 'content'):
text = str(response.content).strip()
elif isinstance(response, dict):
# Some wrappers may return dicts (e.g. ChatCompletion). Try common keys.
text = response.get('message', '') or response.get('text', '') or str(response)
text = text.strip()
else:
text = str(response).strip()
# Post-process the answer: remove unwanted phrases referring to the bot
lines = text.split('\n')
cleaned_lines = []
for line in lines:
lower_line = line.lower()
if any(phrase in lower_line for phrase in [
'the chatbot', 'this bot', 'the bot provides',
'in response to', 'overall,',
'as an ai language model'
]):
continue
cleaned_lines.append(line)
cleaned_text = '\n'.join(cleaned_lines).strip()
# Ensure we return some meaningful text
return cleaned_text or (
"I'm sorry, I couldn't generate a proper response. Could you rephrase your question?"
)
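# Minimal manual smoke test (illustrative; not part of the web app's import
# path). It assumes chatbot/chatbot.txt exists and that either a Groq API key
# is configured or the interview engine's dummy fallback model is acceptable.
if __name__ == "__main__":
    for sample_query in ("hi", "What is Codingo?"):
        print(f"Q: {sample_query}")
        print(f"A: {get_response(sample_query)}")
        print("-" * 40)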