""" | |
codingo_chatbot.py | |
=================== | |
This module encapsulates the logic for Codingo's website chatbot. It | |
loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector | |
database using Chroma and SentenceTransformers, and uses the shared | |
Groq language model (imported from ``backend.services.interview_engine``) | |
to generate answers constrained to the retrieved context. If a Groq API | |
key is not configured, a lightweight dummy model will be used as a | |
fallback. TinyLlama and other local models are no longer used in this | |
module. | |
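
Example usage (an illustrative sketch; it assumes the package is importable
as ``backend.services.codingo_chatbot`` and that the Groq/Chroma setup
described above succeeds)::

    from backend.services.codingo_chatbot import get_response

    answer = get_response("What is Codingo?")
    print(answer)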
""" | |
from __future__ import annotations | |
import os | |
import threading | |
from typing import List | |
import numpy as np | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from sentence_transformers import SentenceTransformer | |
import chromadb | |
from chromadb.config import Settings | |
# Import the shared Groq LLM instance from the interview engine. This ensures | |
# that the chatbot uses the exact same language model as the interview API. | |
from backend.services.interview_engine import groq_llm | |
# The llama_cpp dependency is no longer used for the chatbot. We keep the | |
# import guarded to avoid breaking environments where llama_cpp is not | |
# installed, but it is no longer required for generating responses. | |
try: | |
from llama_cpp import Llama # type: ignore # noqa: F401 | |
except Exception: | |
# We don't raise here because the Groq LLM will be used instead. If | |
# llama_cpp is unavailable, it won't affect chatbot functionality. | |
Llama = None # type: ignore | |

# Configuration
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")

# Generation parameters for the Groq LLM. These values can be adjusted via
# environment variables if desired. They loosely mirror the previous TinyLlama
# settings but are applied when constructing prompts for the Groq LLM. Note
# that Groq models internally determine sampling behaviour; these variables
# mainly govern how much content we include in the prompt and do not directly
# control the sampling temperature of the Groq API.
MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))  # kept for compatibility
TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))  # unused but retained
TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))  # unused but retained
REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))  # unused

# Thread lock and globals
_init_lock = threading.Lock()
_embedder: SentenceTransformer | None = None
_collection: chromadb.Collection | None = None
_llm = None  # This will be set to the shared Groq LLM instance


def _load_chatbot_text() -> str:
    """Read the chatbot knowledge base from disk."""
    try:
        with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
            content = f.read()
        # Clean up the content to avoid meta-descriptions:
        # remove any lines that look like instructions about the chatbot.
        lines = content.split('\n')
        cleaned_lines = []
        for line in lines:
            # Skip lines that describe what the chatbot does
            if any(phrase in line.lower() for phrase in [
                'the chatbot', 'this bot', 'the bot provides',
                'chatbot provides', 'chatbot is used for',
                'official chatbot of'
            ]):
                continue
            cleaned_lines.append(line)
        return '\n'.join(cleaned_lines)
    except FileNotFoundError:
        return (
            "Codingo is an AI-powered recruitment platform designed to "
            "streamline job applications, candidate screening and hiring. "
            "We make hiring smarter, faster and fairer through automation "
            "and intelligent recommendations."
        )


def init_embedder_and_db() -> None:
    """Initialize the SentenceTransformer embedder and Chroma vector DB."""
    global _embedder, _collection
    if _embedder is not None and _collection is not None:
        return
    with _init_lock:
        if _embedder is not None and _collection is not None:
            return
        os.makedirs(CHROMA_DB_DIR, exist_ok=True)
        text = _load_chatbot_text()
        # Split into chunks
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,  # increased for better context
            chunk_overlap=100,
            separators=["\n\n", "\n", ". ", " "],
        )
        docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
        # Initialize embedder
        embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
        # Initialize Chroma
        client = chromadb.Client(
            Settings(
                persist_directory=CHROMA_DB_DIR,
                anonymized_telemetry=False,
                is_persistent=True,
            )
        )
        # Create or get collection
        collection = client.get_or_create_collection("codingo_chatbot")
        # Populate if empty
        need_populate = False
        try:
            existing = collection.get(limit=1)
            if not existing or not existing.get("documents"):
                need_populate = True
        except Exception:
            need_populate = True
        if need_populate:
            ids = [f"doc_{i}" for i in range(len(docs))]
            collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
        _embedder = embedder
        _collection = collection
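

# A minimal sketch (for orientation only) of how the populated collection is
# consumed by ``get_response`` below: embed the query with the same
# SentenceTransformer model and ask Chroma for the nearest chunks.
#
#     init_embedder_and_db()
#     vec = _embedder.encode(["How does Codingo screen candidates?"])[0]
#     hits = _collection.query(query_embeddings=[vec.tolist()], n_results=3)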


def init_llm() -> None:
    """
    Initialize the chatbot's language model. This function now assigns
    the globally shared Groq LLM instance imported from the interview
    engine. If the Groq API key is unavailable, the fallback dummy
    model defined in the interview engine will be used automatically.
    """
    global _llm
    if _llm is not None:
        return
    with _init_lock:
        if _llm is not None:
            return
        # Assign the shared Groq LLM instance. This may be a DummyGroq when
        # no API key is provided. We avoid loading any local GGUF models.
        _llm = groq_llm


def _build_prompt(query: str, context: str) -> str:
    """
    Construct a prompt for the Groq LLM. The prompt instructs the model to
    behave as LUNA, Codingo's friendly assistant. It emphasises using only
    information from the provided context to answer the question and
    encourages the model to admit when the answer is unknown. This plain
    format works well with ChatGroq's ``invoke`` API.

    Args:
        query: The user's question.
        context: Concatenated snippets from the knowledge base deemed
            relevant to the query.

    Returns:
        A formatted string prompt ready for submission to the Groq LLM.
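
    Example (illustrative only; the argument strings are assumptions, not
    taken from the knowledge base)::

        prompt = _build_prompt(
            "What is Codingo?",
            "Codingo is an AI-powered recruitment platform.",
        )
        # ``prompt`` contains the LUNA system instructions, the context block
        # and the question, ending with "Answer:".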
""" | |
system_prompt = """ | |
You are LUNA, the official AI assistant of Codingo, an AI-powered recruitment platform. | |
You must: | |
- Answer questions using ONLY the provided context. | |
- Be concise, clear, and professional. | |
- If the context does not have the answer, politely say you do not know. | |
- Never make up features or information not in the context. | |
- Always focus on Codingo’s platform, services, and functionality. | |
""" | |
if context: | |
return ( | |
f"{system_prompt}\n\n" | |
f"Context:\n{context}\n\n" | |
f"Question: {query}\n" | |
f"Answer:" | |
) | |
else: | |
# When no context is available, still pass an empty context so the | |
# model knows there is no supporting information. | |
return ( | |
f"{system_prompt}\n\n" | |
"Context:\n\n" | |
f"Question: {query}\n" | |
f"Answer:" | |
) | |


def get_response(query: str, k: int = 3, score_threshold: float = 2.0) -> str:
    """
    Generate a response to the user's query using the shared Groq LLM and the
    chatbot's knowledge base. The function retrieves relevant context
    passages from the vector store, constructs a prompt instructing the
    model to answer as LUNA using only that context, and returns the
    resulting answer. If no passages pass the relevance threshold, the full
    knowledge-base file is used as context instead; if that file is also
    missing, a polite fallback message is returned without calling the LLM.

    Args:
        query: The user's question or statement.
        k: Number of nearest-neighbour documents to retrieve from the
            knowledge base (default 3).
        score_threshold: Maximum distance for a document to be considered
            relevant (smaller means more similar).

    Returns:
        A string response appropriate for the chatbot UI.
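
    Example (illustrative; the actual wording depends on the Groq model and
    the contents of ``chatbot/chatbot.txt``)::

        answer = get_response("How does Codingo screen candidates?")
        # ``answer`` is a plain string suitable for rendering in the chat UI.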
""" | |
# Handle empty queries gracefully | |
if not query or not query.strip(): | |
return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?" | |
# Initialise embedder, vector DB and LLM if necessary | |
init_embedder_and_db() | |
init_llm() | |
# If embedder or collection or LLM didn't initialise, provide a safe fallback | |
if _embedder is None or _collection is None or _llm is None: | |
return "I'm sorry, I'm unable to process your request right now. Please try again later." | |
# Normalise for simple greetings | |
greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening'] | |
if query.lower().strip() in greetings: | |
return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?" | |
# Embed query and search for relevant documents | |
query_vector = _embedder.encode([query])[0] | |
results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k) | |
docs = results.get("documents", [[]])[0] if results else [] | |
distances = results.get("distances", [[]])[0] if results else [] | |
# Filter by distance threshold | |
relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold] | |
    if relevant:
        # Concatenate the most relevant passages for context (use the top 2)
        context = "\n\n".join(relevant[:2])
    else:
        # No passage passed the distance threshold: fall back to the full
        # knowledge-base file, or politely admit ignorance if it is missing.
        try:
            with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
                context = f.read()
        except FileNotFoundError:
            return (
                "I'm sorry, I don't know the answer to that question based on my knowledge. "
                "Could you ask something else about Codingo or its services?"
            )
    prompt = _build_prompt(query, context)
    try:
        # Invoke the Groq LLM. The ``invoke`` method may return an object
        # with a ``content`` attribute or a plain string, depending on the
        # backend. We handle both cases transparently.
        response = _llm.invoke(prompt)
    except Exception:
        # If invocation fails, return a generic error message
        return "I'm sorry, I encountered an error while generating a response. Please try again later."
    # Extract text from the LLM response
    if hasattr(response, 'content'):
        text = str(response.content).strip()
    elif isinstance(response, dict):
        # Some wrappers may return dicts (e.g. ChatCompletion). Try common keys.
        text = response.get('message', '') or response.get('text', '') or str(response)
        text = text.strip()
    else:
        text = str(response).strip()
    # Post-process the answer: remove unwanted phrases referring to the bot
    lines = text.split('\n')
    cleaned_lines = []
    for line in lines:
        lower_line = line.lower()
        if any(phrase in lower_line for phrase in [
            'the chatbot', 'this bot', 'the bot provides',
            'in response to', 'overall,',
            'as an ai language model'
        ]):
            continue
        cleaned_lines.append(line)
    cleaned_text = '\n'.join(cleaned_lines).strip()
    # Ensure we return some meaningful text
    return cleaned_text or (
        "I'm sorry, I couldn't generate a proper response. Could you rephrase your question?"
    )