""" codingo_chatbot.py =================== This module encapsulates the logic for Codingo's website chatbot. It loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector database using Chroma and SentenceTransformers, and uses the shared Groq language model (imported from ``backend.services.interview_engine``) to generate answers constrained to the retrieved context. If a Groq API key is not configured, a lightweight dummy model will be used as a fallback. TinyLlama and other local models are no longer used in this module. """ from __future__ import annotations import os import threading from typing import List import numpy as np from langchain.text_splitter import RecursiveCharacterTextSplitter from sentence_transformers import SentenceTransformer import chromadb from chromadb.config import Settings # Import the shared Groq LLM instance from the interview engine. This ensures # that the chatbot uses the exact same language model as the interview API. from backend.services.interview_engine import groq_llm # The llama_cpp dependency is no longer used for the chatbot. We keep the # import guarded to avoid breaking environments where llama_cpp is not # installed, but it is no longer required for generating responses. try: from llama_cpp import Llama # type: ignore # noqa: F401 except Exception: # We don't raise here because the Groq LLM will be used instead. If # llama_cpp is unavailable, it won't affect chatbot functionality. Llama = None # type: ignore # Configuration PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt") CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma") # Generation parameters for the Groq LLM. These values can be adjusted via # environment variables if desired. They loosely mirror the previous TinyLlama # settings but are applied when constructing prompts for the Groq LLM. Note # that Groq models internally determine sampling behaviour; these variables # mainly govern how much content we include in the prompt and do not directly # control the sampling temperature of the Groq API. MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512")) # kept for compatibility TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3")) # unused but retained TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9")) # unused but retained REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1")) # unused # Thread lock and globals _init_lock = threading.Lock() _embedder: SentenceTransformer | None = None _collection: chromadb.Collection | None = None _llm = None # This will be set to the shared Groq LLM instance def _load_chatbot_text() -> str: """Read the chatbot knowledge base from disk.""" try: with open(CHATBOT_TXT_PATH, encoding="utf-8") as f: content = f.read() # Clean up the content to avoid meta-descriptions # Remove any lines that look like instructions about the chatbot lines = content.split('\n') cleaned_lines = [] for line in lines: # Skip lines that describe what the chatbot does if any(phrase in line.lower() for phrase in [ 'the chatbot', 'this bot', 'the bot provides', 'chatbot provides', 'chatbot is used for', 'official chatbot of' ]): continue cleaned_lines.append(line) return '\n'.join(cleaned_lines) except FileNotFoundError: return ( "Codingo is an AI‑powered recruitment platform designed to " "streamline job applications, candidate screening and hiring. " "We make hiring smarter, faster and fairer through automation " "and intelligent recommendations." 
        )


def init_embedder_and_db() -> None:
    """Initialize the SentenceTransformer embedder and the Chroma vector DB."""
    global _embedder, _collection
    if _embedder is not None and _collection is not None:
        return
    with _init_lock:
        if _embedder is not None and _collection is not None:
            return
        os.makedirs(CHROMA_DB_DIR, exist_ok=True)
        text = _load_chatbot_text()

        # Split the knowledge base into overlapping chunks
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,  # larger chunks give the model more context
            chunk_overlap=100,
            separators=["\n\n", "\n", ". ", " "],
        )
        docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]

        # Initialize the embedder
        embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)

        # Initialize Chroma
        client = chromadb.Client(
            Settings(
                persist_directory=CHROMA_DB_DIR,
                anonymized_telemetry=False,
                is_persistent=True,
            )
        )

        # Create or get the collection, and populate it if it is empty
        collection = client.get_or_create_collection("codingo_chatbot")
        need_populate = False
        try:
            existing = collection.get(limit=1)
            if not existing or not existing.get("documents"):
                need_populate = True
        except Exception:
            need_populate = True
        if need_populate:
            ids = [f"doc_{i}" for i in range(len(docs))]
            collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)

        _embedder = embedder
        _collection = collection


def init_llm() -> None:
    """
    Initialize the chatbot's language model.

    This function assigns the globally shared Groq LLM instance imported from
    the interview engine. If the Groq API key is unavailable, the fallback
    dummy model defined in the interview engine will be used automatically.
    """
    global _llm
    if _llm is not None:
        return
    with _init_lock:
        if _llm is not None:
            return
        # Assign the shared Groq LLM instance. This may be a DummyGroq when
        # no API key is provided. We avoid loading any local GGUF models.
        _llm = groq_llm


def _build_prompt(query: str, context: str) -> str:
    """
    Construct a prompt for the Groq LLM.

    The prompt instructs the model to behave as LUNA, Codingo's friendly
    assistant. It emphasises using only information from the provided context
    to answer the question and encourages the model to admit when the answer
    is unknown. This plain format works well with ChatGroq's ``invoke`` API.

    Args:
        query: The user's question.
        context: Concatenated snippets from the knowledge base deemed
            relevant to the query.

    Returns:
        A formatted string prompt ready for submission to the Groq LLM.
    """
    system_prompt = """
You are LUNA, the official AI assistant of Codingo, an AI-powered recruitment platform.

You must:
- Answer questions using ONLY the provided context.
- Be concise, clear, and professional.
- If the context does not have the answer, politely say you do not know.
- Never make up features or information not in the context.
- Always focus on Codingo's platform, services, and functionality.
"""
    if context:
        return (
            f"{system_prompt}\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {query}\n"
            f"Answer:"
        )
    else:
        # When no context is available, still pass an empty context section so
        # the model knows there is no supporting information.
        return (
            f"{system_prompt}\n\n"
            "Context:\n\n"
            f"Question: {query}\n"
            f"Answer:"
        )


def get_response(query: str, k: int = 3, score_threshold: float = 2.0) -> str:
    """
    Generate a response to the user's query using the shared Groq LLM and the
    chatbot's knowledge base.

    The function retrieves relevant context passages from the vector store,
    constructs a prompt instructing the model to answer as LUNA using only
    that context, and returns the resulting answer. If no relevant passages
    are found, the full knowledge base is used as context; a polite fallback
    message is returned without calling the LLM only when the knowledge base
    itself is unavailable.

    Args:
        query: The user's question or statement.
        k: Number of nearest-neighbour documents to retrieve from the
            knowledge base (default 3).
        score_threshold: Maximum distance for a document to be considered
            relevant (smaller means more similar).

    Returns:
        A string response appropriate for the chatbot UI.
    """
    # Handle empty queries gracefully
    if not query or not query.strip():
        return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"

    # Initialise the embedder, vector DB and LLM if necessary
    init_embedder_and_db()
    init_llm()

    # If the embedder, collection or LLM did not initialise, fall back safely
    if _embedder is None or _collection is None or _llm is None:
        return "I'm sorry, I'm unable to process your request right now. Please try again later."

    # Short-circuit simple greetings
    greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
    if query.lower().strip() in greetings:
        return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"

    # Embed the query and search for relevant documents
    query_vector = _embedder.encode([query])[0]
    results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)
    docs = results.get("documents", [[]])[0] if results else []
    distances = results.get("distances", [[]])[0] if results else []

    # Filter by distance threshold (smaller distances mean higher similarity)
    relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]

    if not relevant:
        # No relevant passages: fall back to the full knowledge base, or
        # politely admit ignorance if it cannot be read.
        try:
            with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
                context = f.read()
        except FileNotFoundError:
            return (
                "I'm sorry, I don't know the answer to that question based on my knowledge. "
                "Could you ask something else about Codingo or its services?"
            )
    else:
        # Concatenate the most relevant passages for context (use top 2)
        context = "\n\n".join(relevant[:2])

    prompt = _build_prompt(query, context)
    try:
        # Invoke the Groq LLM. The ``invoke`` method may return an object
        # with a ``content`` attribute or a plain string, depending on the
        # backend. We handle both cases transparently.
        response = _llm.invoke(prompt)
    except Exception:
        # If invocation fails, return a generic error message
        return "I'm sorry, I encountered an error while generating a response. Please try again later."

    # Extract text from the LLM response
    if hasattr(response, 'content'):
        text = str(response.content).strip()
    elif isinstance(response, dict):
        # Some wrappers may return dicts (e.g. ChatCompletion). Try common keys.
        text = response.get('message', '') or response.get('text', '') or str(response)
        text = text.strip()
    else:
        text = str(response).strip()

    # Post-process the answer: remove unwanted phrases referring to the bot
    lines = text.split('\n')
    cleaned_lines = []
    for line in lines:
        lower_line = line.lower()
        if any(phrase in lower_line for phrase in [
            'the chatbot', 'this bot', 'the bot provides', 'in response to',
            'overall,', 'as an ai language model'
        ]):
            continue
        cleaned_lines.append(line)
    cleaned_text = '\n'.join(cleaned_lines).strip()

    # Ensure we return some meaningful text
    return cleaned_text or (
        "I'm sorry, I couldn't generate a proper response. Could you rephrase your question?"
    )
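

# ---------------------------------------------------------------------------
# Illustrative usage (sketch only, not a production entry point): running this
# module directly exercises the retrieval and generation pipeline end to end.
# It assumes ``backend.services.interview_engine`` is importable and that
# either a Groq API key or its dummy fallback is configured.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # get_response() lazily initialises the embedder, vector store and LLM on
    # the first call, so no explicit setup is needed here.
    for question in ("hi", "What does Codingo do?"):
        print(f"Q: {question}")
        print(f"A: {get_response(question)}")
        print("-" * 40)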