""" codingo_chatbot.py =================== This module encapsulates the logic for Codingo's website chatbot. It loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector database using Chroma and SentenceTransformers, and uses a local LLM powered by ``llama‑cpp‑python`` to generate answers constrained to the retrieved context. """ from __future__ import annotations import os import threading from typing import List import numpy as np from langchain.text_splitter import RecursiveCharacterTextSplitter from sentence_transformers import SentenceTransformer import chromadb from chromadb.config import Settings from huggingface_hub import hf_hub_download try: from llama_cpp import Llama # type: ignore except Exception as exc: # pragma: no cover - import may fail until dependency installed raise ImportError( "llama_cpp is required for the chatbot. Please add 'llama-cpp-python' " "to your requirements.txt" ) from exc # Configuration PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt") CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma") # TinyLlama model settings LLAMA_REPO = os.getenv("LLAMA_REPO", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF") LLAMA_FILE = os.getenv("LLAMA_FILE", "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") LLAMA_LOCAL_DIR = os.path.join("/tmp", "llama_models") # Generation parameters - adjusted for better responses MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512")) TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3")) TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9")) REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1")) # Thread lock and globals _init_lock = threading.Lock() _embedder: SentenceTransformer | None = None _collection: chromadb.Collection | None = None _llm: Llama | None = None def _load_chatbot_text() -> str: """Read the chatbot knowledge base from disk.""" try: with open(CHATBOT_TXT_PATH, encoding="utf-8") as f: content = f.read() # Clean up the content to avoid meta-descriptions # Remove any lines that look like instructions about the chatbot lines = content.split('\n') cleaned_lines = [] for line in lines: # Skip lines that describe what the chatbot does if any(phrase in line.lower() for phrase in [ 'the chatbot', 'this bot', 'the bot provides', 'chatbot provides', 'chatbot is used for', 'official chatbot of' ]): continue cleaned_lines.append(line) return '\n'.join(cleaned_lines) except FileNotFoundError: return ( "Codingo is an AI‑powered recruitment platform designed to " "streamline job applications, candidate screening and hiring. " "We make hiring smarter, faster and fairer through automation " "and intelligent recommendations." ) def init_embedder_and_db() -> None: """Initialize the SentenceTransformer embedder and Chroma vector DB.""" global _embedder, _collection if _embedder is not None and _collection is not None: return with _init_lock: if _embedder is not None and _collection is not None: return os.makedirs(CHROMA_DB_DIR, exist_ok=True) text = _load_chatbot_text() # Split into chunks splitter = RecursiveCharacterTextSplitter( chunk_size=500, # Increased for better context chunk_overlap=100, separators=["\n\n", "\n", ". 
", " "], ) docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()] # Initialize embedder embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2") embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32) # Initialize Chroma client = chromadb.Client( Settings( persist_directory=CHROMA_DB_DIR, anonymized_telemetry=False, is_persistent=True, ) ) # Create or get collection collection = client.get_or_create_collection("codingo_chatbot") # Populate if empty need_populate = False try: existing = collection.get(limit=1) if not existing or not existing.get("documents"): need_populate = True except Exception: need_populate = True if need_populate: ids = [f"doc_{i}" for i in range(len(docs))] collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids) _embedder = embedder _collection = collection def init_llm() -> None: """Initialize the llama‑cpp model for response generation.""" global _llm if _llm is not None: return with _init_lock: if _llm is not None: return os.makedirs(LLAMA_LOCAL_DIR, exist_ok=True) local_path = os.path.join(LLAMA_LOCAL_DIR, LLAMA_FILE) if not os.path.exists(local_path): local_path = hf_hub_download( repo_id=LLAMA_REPO, filename=LLAMA_FILE, local_dir=LLAMA_LOCAL_DIR, local_dir_use_symlinks=False, ) # GPU configuration try: import torch use_cuda = torch.cuda.is_available() except Exception: use_cuda = False n_gpu_layers = int(os.getenv("LLAMA_N_GPU_LAYERS", "35" if use_cuda else "0")) n_ctx = int(os.getenv("LLAMA_N_CTX", "2048")) n_threads = max(1, os.cpu_count() // 2) if os.cpu_count() else 4 _llm = Llama( model_path=local_path, n_ctx=n_ctx, n_threads=n_threads, n_gpu_layers=n_gpu_layers, verbose=False, # Reduce logging ) def _build_prompt(query: str, context: str) -> str: """Construct a natural prompt for the TinyLlama chat model.""" # Use a more direct, conversational system prompt system_prompt = ( "You are LUNA, a friendly AI assistant for the Codingo recruitment platform. " "Answer questions naturally and conversationally. Use the provided information " "to give helpful, direct answers. Keep responses concise and relevant." ) # Build the prompt with context integrated naturally if context: prompt = ( f"<|system|>\n{system_prompt}\n" f"<|user|>\nContext: {context}\n\n" f"Question: {query}\n" f"<|assistant|>\n" ) else: prompt = ( f"<|system|>\n{system_prompt}\n" f"<|user|>\n{query}\n" f"<|assistant|>\n" ) return prompt def get_response(query: str, k: int = 3, score_threshold: float = 1.5) -> str: """Return a chatbot response for the given query.""" if not query or not query.strip(): return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?" init_embedder_and_db() init_llm() assert _embedder is not None and _collection is not None and _llm is not None # Handle greetings directly greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening'] if query.lower().strip() in greetings: return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?" 
    # Embed query and search
    query_vector = _embedder.encode([query])[0]
    results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)

    docs = results.get("documents", [[]])[0] if results else []
    distances = results.get("distances", [[]])[0] if results else []

    # Filter by score (lower threshold for better matching)
    relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]

    if not relevant:
        # Provide a helpful response even without specific context
        return (
            "I don't have specific information about that in my knowledge base. "
            "However, I can tell you that Codingo is an AI-powered recruitment platform "
            "that helps with job applications, candidate screening, and hiring. "
            "Would you like to know more about our features?"
        )

    # Join context with better formatting
    context = " ".join(relevant[:2])  # Use top 2 most relevant chunks

    prompt = _build_prompt(query, context)

    # Generate response with better parameters
    output = _llm(
        prompt,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repeat_penalty=REPEAT_PENALTY,
        stop=["</s>", "<|user|>", "<|system|>"],
        echo=False,
    )

    # Extract and clean the response
    text = output["choices"][0]["text"].strip()

    # Remove any meta-descriptions that might have leaked through
    lines = text.split('\n')
    cleaned_lines = []
    for line in lines:
        if any(phrase in line.lower() for phrase in [
            'the chatbot', 'this bot', 'the bot provides',
            'in response to', 'overall,'
        ]):
            continue
        cleaned_lines.append(line)

    text = '\n'.join(cleaned_lines).strip()

    return text or "I'm here to help you with Codingo. Could you please rephrase your question?"
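

# Minimal usage sketch: running the module directly exercises get_response()
# end to end. This assumes chatbot/chatbot.txt and the TinyLlama GGUF weights
# are reachable from the paths configured above; the sample questions below
# are only placeholders, not part of the knowledge base.
if __name__ == "__main__":
    for question in ("What is Codingo?", "How does candidate screening work?"):
        print(f"Q: {question}")
        print(f"A: {get_response(question)}")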