""" | |
codingo_chatbot.py | |
=================== | |
This module encapsulates the logic for Codingo's website chatbot. It | |
loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector | |
database using Chroma and SentenceTransformers, and uses a local LLM | |
powered by ``llama‑cpp‑python`` to generate answers constrained to the | |
retrieved context. | |
""" | |
from __future__ import annotations
import os
import threading
from typing import List
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
from huggingface_hub import hf_hub_download
try:
    from llama_cpp import Llama  # type: ignore
except Exception as exc:  # pragma: no cover - import may fail until dependency installed
    raise ImportError(
        "llama_cpp is required for the chatbot. Please add 'llama-cpp-python' "
        "to your requirements.txt"
    ) from exc

# Configuration
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")

# TinyLlama model settings
LLAMA_REPO = os.getenv("LLAMA_REPO", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
LLAMA_FILE = os.getenv("LLAMA_FILE", "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
LLAMA_LOCAL_DIR = os.path.join("/tmp", "llama_models")

# Generation parameters (tunable via environment variables)
MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))
TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))
TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))
REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))

# Thread lock and lazily initialized globals
_init_lock = threading.Lock()
_embedder: SentenceTransformer | None = None
_collection: chromadb.Collection | None = None
_llm: Llama | None = None

def _load_chatbot_text() -> str:
    """Read the chatbot knowledge base from disk."""
    try:
        with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
            content = f.read()
        # Clean up the content to avoid meta-descriptions:
        # remove any lines that look like instructions about the chatbot
        lines = content.split('\n')
        cleaned_lines = []
        for line in lines:
            # Skip lines that describe what the chatbot does
            if any(phrase in line.lower() for phrase in [
                'the chatbot', 'this bot', 'the bot provides',
                'chatbot provides', 'chatbot is used for',
                'official chatbot of'
            ]):
                continue
            cleaned_lines.append(line)
        return '\n'.join(cleaned_lines)
    except FileNotFoundError:
        return (
            "Codingo is an AI-powered recruitment platform designed to "
            "streamline job applications, candidate screening and hiring. "
            "We make hiring smarter, faster and fairer through automation "
            "and intelligent recommendations."
        )

def init_embedder_and_db() -> None:
    """Initialize the SentenceTransformer embedder and Chroma vector DB."""
    global _embedder, _collection
    if _embedder is not None and _collection is not None:
        return
    with _init_lock:
        if _embedder is not None and _collection is not None:
            return
        os.makedirs(CHROMA_DB_DIR, exist_ok=True)
        text = _load_chatbot_text()
        # Split into chunks
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,  # Increased for better context
            chunk_overlap=100,
            separators=["\n\n", "\n", ". ", " "],
        )
        docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
        # Initialize embedder
        embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
        # Initialize Chroma
        client = chromadb.Client(
            Settings(
                persist_directory=CHROMA_DB_DIR,
                anonymized_telemetry=False,
                is_persistent=True,
            )
        )
        # Create or get collection
        collection = client.get_or_create_collection("codingo_chatbot")
        # Populate if empty
        need_populate = False
        try:
            existing = collection.get(limit=1)
            if not existing or not existing.get("documents"):
                need_populate = True
        except Exception:
            need_populate = True
        if need_populate:
            ids = [f"doc_{i}" for i in range(len(docs))]
            collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
        _embedder = embedder
        _collection = collection

def init_llm() -> None:
    """Initialize the llama-cpp model for response generation."""
    global _llm
    if _llm is not None:
        return
    with _init_lock:
        if _llm is not None:
            return
        os.makedirs(LLAMA_LOCAL_DIR, exist_ok=True)
        local_path = os.path.join(LLAMA_LOCAL_DIR, LLAMA_FILE)
        if not os.path.exists(local_path):
            local_path = hf_hub_download(
                repo_id=LLAMA_REPO,
                filename=LLAMA_FILE,
                local_dir=LLAMA_LOCAL_DIR,
                local_dir_use_symlinks=False,
            )
        # GPU configuration
        try:
            import torch
            use_cuda = torch.cuda.is_available()
        except Exception:
            use_cuda = False
        n_gpu_layers = int(os.getenv("LLAMA_N_GPU_LAYERS", "35" if use_cuda else "0"))
        n_ctx = int(os.getenv("LLAMA_N_CTX", "2048"))
        n_threads = max(1, os.cpu_count() // 2) if os.cpu_count() else 4
        _llm = Llama(
            model_path=local_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            n_gpu_layers=n_gpu_layers,
            verbose=False,  # Reduce logging
        )

def _build_prompt(query: str, context: str) -> str:
    """Construct a natural prompt for the TinyLlama chat model."""
    # Use a direct, conversational system prompt
    system_prompt = (
        "You are LUNA, a friendly AI assistant for the Codingo recruitment platform. "
        "Answer questions naturally and conversationally. Use the provided information "
        "to give helpful, direct answers. Keep responses concise and relevant."
    )
    # Build the prompt with context integrated naturally
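    # Note (an assumption based on the model configured above): TinyLlama-1.1B-Chat-v1.0
    # uses a Zephyr-style chat template with <|system|>, <|user|> and <|assistant|> turn
    # markers, each turn terminated by </s>, which the strings below reproduce.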
    if context:
        prompt = (
            f"<|system|>\n{system_prompt}</s>\n"
            f"<|user|>\nContext: {context}\n\n"
            f"Question: {query}</s>\n"
            f"<|assistant|>\n"
        )
    else:
        prompt = (
            f"<|system|>\n{system_prompt}</s>\n"
            f"<|user|>\n{query}</s>\n"
            f"<|assistant|>\n"
        )
    return prompt

def get_response(query: str, k: int = 3, score_threshold: float = 1.5) -> str:
    """Return a chatbot response for the given query."""
    if not query or not query.strip():
        return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"
    init_embedder_and_db()
    init_llm()
    assert _embedder is not None and _collection is not None and _llm is not None
    # Handle greetings directly
    greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
    if query.lower().strip() in greetings:
        return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"
    # Embed query and search
    query_vector = _embedder.encode([query])[0]
    results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)
    docs = results.get("documents", [[]])[0] if results else []
    distances = results.get("distances", [[]])[0] if results else []
    # Keep only chunks whose distance is below the threshold (lower distance = closer match)
    relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]
    if not relevant:
        # Provide a helpful response even without specific context
        return (
            "I don't have specific information about that in my knowledge base. "
            "However, I can tell you that Codingo is an AI-powered recruitment platform "
            "that helps with job applications, candidate screening, and hiring. "
            "Would you like to know more about our features?"
        )
    # Use the top two most relevant chunks as the context
    context = " ".join(relevant[:2])
    prompt = _build_prompt(query, context)
    # Generate the response
    output = _llm(
        prompt,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repeat_penalty=REPEAT_PENALTY,
        stop=["</s>", "<|user|>", "<|system|>"],
        echo=False,
    )
    # Extract and clean the response
    text = output["choices"][0]["text"].strip()
    # Remove any meta-descriptions that might have leaked through
    lines = text.split('\n')
    cleaned_lines = []
    for line in lines:
        if any(phrase in line.lower() for phrase in [
            'the chatbot', 'this bot', 'the bot provides',
            'in response to', 'overall,'
        ]):
            continue
        cleaned_lines.append(line)
    text = '\n'.join(cleaned_lines).strip()
    return text or "I'm here to help you with Codingo. Could you please rephrase your question?"
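
if __name__ == "__main__":  # pragma: no cover
    # Minimal manual smoke test (a sketch, not part of the web app): running this module
    # directly downloads the GGUF model on first use and answers a couple of sample questions.
    for question in ("What is Codingo?", "How does candidate screening work?"):
        print(f"Q: {question}")
        print(f"A: {get_response(question)}")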