"""
codingo_chatbot.py
===================
This module encapsulates the logic for Codingo's website chatbot. It
loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector
database using Chroma and SentenceTransformers, and uses a local LLM
powered by ``llama-cpp-python`` to generate answers constrained to the
retrieved context.
"""
from __future__ import annotations
import os
import threading
from typing import List
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
from huggingface_hub import hf_hub_download
try:
from llama_cpp import Llama # type: ignore
except Exception as exc: # pragma: no cover - import may fail until dependency installed
raise ImportError(
"llama_cpp is required for the chatbot. Please add 'llama-cpp-python' "
"to your requirements.txt"
) from exc
# Configuration
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")
# TinyLlama model settings
LLAMA_REPO = os.getenv("LLAMA_REPO", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
LLAMA_FILE = os.getenv("LLAMA_FILE", "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
LLAMA_LOCAL_DIR = os.path.join("/tmp", "llama_models")
# Generation parameters (tunable via environment variables)
MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))
TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))
TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))
REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))
# Thread lock and globals
_init_lock = threading.Lock()
_embedder: SentenceTransformer | None = None
_collection: chromadb.Collection | None = None
_llm: Llama | None = None
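# The embedder, vector store and LLM are heavyweight, so they are created
# lazily on first use and cached as module-level singletons; ``_init_lock``
# implements double-checked locking so concurrent requests initialise them
# only once.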
def _load_chatbot_text() -> str:
"""Read the chatbot knowledge base from disk."""
try:
with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
content = f.read()
# Clean up the content to avoid meta-descriptions
# Remove any lines that look like instructions about the chatbot
lines = content.split('\n')
cleaned_lines = []
for line in lines:
# Skip lines that describe what the chatbot does
if any(phrase in line.lower() for phrase in [
'the chatbot', 'this bot', 'the bot provides',
'chatbot provides', 'chatbot is used for',
'official chatbot of'
]):
continue
cleaned_lines.append(line)
return '\n'.join(cleaned_lines)
except FileNotFoundError:
return (
"Codingo is an AI‑powered recruitment platform designed to "
"streamline job applications, candidate screening and hiring. "
"We make hiring smarter, faster and fairer through automation "
"and intelligent recommendations."
)
def init_embedder_and_db() -> None:
"""Initialize the SentenceTransformer embedder and Chroma vector DB."""
global _embedder, _collection
if _embedder is not None and _collection is not None:
return
with _init_lock:
if _embedder is not None and _collection is not None:
return
os.makedirs(CHROMA_DB_DIR, exist_ok=True)
text = _load_chatbot_text()
# Split into chunks
splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,  # larger chunks keep more surrounding context per embedding
chunk_overlap=100,
separators=["\n\n", "\n", ". ", " "],
)
docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
# Initialize embedder
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
# Initialize Chroma
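        # ``is_persistent=True`` with ``persist_directory`` makes Chroma write
        # the index to CHROMA_DB_DIR so it can be reused across process
        # restarts (here under /tmp, so it lives only as long as the machine).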
client = chromadb.Client(
Settings(
persist_directory=CHROMA_DB_DIR,
anonymized_telemetry=False,
is_persistent=True,
)
)
# Create or get collection
collection = client.get_or_create_collection("codingo_chatbot")
# Populate if empty
need_populate = False
try:
existing = collection.get(limit=1)
if not existing or not existing.get("documents"):
need_populate = True
except Exception:
need_populate = True
if need_populate:
ids = [f"doc_{i}" for i in range(len(docs))]
collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
_embedder = embedder
_collection = collection
def init_llm() -> None:
"""Initialize the llama‑cpp model for response generation."""
global _llm
if _llm is not None:
return
with _init_lock:
if _llm is not None:
return
os.makedirs(LLAMA_LOCAL_DIR, exist_ok=True)
local_path = os.path.join(LLAMA_LOCAL_DIR, LLAMA_FILE)
if not os.path.exists(local_path):
local_path = hf_hub_download(
repo_id=LLAMA_REPO,
filename=LLAMA_FILE,
local_dir=LLAMA_LOCAL_DIR,
local_dir_use_symlinks=False,
)
# GPU configuration
try:
import torch
use_cuda = torch.cuda.is_available()
except Exception:
use_cuda = False
n_gpu_layers = int(os.getenv("LLAMA_N_GPU_LAYERS", "35" if use_cuda else "0"))
n_ctx = int(os.getenv("LLAMA_N_CTX", "2048"))
n_threads = max(1, os.cpu_count() // 2) if os.cpu_count() else 4
_llm = Llama(
model_path=local_path,
n_ctx=n_ctx,
n_threads=n_threads,
n_gpu_layers=n_gpu_layers,
verbose=False, # Reduce logging
)
def _build_prompt(query: str, context: str) -> str:
"""Construct a natural prompt for the TinyLlama chat model."""
# Use a more direct, conversational system prompt
system_prompt = (
"You are LUNA, a friendly AI assistant for the Codingo recruitment platform. "
"Answer questions naturally and conversationally. Use the provided information "
"to give helpful, direct answers. Keep responses concise and relevant."
)
# Build the prompt with context integrated naturally
if context:
prompt = (
f"<|system|>\n{system_prompt}</s>\n"
f"<|user|>\nContext: {context}\n\n"
f"Question: {query}</s>\n"
f"<|assistant|>\n"
)
else:
prompt = (
f"<|system|>\n{system_prompt}</s>\n"
f"<|user|>\n{query}</s>\n"
f"<|assistant|>\n"
)
return prompt
def get_response(query: str, k: int = 3, score_threshold: float = 1.5) -> str:
"""Return a chatbot response for the given query."""
if not query or not query.strip():
return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"
init_embedder_and_db()
init_llm()
assert _embedder is not None and _collection is not None and _llm is not None
# Handle greetings directly
greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
if query.lower().strip() in greetings:
return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"
# Embed query and search
query_vector = _embedder.encode([query])[0]
results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)
docs = results.get("documents", [[]])[0] if results else []
distances = results.get("distances", [[]])[0] if results else []
    # Keep only chunks whose distance is below the threshold; with Chroma, a lower distance means a closer match
relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]
if not relevant:
# Provide a helpful response even without specific context
return (
"I don't have specific information about that in my knowledge base. "
"However, I can tell you that Codingo is an AI-powered recruitment platform "
"that helps with job applications, candidate screening, and hiring. "
"Would you like to know more about our features?"
)
# Join context with better formatting
context = " ".join(relevant[:2]) # Use top 2 most relevant chunks
prompt = _build_prompt(query, context)
    # Generate the response with the local llama-cpp model
output = _llm(
prompt,
max_tokens=MAX_TOKENS,
temperature=TEMPERATURE,
top_p=TOP_P,
repeat_penalty=REPEAT_PENALTY,
stop=["</s>", "<|user|>", "<|system|>"],
echo=False,
)
# Extract and clean the response
text = output["choices"][0]["text"].strip()
# Remove any meta-descriptions that might have leaked through
lines = text.split('\n')
cleaned_lines = []
for line in lines:
if any(phrase in line.lower() for phrase in [
'the chatbot', 'this bot', 'the bot provides',
'in response to', 'overall,'
]):
continue
cleaned_lines.append(line)
text = '\n'.join(cleaned_lines).strip()
return text or "I'm here to help you with Codingo. Could you please rephrase your question?"
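

# Minimal manual smoke test (illustrative only). Running this module directly
# will download the TinyLlama GGUF weights and build the Chroma index on the
# first run, which needs network access and a few hundred MB of disk space.
if __name__ == "__main__":
    for question in ("Hi", "What is Codingo?", "How does candidate screening work?"):
        print(f"Q: {question}")
        print(f"A: {get_response(question)}\n")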