"""
codingo_chatbot.py
===================
This module encapsulates the logic for Codingo's website chatbot. It
loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector
database using Chroma and SentenceTransformers, and uses the shared
Groq language model (imported from ``backend.services.interview_engine``)
to generate answers constrained to the retrieved context. If a Groq API
key is not configured, a lightweight dummy model will be used as a
fallback. TinyLlama and other local models are no longer used in this
module.
"""
from __future__ import annotations
import os
import threading
from typing import List
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
# Import the shared Groq LLM instance from the interview engine. This ensures
# that the chatbot uses the exact same language model as the interview API.
from backend.services.interview_engine import groq_llm
# The llama_cpp dependency is no longer used for the chatbot. We keep the
# import guarded to avoid breaking environments where llama_cpp is not
# installed, but it is no longer required for generating responses.
try:
from llama_cpp import Llama # type: ignore # noqa: F401
except Exception:
# We don't raise here because the Groq LLM will be used instead. If
# llama_cpp is unavailable, it won't affect chatbot functionality.
Llama = None # type: ignore
# Configuration
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")
# Legacy generation parameters. These are read from the same environment
# variables as the previous TinyLlama setup so existing deployments keep
# working, but they are not currently passed to the Groq API, which manages
# its own sampling behaviour (temperature, top-p, etc.).
MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512")) # kept for compatibility
TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3")) # unused but retained
TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9")) # unused but retained
REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1")) # unused
# Thread lock and globals
_init_lock = threading.Lock()
_embedder: SentenceTransformer | None = None
_collection: chromadb.Collection | None = None
_llm = None # This will be set to the shared Groq LLM instance
def _load_chatbot_text() -> str:
"""Read the chatbot knowledge base from disk."""
try:
with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
content = f.read()
# Clean up the content to avoid meta-descriptions
# Remove any lines that look like instructions about the chatbot
lines = content.split('\n')
cleaned_lines = []
for line in lines:
# Skip lines that describe what the chatbot does
if any(phrase in line.lower() for phrase in [
'the chatbot', 'this bot', 'the bot provides',
'chatbot provides', 'chatbot is used for',
'official chatbot of'
]):
continue
cleaned_lines.append(line)
return '\n'.join(cleaned_lines)
except FileNotFoundError:
return (
"Codingo is an AI‑powered recruitment platform designed to "
"streamline job applications, candidate screening and hiring. "
"We make hiring smarter, faster and fairer through automation "
"and intelligent recommendations."
)
def init_embedder_and_db() -> None:
"""Initialize the SentenceTransformer embedder and Chroma vector DB."""
global _embedder, _collection
if _embedder is not None and _collection is not None:
return
with _init_lock:
if _embedder is not None and _collection is not None:
return
os.makedirs(CHROMA_DB_DIR, exist_ok=True)
text = _load_chatbot_text()
# Split into chunks
splitter = RecursiveCharacterTextSplitter(
chunk_size=500, # Increased for better context
chunk_overlap=100,
separators=["\n\n", "\n", ". ", " "],
)
docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
# Initialize embedder
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
# Initialize Chroma
client = chromadb.Client(
Settings(
persist_directory=CHROMA_DB_DIR,
anonymized_telemetry=False,
is_persistent=True,
)
)
# Create or get collection
collection = client.get_or_create_collection("codingo_chatbot")
# Populate if empty
need_populate = False
try:
existing = collection.get(limit=1)
if not existing or not existing.get("documents"):
need_populate = True
except Exception:
need_populate = True
if need_populate:
ids = [f"doc_{i}" for i in range(len(docs))]
collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
_embedder = embedder
_collection = collection
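# Note (illustrative numbers, not measured): with chunk_size=500 and
# chunk_overlap=100, a knowledge base of roughly 10 KB splits into a few dozen
# chunks, each encoded by all-MiniLM-L6-v2 into a 384-dimensional vector
# before being stored in the Chroma collection.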
def init_llm() -> None:
"""
Initialize the chatbot's language model. This function now assigns
the globally shared Groq LLM instance imported from the interview
engine. If the Groq API key is unavailable, the fallback dummy
model defined in the interview engine will be used automatically.
"""
global _llm
if _llm is not None:
return
with _init_lock:
if _llm is not None:
return
# Assign the shared Groq LLM instance. This may be a DummyGroq when
# no API key is provided. We avoid loading any local GGUF models.
_llm = groq_llm
def _build_prompt(query: str, context: str) -> str:
"""
Construct a prompt for the Groq LLM. The prompt instructs the model to
behave as LUNA, Codingo's friendly assistant. It emphasises using only
information from the provided context to answer the question and
encourages the model to admit when the answer is unknown. This plain
format works well with ChatGroq's ``invoke`` API.
Args:
query: The user's question.
context: Concatenated snippets from the knowledge base deemed
relevant to the query.
Returns:
A formatted string prompt ready for submission to the Groq LLM.
"""
system_prompt = """
You are LUNA, the official AI assistant of Codingo, an AI-powered recruitment platform.
You must:
- Answer questions using ONLY the provided context.
- Be concise, clear, and professional.
- If the context does not have the answer, politely say you do not know.
- Never make up features or information not in the context.
- Always focus on Codingo’s platform, services, and functionality.
"""
if context:
return (
f"{system_prompt}\n\n"
f"Context:\n{context}\n\n"
f"Question: {query}\n"
f"Answer:"
)
else:
# When no context is available, still pass an empty context so the
# model knows there is no supporting information.
return (
f"{system_prompt}\n\n"
"Context:\n\n"
f"Question: {query}\n"
f"Answer:"
)
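# Illustrative only: for a query like "What is Codingo?" with one retrieved
# snippet, _build_prompt returns roughly the following (whitespace condensed):
#
#   You are LUNA, the official AI assistant of Codingo, ...
#
#   Context:
#   Codingo is an AI-powered recruitment platform designed to ...
#
#   Question: What is Codingo?
#   Answer: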
def get_response(query: str, k: int = 3, score_threshold: float = 2.0) -> str:
"""
Generate a response to the user's query using the shared Groq LLM and the
chatbot's knowledge base. The function retrieves relevant context
passages from the vector store, constructs a prompt instructing the
    model to answer as LUNA using only that context, and returns the
    resulting answer. If no passage clears the relevance threshold, the full
    knowledge-base text is used as the context instead; a polite fallback
    message is returned without calling the LLM only if that file is missing.
Args:
query: The user's question or statement.
k: Number of nearest neighbour documents to retrieve from the
knowledge base (default 3).
score_threshold: Maximum distance for a document to be considered
relevant (smaller means more similar).
Returns:
A string response appropriate for the chatbot UI.
"""
# Handle empty queries gracefully
if not query or not query.strip():
return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"
# Initialise embedder, vector DB and LLM if necessary
init_embedder_and_db()
init_llm()
# If embedder or collection or LLM didn't initialise, provide a safe fallback
if _embedder is None or _collection is None or _llm is None:
return "I'm sorry, I'm unable to process your request right now. Please try again later."
    # Short-circuit simple greetings without calling the LLM
greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
if query.lower().strip() in greetings:
return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"
# Embed query and search for relevant documents
query_vector = _embedder.encode([query])[0]
results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)
docs = results.get("documents", [[]])[0] if results else []
distances = results.get("distances", [[]])[0] if results else []
# Filter by distance threshold
relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]
    # If no relevant passages are found, fall back to the full knowledge base;
    # only admit ignorance if that file cannot be read
if not relevant:
try:
with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
full_context = f.read()
context = full_context
except FileNotFoundError:
return (
"I'm sorry, I don't know the answer to that question based on my knowledge. "
"Could you ask something else about Codingo or its services?"
)
    else:
        # Concatenate the most relevant passages for context (use top 2)
        context = "\n\n".join(relevant[:2])
prompt = _build_prompt(query, context)
try:
# Invoke the Groq LLM. The ``invoke`` method may return an object
# with a ``content`` attribute or a plain string, depending on the
# backend. We handle both cases transparently.
response = _llm.invoke(prompt)
except Exception:
# If invocation fails, return a generic error message
return "I'm sorry, I encountered an error while generating a response. Please try again later."
# Extract text from the LLM response
if hasattr(response, 'content'):
text = str(response.content).strip()
elif isinstance(response, dict):
# Some wrappers may return dicts (e.g. ChatCompletion). Try common keys.
text = response.get('message', '') or response.get('text', '') or str(response)
text = text.strip()
else:
text = str(response).strip()
# Post-process the answer: remove unwanted phrases referring to the bot
lines = text.split('\n')
cleaned_lines = []
for line in lines:
lower_line = line.lower()
if any(phrase in lower_line for phrase in [
'the chatbot', 'this bot', 'the bot provides',
'in response to', 'overall,',
'as an ai language model'
]):
continue
cleaned_lines.append(line)
cleaned_text = '\n'.join(cleaned_lines).strip()
# Ensure we return some meaningful text
return cleaned_text or (
"I'm sorry, I couldn't generate a proper response. Could you rephrase your question?"
)
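# Minimal manual smoke test (illustrative; not part of the web app's import
# path). It assumes chatbot/chatbot.txt exists and that either a Groq API key
# is configured or the interview engine's dummy fallback model is acceptable.
if __name__ == "__main__":
    for sample_query in ("hi", "What is Codingo?"):
        print(f"Q: {sample_query}")
        print(f"A: {get_response(sample_query)}")
        print("-" * 40)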