"""
codingo_chatbot.py
===================

This module encapsulates the logic for Codingo's website chatbot.  It
loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector
database using Chroma and SentenceTransformers, and uses the shared
Groq language model (imported from ``backend.services.interview_engine``)
to generate answers constrained to the retrieved context.  If a Groq API
key is not configured, a lightweight dummy model will be used as a
fallback.  TinyLlama and other local models are no longer used in this
module.
"""

from __future__ import annotations

import os
import threading
from typing import List

from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings

# Import the shared Groq LLM instance from the interview engine.  This ensures
# that the chatbot uses the exact same language model as the interview API.
from backend.services.interview_engine import groq_llm

# The llama_cpp dependency is no longer used for the chatbot.  We keep the
# import guarded to avoid breaking environments where llama_cpp is not
# installed, but it is no longer required for generating responses.
try:
    from llama_cpp import Llama  # type: ignore  # noqa: F401
except Exception:
    # We don't raise here because the Groq LLM will be used instead.  If
    # llama_cpp is unavailable, it won't affect chatbot functionality.
    Llama = None  # type: ignore

# Configuration
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")

# Legacy generation parameters.  They are still read from the same
# environment variables as the old TinyLlama setup so that existing
# deployment configs keep working, but none of them is consulted any
# more: ChatGroq manages token limits and sampling internally.
MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))  # kept for compatibility
TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))  # unused but retained
TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))  # unused but retained
REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))  # unused

# Thread lock and globals
_init_lock = threading.Lock()
_embedder: SentenceTransformer | None = None
_collection: chromadb.Collection | None = None
_llm = None  # This will be set to the shared Groq LLM instance


def _load_chatbot_text() -> str:
    """Read the chatbot knowledge base from disk."""
    try:
        with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
            content = f.read()
            # Clean up the content to avoid meta-descriptions
            # Remove any lines that look like instructions about the chatbot
            lines = content.split('\n')
            cleaned_lines = []
            for line in lines:
                # Skip lines that describe what the chatbot does
                if any(phrase in line.lower() for phrase in [
                    'the chatbot', 'this bot', 'the bot provides', 
                    'chatbot provides', 'chatbot is used for',
                    'official chatbot of'
                ]):
                    continue
                cleaned_lines.append(line)
            return '\n'.join(cleaned_lines)
    except FileNotFoundError:
        return (
            "Codingo is an AI‑powered recruitment platform designed to "
            "streamline job applications, candidate screening and hiring. "
            "We make hiring smarter, faster and fairer through automation "
            "and intelligent recommendations."
        )


def init_embedder_and_db() -> None:
    """Initialize the SentenceTransformer embedder and Chroma vector DB."""
    global _embedder, _collection
    if _embedder is not None and _collection is not None:
        return
    with _init_lock:
        if _embedder is not None and _collection is not None:
            return
        
        os.makedirs(CHROMA_DB_DIR, exist_ok=True)
        text = _load_chatbot_text()
        
        # Split into chunks
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,  # Increased for better context
            chunk_overlap=100,
            separators=["\n\n", "\n", ". ", " "],
        )
        docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
        
        # Initialize embedder
        embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
        
        # Initialize Chroma
        client = chromadb.Client(
            Settings(
                persist_directory=CHROMA_DB_DIR,
                anonymized_telemetry=False,
                is_persistent=True,
            )
        )
        
        # Create or get collection
        collection = client.get_or_create_collection("codingo_chatbot")
        
        # Populate if empty
        need_populate = False
        try:
            existing = collection.get(limit=1)
            if not existing or not existing.get("documents"):
                need_populate = True
        except Exception:
            need_populate = True
            
        if need_populate:
            ids = [f"doc_{i}" for i in range(len(docs))]
            collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
            
        _embedder = embedder
        _collection = collection


def init_llm() -> None:
    """
    Initialize the chatbot's language model.  This function now assigns
    the globally shared Groq LLM instance imported from the interview
    engine.  If the Groq API key is unavailable, the fallback dummy
    model defined in the interview engine will be used automatically.
    """
    global _llm
    if _llm is not None:
        return
    with _init_lock:
        if _llm is not None:
            return
        # Assign the shared Groq LLM instance.  This may be a DummyGroq when
        # no API key is provided.  We avoid loading any local GGUF models.
        _llm = groq_llm


def _build_prompt(query: str, context: str) -> str:
    """
    Construct a prompt for the Groq LLM.  The prompt instructs the model to
    behave as LUNA, Codingo's friendly assistant.  It emphasises using only
    information from the provided context to answer the question and
    encourages the model to admit when the answer is unknown.  This plain
    format works well with ChatGroq's ``invoke`` API.

    Args:
        query: The user's question.
        context: Concatenated snippets from the knowledge base deemed
            relevant to the query.

    Returns:
        A formatted string prompt ready for submission to the Groq LLM.
    """
    system_prompt = """
        You are LUNA, the official AI assistant of Codingo, an AI-powered recruitment platform.
        You must:
        - Answer questions using ONLY the provided context.
        - Be concise, clear, and professional.
        - If the context does not have the answer, politely say you do not know.
        - Never make up features or information not in the context.
        - Always focus on Codingo’s platform, services, and functionality.
        """


    # Pass the context through even when it is empty so the model can see
    # that there is no supporting information.
    return (
        f"{system_prompt}\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}\n"
        f"Answer:"
    )


def get_response(query: str, k: int = 3, score_threshold: float = 2.0) -> str:
    """
    Generate a response to the user's query using the shared Groq LLM and the
    chatbot's knowledge base.  The function retrieves relevant context
    passages from the vector store, constructs a prompt instructing the
    model to answer as LUNA using only that context, and returns the
    resulting answer.  If no passage clears the distance threshold, the
    full knowledge-base text is used as context instead; a polite fallback
    message is returned without calling the LLM only if that file is also
    missing.

    Args:
        query: The user's question or statement.
        k: Number of nearest neighbour documents to retrieve from the
            knowledge base (default 3).
        score_threshold: Maximum distance for a document to be considered
            relevant (smaller means more similar).

    Returns:
        A string response appropriate for the chatbot UI.
    """
    # Handle empty queries gracefully
    if not query or not query.strip():
        return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"

    # Initialise embedder, vector DB and LLM if necessary
    init_embedder_and_db()
    init_llm()

    # If embedder or collection or LLM didn't initialise, provide a safe fallback
    if _embedder is None or _collection is None or _llm is None:
        return "I'm sorry, I'm unable to process your request right now. Please try again later."

    # Normalise for simple greetings
    greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
    if query.lower().strip() in greetings:
        return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"

    # Embed query and search for relevant documents
    query_vector = _embedder.encode([query])[0]
    results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)

    docs = results.get("documents", [[]])[0] if results else []
    distances = results.get("distances", [[]])[0] if results else []

    # Filter by distance threshold
    relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]

    # If no passage clears the threshold, fall back to the whole knowledge
    # base as context; only admit ignorance when even that file is missing.
    if not relevant:
        try:
            with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
                context = f.read()
        except FileNotFoundError:
            return (
                "I'm sorry, I don't know the answer to that question based on my knowledge. "
                "Could you ask something else about Codingo or its services?"
            )
    else:
        # Concatenate the two most relevant passages for context
        context = "\n\n".join(relevant[:2])

    prompt = _build_prompt(query, context)

    try:
        # Invoke the Groq LLM.  The ``invoke`` method may return an object
        # with a ``content`` attribute or a plain string, depending on the
        # backend.  We handle both cases transparently.
        response = _llm.invoke(prompt)
    except Exception:
        # If invocation fails, return a generic error message
        return "I'm sorry, I encountered an error while generating a response. Please try again later."

    # Extract text from the LLM response
    if hasattr(response, 'content'):
        text = str(response.content).strip()
    elif isinstance(response, dict):
        # Some wrappers may return dicts (e.g. ChatCompletion).  Try common keys.
        text = response.get('message', '') or response.get('text', '') or str(response)
        text = text.strip()
    else:
        text = str(response).strip()

    # Post-process the answer: remove unwanted phrases referring to the bot
    lines = text.split('\n')
    cleaned_lines = []
    for line in lines:
        lower_line = line.lower()
        if any(phrase in lower_line for phrase in [
            'the chatbot', 'this bot', 'the bot provides',
            'in response to', 'overall,',
            'as an ai language model'
        ]):
            continue
        cleaned_lines.append(line)
    cleaned_text = '\n'.join(cleaned_lines).strip()

    # Ensure we return some meaningful text
    return cleaned_text or (
        "I'm sorry, I couldn't generate a proper response. Could you rephrase your question?"
    )
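

if __name__ == "__main__":
    # Quick local smoke test, a sketch rather than part of the web app.  It
    # assumes chatbot/chatbot.txt exists (otherwise the built-in fallback
    # text is used) and that GROQ_API_KEY is configured; without a key, the
    # DummyGroq fallback from the interview engine answers instead.
    for question in ("hi", "What is Codingo?"):
        print(f"Q: {question}")
        print(f"A: {get_response(question)}")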