""" codingo_chatbot.py =================== This module encapsulates the logic for Codingo's website chatbot. It loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector database using Chroma and SentenceTransformers, and uses a local LLM powered by ``llama‑cpp‑python`` to generate answers constrained to the retrieved context. """ from __future__ import annotations import os import threading from typing import List import numpy as np from langchain.text_splitter import RecursiveCharacterTextSplitter from sentence_transformers import SentenceTransformer import chromadb from chromadb.config import Settings from huggingface_hub import hf_hub_download try: from llama_cpp import Llama # type: ignore except Exception as exc: # pragma: no cover - import may fail until dependency installed raise ImportError( "llama_cpp is required for the chatbot. Please add 'llama-cpp-python' " "to your requirements.txt" ) from exc # Configuration PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt") CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma") # TinyLlama model settings LLAMA_REPO = os.getenv("LLAMA_REPO", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF") LLAMA_FILE = os.getenv("LLAMA_FILE", "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") LLAMA_LOCAL_DIR = os.path.join("/tmp", "llama_models") # Generation parameters - adjusted for better responses MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512")) TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3")) TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9")) REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1")) # Thread lock and globals _init_lock = threading.Lock() _embedder: SentenceTransformer | None = None _collection: chromadb.Collection | None = None _llm: Llama | None = None def _load_chatbot_text() -> str: """Read the chatbot knowledge base from disk.""" try: with open(CHATBOT_TXT_PATH, encoding="utf-8") as f: content = f.read() # Clean up the content to avoid meta-descriptions # Remove any lines that look like instructions about the chatbot lines = content.split('\n') cleaned_lines = [] for line in lines: # Skip lines that describe what the chatbot does if any(phrase in line.lower() for phrase in [ 'the chatbot', 'this bot', 'the bot provides', 'chatbot provides', 'chatbot is used for', 'official chatbot of' ]): continue cleaned_lines.append(line) return '\n'.join(cleaned_lines) except FileNotFoundError: return ( "Codingo is an AI‑powered recruitment platform designed to " "streamline job applications, candidate screening and hiring. " "We make hiring smarter, faster and fairer through automation " "and intelligent recommendations." ) def init_embedder_and_db() -> None: """Initialize the SentenceTransformer embedder and Chroma vector DB.""" global _embedder, _collection if _embedder is not None and _collection is not None: return with _init_lock: if _embedder is not None and _collection is not None: return os.makedirs(CHROMA_DB_DIR, exist_ok=True) text = _load_chatbot_text() # Split into chunks splitter = RecursiveCharacterTextSplitter( chunk_size=500, # Increased for better context chunk_overlap=100, separators=["\n\n", "\n", ". 
", " "], ) docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()] # Initialize embedder embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2") embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32) # Initialize Chroma client = chromadb.Client( Settings( persist_directory=CHROMA_DB_DIR, anonymized_telemetry=False, is_persistent=True, ) ) # Create or get collection collection = client.get_or_create_collection("codingo_chatbot") # Populate if empty need_populate = False try: existing = collection.get(limit=1) if not existing or not existing.get("documents"): need_populate = True except Exception: need_populate = True if need_populate: ids = [f"doc_{i}" for i in range(len(docs))] collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids) _embedder = embedder _collection = collection def init_llm() -> None: """Initialize the llama‑cpp model for response generation.""" global _llm if _llm is not None: return with _init_lock: if _llm is not None: return os.makedirs(LLAMA_LOCAL_DIR, exist_ok=True) local_path = os.path.join(LLAMA_LOCAL_DIR, LLAMA_FILE) if not os.path.exists(local_path): local_path = hf_hub_download( repo_id=LLAMA_REPO, filename=LLAMA_FILE, local_dir=LLAMA_LOCAL_DIR, local_dir_use_symlinks=False, ) # GPU configuration try: import torch use_cuda = torch.cuda.is_available() except Exception: use_cuda = False n_gpu_layers = int(os.getenv("LLAMA_N_GPU_LAYERS", "35" if use_cuda else "0")) n_ctx = int(os.getenv("LLAMA_N_CTX", "2048")) n_threads = max(1, os.cpu_count() // 2) if os.cpu_count() else 4 _llm = Llama( model_path=local_path, n_ctx=n_ctx, n_threads=n_threads, n_gpu_layers=n_gpu_layers, verbose=False, # Reduce logging ) def _build_prompt(query: str, context: str) -> str: """Construct a natural prompt for the TinyLlama chat model.""" # Use a more direct, conversational system prompt system_prompt = ( "You are LUNA, a friendly AI assistant for the Codingo recruitment platform. " "Answer questions naturally and conversationally. Use the provided information " "to give helpful, direct answers. Keep responses concise and relevant." ) # Build the prompt with context integrated naturally if context: prompt = ( f"<|system|>\n{system_prompt}\n" f"<|user|>\nContext: {context}\n\n" f"Question: {query}\n" f"<|assistant|>\n" ) else: prompt = ( f"<|system|>\n{system_prompt}\n" f"<|user|>\n{query}\n" f"<|assistant|>\n" ) return prompt def get_response(query: str, k: int = 3, score_threshold: float = 1.5) -> str: """Return a chatbot response for the given query.""" if not query or not query.strip(): return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?" init_embedder_and_db() init_llm() assert _embedder is not None and _collection is not None and _llm is not None # Handle greetings directly greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening'] if query.lower().strip() in greetings: return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?" 
    # Embed query and search
    query_vector = _embedder.encode([query])[0]
    results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)

    docs = results.get("documents", [[]])[0] if results else []
    distances = results.get("distances", [[]])[0] if results else []

    # Filter by score (lower threshold for better matching)
    relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]

    if not relevant:
        # Provide a helpful response even without specific context
        return (
            "I don't have specific information about that in my knowledge base. "
            "However, I can tell you that Codingo is an AI-powered recruitment platform "
            "that helps with job applications, candidate screening, and hiring. "
            "Would you like to know more about our features?"
        )

    # Join context with better formatting
    context = " ".join(relevant[:2])  # Use top 2 most relevant chunks

    prompt = _build_prompt(query, context)

    # Generate response with better parameters
    output = _llm(
        prompt,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repeat_penalty=REPEAT_PENALTY,
        stop=["</s>", "<|user|>", "<|system|>"],
        echo=False,
    )

    # Extract and clean the response
    text = output["choices"][0]["text"].strip()

    # Remove any meta-descriptions that might have leaked through
    lines = text.split('\n')
    cleaned_lines = []
    for line in lines:
        if any(phrase in line.lower() for phrase in [
            'the chatbot', 'this bot', 'the bot provides',
            'in response to', 'overall,'
        ]):
            continue
        cleaned_lines.append(line)

    text = '\n'.join(cleaned_lines).strip()

    return text or "I'm here to help you with Codingo. Could you please rephrase your question?"
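

# Minimal usage sketch: running the module directly exercises get_response()
# end to end. This assumes chatbot/chatbot.txt and the TinyLlama GGUF weights
# are reachable from the paths configured above; the sample questions below
# are only placeholders, not part of the knowledge base.
if __name__ == "__main__":
    for question in ("What is Codingo?", "How does candidate screening work?"):
        print(f"Q: {question}")
        print(f"A: {get_response(question)}")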