Commit bef6630
Parent(s): 29cfacc
chatbot updated

Files changed:
- app.py +20 -206
- chatbot/chatbot.py +254 -0

app.py
CHANGED
@@ -36,215 +36,23 @@ import json
 #
 # The chatbot uses a local vector database (Chroma) to search the
 # ``chatbot/chatbot.txt`` knowledge base. Retrieved passages are fed to
-# a lightweight conversational model from Hugging Face
-#
-#
-#
-#
-# ``get_chatbot_response`` for implementation details.
+# a lightweight conversational model from Hugging Face. To avoid the
+# expensive model and database initialisation on every request, embeddings
+# and the vector collection are loaded lazily the first time a chat query
+# is processed. Subsequent requests reuse the same global objects. All
+# chatbot logic resides in ``chatbot/chatbot.py``.

 # Paths for the chatbot knowledge base and persistent vector store. We
 # compute these relative to the current file so that the app can be deployed
 # anywhere without needing to change configuration. The ``chroma_db``
 # directory will be created automatically by the Chroma client if it does not
 # exist.
-
-
-#
-
-
-
-
-# -----------------------------------------------------------------------------
-# Hugging Face model configuration
-#
-# The chatbot uses a small conversational model hosted on Hugging Face. To
-# allow easy experimentation, the model name can be overridden via the
-# ``HF_CHATBOT_MODEL`` environment variable. If unset, we fall back to
-# ``microsoft/DialoGPT-medium`` which provides better conversational quality
-# than blenderbot for our use case.
-HF_MODEL_NAME = os.getenv("HF_CHATBOT_MODEL", "microsoft/DialoGPT-medium")
-
-# Global Hugging Face model and tokenizer. These variables remain ``None``
-# until ``init_hf_model()`` is called. They are reused across all chatbot
-# requests to prevent repeatedly loading the large model into memory.
-_hf_model = None
-_hf_tokenizer = None
-
-def init_hf_model():
-    """
-    Initialise the Hugging Face conversational model and tokenizer.
-
-    This function loads the specified ``HF_MODEL_NAME`` model and its
-    corresponding tokenizer. The model is moved to GPU if available,
-    otherwise it runs on CPU. Subsequent calls return immediately if
-    the model and tokenizer have already been instantiated.
-    """
-    global _hf_model, _hf_tokenizer
-    if _hf_model is not None and _hf_tokenizer is not None:
-        return
-
-    from transformers import AutoModelForCausalLM, AutoTokenizer
-    import torch
-
-    model_name = HF_MODEL_NAME
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-    print(f"Loading model {model_name} on device {device}")
-
-    # Load tokenizer and model from Hugging Face
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-
-    # Set pad token to eos token if not set
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
-
-    _hf_model = model
-    _hf_tokenizer = tokenizer
-    print(f"Model loaded successfully on {device}")
-
-_chatbot_embedder = None
-_chatbot_collection = None
-
-def init_chatbot():
-    """Initialise the Chroma vector DB with chatbot.txt content."""
-    global _chatbot_embedder, _chatbot_collection
-    if _chatbot_embedder is not None and _chatbot_collection is not None:
-        return
-
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
-    from sentence_transformers import SentenceTransformer
-    import chromadb
-    from chromadb.config import Settings
-    import os
-
-    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)
-
-    # Read and parse the chatbot knowledge base
-    try:
-        with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
-            text = f.read()
-    except FileNotFoundError:
-        print(f"Warning: {CHATBOT_TXT_PATH} not found, using default content")
-        text = """
-        Codingo is an AI-powered recruitment platform designed to streamline job applications,
-        candidate screening, and hiring. We make hiring smarter, faster, and fairer through
-        automation and intelligent recommendations.
-        """
-
-    # Split text into chunks for vector search
-    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
-    docs = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
-
-    # Initialize embedder
-    embedder = SentenceTransformer("all-MiniLM-L6-v2")
-    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
-
-    # Initialize Chroma client
-    client = chromadb.Client(Settings(
-        persist_directory=CHATBOT_DB_DIR,
-        anonymized_telemetry=False,
-        is_persistent=True
-    ))
-
-    # Get or create collection
-    collection = client.get_or_create_collection("chatbot")
-
-    # Check if collection is empty and populate if needed
-    try:
-        existing = collection.get(limit=1)
-        if not existing.get("documents"):
-            raise ValueError("Empty Chroma DB")
-    except Exception:
-        # Add documents to collection
-        ids = [f"doc_{i}" for i in range(len(docs))]
-        collection.add(
-            documents=docs,
-            embeddings=embeddings.tolist(),
-            ids=ids
-        )
-        print(f"Added {len(docs)} documents to Chroma DB")
-
-    _chatbot_embedder = embedder
-    _chatbot_collection = collection
-
-def get_chatbot_response(query: str) -> str:
-    """Generate a reply to the user's query using Chroma + Hugging Face model."""
-    try:
-        init_chatbot()
-        init_hf_model()
-
-        # Safety: prevent empty input
-        if not query or not query.strip():
-            return "Please type a question about the Codingo platform."
-
-        embedder = _chatbot_embedder
-        collection = _chatbot_collection
-        model = _hf_model
-        tokenizer = _hf_tokenizer
-        device = model.device
-
-        # Retrieve context from Chroma
-        query_embedding = embedder.encode([query])[0]
-        results = collection.query(
-            query_embeddings=[query_embedding.tolist()],
-            n_results=3
-        )
-        retrieved_docs = results.get("documents", [[]])[0] if results else []
-        context = "\n".join(retrieved_docs[:3])  # Limit context to top 3 results
-
-        # Build conversational prompt
-        system_instruction = (
-            "You are LUNA AI, a helpful assistant for the Codingo recruitment platform. "
-            "Use the provided context to answer questions about Codingo. "
-            "If the question is not related to Codingo, politely redirect the conversation. "
-            "Keep responses concise and friendly."
-        )
-
-        # Format prompt for DialoGPT
-        prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser: {query}\nLUNA AI:"
-
-        # Tokenize with proper truncation
-        inputs = tokenizer.encode(
-            prompt,
-            return_tensors="pt",
-            truncation=True,
-            max_length=512,
-            padding=True
-        ).to(device)
-
-        # Generate response
-        with torch.no_grad():
-            output_ids = model.generate(
-                inputs,
-                max_length=inputs.shape[1] + 150,
-                num_beams=3,
-                do_sample=True,
-                temperature=0.7,
-                pad_token_id=tokenizer.eos_token_id,
-                eos_token_id=tokenizer.eos_token_id,
-                early_stopping=True
-            )
-
-        # Decode response
-        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
-        # Extract only the bot's response
-        if "LUNA AI:" in response:
-            response = response.split("LUNA AI:")[-1].strip()
-        elif prompt in response:
-            response = response.replace(prompt, "").strip()
-
-        # Fallback if response is empty
-        if not response:
-            response = "I'm here to help you with questions about the Codingo platform. What would you like to know?"
-
-        return response
-
-    except Exception as e:
-        print(f"Chatbot error: {str(e)}")
-        return "I'm having trouble processing your request. Please try again or ask about Codingo's features, job matching, or how to use the platform."
+# The internal chatbot logic has been extracted to ``chatbot/chatbot.py``. See
+# that module for details. We import the ``get_chatbot_response`` function
+# here so that the Flask route can delegate queries directly to it. This
+# prevents ``app.py`` from depending on the heavy ML libraries and keeps
+# the application entry point lean.
+from chatbot.chatbot import get_chatbot_response

 # Initialize Flask app
 app = Flask(
@@ -540,11 +348,17 @@ if __name__ == '__main__':

     with app.app_context():
         db.create_all()
-        # Pre-initialize chatbot on startup for faster first response
+        # Pre-initialize the chatbot on startup for faster first response. We
+        # deliberately trigger a dummy query here to force loading of the
+        # sentence encoder, vector store and conversational model. Any
+        # exceptions during warm‑up are logged but do not stop the app from
+        # starting.
         print("Initializing chatbot...")
         try:
-
-
+            # Import inside the block to ensure the module has been
+            # properly loaded with the current environment settings.
+            from chatbot.chatbot import get_chatbot_response
+            _ = get_chatbot_response("Hello!")
             print("Chatbot initialized successfully")
         except Exception as e:
             print(f"Chatbot initialization warning: {e}")

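Note: the chat route itself falls outside both hunks above. As a rough sketch only, delegation from a Flask view to the extracted module could look like the following; the /chatbot endpoint path and the "message"/"response" JSON keys are illustrative assumptions, not part of this commit.

# Hypothetical route sketch; endpoint path and payload keys are assumptions.
from flask import Flask, request, jsonify
from chatbot.chatbot import get_chatbot_response

app = Flask(__name__)

@app.route("/chatbot", methods=["POST"])
def chatbot_route():
    data = request.get_json(silent=True) or {}
    query = data.get("message", "")
    # app.py only forwards the text; embedding, retrieval and generation
    # all happen inside chatbot.chatbot.
    return jsonify({"response": get_chatbot_response(query)})
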
chatbot/chatbot.py
ADDED
@@ -0,0 +1,254 @@
+"""
+Chatbot module for Codingo
+==========================
+
+This module encapsulates all functionality required to serve answers to
+questions about the Codingo platform. It loads a small conversational
+model from Hugging Face and a lightweight vector database populated from
+``chatbot.txt``. When a user asks a question, the module retrieves
+relevant snippets from the knowledge base and feeds them into the
+language model to generate a friendly response.
+
+Key features:
+
+* Completely self‑contained: there are no OpenAI or external API
+  dependencies. Only free, locally hosted Hugging Face models are used.
+* Lazy initialisation: the model and vector store are loaded on the
+  first call to ``get_chatbot_response``. Subsequent calls reuse
+  existing objects, avoiding expensive reloads.
+* GPU support: if a CUDA device is available, the model is automatically
+  moved onto the GPU for faster inference.
+
+This file lives inside ``codingo/chatbot`` alongside ``chatbot.txt``.
+``chatbot.txt`` should contain a plain‑text knowledge base of
+Codingo‑specific information and FAQs. Feel free to update the
+contents of that file without touching any code here.
+
+"""
+
+from __future__ import annotations
+
+import os
+import shutil
+from typing import List
+
+# -----------------------------------------------------------------------------
+# Environment configuration
+#
+# We set a few environment variables to force Hugging Face to store model
+# weights and tokeniser files inside ``/tmp``. Hugging Face Spaces
+# provisions a read‑only file system outside of ``/tmp``, so without these
+# settings the transformers library might attempt to write into
+# unwritable locations. These variables have no effect if the same
+# variables are already set by the hosting environment.
+
+os.environ.setdefault("HF_HOME", "/tmp/huggingface")
+os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
+os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
+
+# -----------------------------------------------------------------------------
+# Module‑level state
+_hf_model = None  # type: ignore[assignment]
+_hf_tokenizer = None  # type: ignore[assignment]
+_chatbot_embedder = None  # type: ignore[assignment]
+_chatbot_collection = None  # type: ignore[assignment]
+
+# Paths
+_current_dir = os.path.dirname(os.path.abspath(__file__))
+_knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
+_chroma_db_dir = "/tmp/chroma_db"
+
+# Default Hugging Face model for FAQ‑style Q&A. You can override this
+# behaviour at deployment time by setting the ``HF_CHATBOT_MODEL``
+# environment variable. DialoGPT is a lightweight conversational model
+# suitable for generating coherent short answers. If you need more
+# open‑domain capability, consider ``facebook/blenderbot-400M-distill``.
+DEFAULT_MODEL_NAME = "microsoft/DialoGPT-medium"
+
+
+def _init_hf_model() -> None:
+    """Load the Hugging Face model and tokenizer if not already loaded."""
+    global _hf_model, _hf_tokenizer
+    if _hf_model is not None and _hf_tokenizer is not None:
+        return
+
+    from transformers import AutoModelForCausalLM, AutoTokenizer  # slow import
+    import torch
+
+    model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
+    # Choose GPU if available; otherwise CPU
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # Download and load tokenizer and model. They will be cached under
+    # the directories specified above. If running for the first time on
+    # Hugging Face Spaces, model download may take a while.
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+
+    # Ensure the pad token is defined. Many casual conversation models
+    # reuse the end‑of‑sentence token for padding.
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    _hf_model = model
+    _hf_tokenizer = tokenizer
+
+
+def _init_vector_store() -> None:
+    """Initialise the Chroma vector store from ``chatbot.txt`` if needed."""
+    global _chatbot_embedder, _chatbot_collection
+    if _chatbot_embedder is not None and _chatbot_collection is not None:
+        return
+
+    # Import heavy dependencies lazily to reduce module import time
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from sentence_transformers import SentenceTransformer
+    import chromadb
+    from chromadb.config import Settings
+
+    # Clear out any legacy database path that might be unwritable. Previous
+    # versions of this project wrote under ``/app/chatbot/chroma_db`` which
+    # fails on Hugging Face Spaces. The ``ignore_errors=True`` flag
+    # suppresses FileNotFoundError.
+    shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
+
+    os.makedirs(_chroma_db_dir, exist_ok=True)
+
+    # Read the knowledge base file. If the file is missing, fall back to a
+    # minimal description of Codingo so the chatbot can still respond.
+    try:
+        with open(_knowledge_base_path, encoding="utf-8") as f:
+            raw_text = f.read()
+    except FileNotFoundError:
+        raw_text = (
+            "Codingo is an AI-powered recruitment platform designed to "
+            "streamline job applications, candidate screening, and hiring. "
+            "We make hiring smarter, faster, and fairer through automation "
+            "and intelligent recommendations."
+        )
+
+    # Split the knowledge base into overlapping chunks for semantic search.
+    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
+    docs: List[str] = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
+
+    # Embed the chunks using a small sentence transformer. This model is
+    # lightweight (~80 MB) and works well for semantic similarity tasks.
+    embedder = SentenceTransformer("all-MiniLM-L6-v2")
+    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
+
+    # Initialise a persistent Chroma client. We disable anonymous telemetry
+    # because the environment has no outbound internet access.
+    client = chromadb.Client(Settings(
+        persist_directory=_chroma_db_dir,
+        anonymized_telemetry=False,
+        is_persistent=True,
+    ))
+
+    # Create or retrieve the "chatbot" collection within the database.
+    collection = client.get_or_create_collection("chatbot")
+
+    # If no documents are present, populate the collection with our chunks.
+    try:
+        existing = collection.get(limit=1)
+        if not existing.get("documents"):
+            raise ValueError("Empty Chroma DB")
+    except Exception:
+        ids = [f"doc_{i}" for i in range(len(docs))]
+        collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
+
+    _chatbot_embedder = embedder
+    _chatbot_collection = collection
+
+
+def get_chatbot_response(query: str) -> str:
+    """
+    Generate a chatbot reply to the given user query.
+
+    The response is generated by retrieving up to three relevant snippets
+    from the knowledge base using the MiniLM embeddings and then feeding
+    those snippets together with the user question into the conversational
+    model. If no relevant information is found or the model generates
+    an empty response, a helpful fallback message is returned.
+
+    Parameters
+    ----------
+    query : str
+        The user's message. Should be non‑empty and related to the
+        Codingo platform.
+
+    Returns
+    -------
+    str
+        The chatbot's reply, always a string.
+    """
+    # Basic validation of the query string
+    if not query or not query.strip():
+        return "Please type a question about the Codingo platform."
+
+    # Lazy initialisation of the vector store and Hugging Face model
+    _init_vector_store()
+    _init_hf_model()
+
+    # Unpack state
+    embedder = _chatbot_embedder  # type: ignore[assignment]
+    collection = _chatbot_collection  # type: ignore[assignment]
+    model = _hf_model  # type: ignore[assignment]
+    tokenizer = _hf_tokenizer  # type: ignore[assignment]
+
+    import torch
+
+    # Embed the incoming query using the same sentence transformer
+    query_embedding = embedder.encode([query])[0]  # type: ignore[operator]
+    # Retrieve the three most similar documents from the vector store
+    results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
+    retrieved_docs = results.get("documents", [[]])[0] if results else []
+
+    # Build a context string from the retrieved documents
+    context = "\n".join(retrieved_docs[:3])
+
+    # Compose the system instruction. The model is prompted as a
+    # persona called LUNA AI. Keep responses concise and friendly, and
+    # redirect politely on irrelevant questions.
+    system_instruction = (
+        "You are LUNA AI, a helpful assistant for the Codingo recruitment "
+        "platform. Use the provided context to answer questions about "
+        "Codingo. If the question is not related to Codingo, politely "
+        "redirect the conversation. Keep responses concise and friendly."
+    )
+
+    prompt = (
+        f"{system_instruction}\n\nContext:\n{context}\n\n"
+        f"User: {query}\nLUNA AI:"
+    )
+
+    # Tokenise the prompt and truncate to the maximum input length supported
+    inputs = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=512, padding=True)
+    inputs = inputs.to(model.device)
+
+    # Generate a continuation from the model
+    with torch.no_grad():
+        output_ids = model.generate(
+            inputs,
+            max_length=inputs.shape[1] + 150,
+            num_beams=3,
+            do_sample=True,
+            temperature=0.7,
+            pad_token_id=tokenizer.eos_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            early_stopping=True,
+        )
+
+    # Decode the output and strip the prompt from the beginning
+    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    if "LUNA AI:" in response:
+        response = response.split("LUNA AI:")[-1].strip()
+    elif prompt in response:
+        response = response.replace(prompt, "").strip()
+
+    # Fallback if the model didn't return anything useful
+    if not response:
+        return (
+            "I'm here to help you with questions about the Codingo platform. "
+            "What would you like to know?"
+        )
+    return response
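
Based only on the public function added above, a minimal usage sketch (assumes the transformers, sentence-transformers, chromadb and langchain dependencies used by the Space are installed, and that it runs from the repository root so the chatbot package and chatbot.txt resolve):

# First call is slow: it loads DialoGPT, embeds chatbot.txt and populates
# the Chroma collection under /tmp/chroma_db; later calls reuse the
# module-level objects.
from chatbot.chatbot import get_chatbot_response

print(get_chatbot_response("What is Codingo?"))
print(get_chatbot_response("How does candidate screening work?"))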