husseinelsaadi committed on
Commit 3e1e43f · 1 Parent(s): 0bd189c
Files changed (1)
  1. app.py +57 -111
app.py CHANGED
@@ -117,132 +117,78 @@ def init_hf_model() -> None:
 _chatbot_embedder = None
 _chatbot_collection = None
 
-def init_chatbot() -> None:
-    """Initialise the chatbot embedding model and vector database.
-
-    This function is designed to be idempotent: it only performs the heavy
-    initialisation steps once. Subsequent calls will return immediately if
-    the global variables are already populated. The knowledge base is read
-    from ``CHATBOT_TXT_PATH``, split into overlapping chunks and encoded
-    using a lightweight sentence transformer. The resulting embeddings are
-    stored in a Chroma collection located at ``CHATBOT_DB_DIR``. We set
-    ``anonymized_telemetry=False`` to prevent any external network calls from
-    the Chroma client.
-    """
-    global _chatbot_embedder, _chatbot_collection
-    if _chatbot_embedder is not None and _chatbot_collection is not None:
         return
-    # Perform imports locally to avoid slowing down application startup. These
-    # libraries are heavy and only needed when the chatbot is used.
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
-    from sentence_transformers import SentenceTransformer
-    import chromadb
-    from chromadb.config import Settings
-
-    # Ensure the persist directory exists. Chroma will create it if missing,
-    # but explicitly creating it avoids permission errors on some platforms.
-    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)
-
-    # Read the raw FAQ text and split into overlapping chunks to improve
-    # retrieval granularity. The chunk size and overlap are tuned to
-    # accommodate the relatively small knowledge base.
-    with open(CHATBOT_TXT_PATH, encoding='utf-8') as f:
-        text = f.read()
-    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
-    docs = [doc.strip() for doc in splitter.split_text(text)]
-
-    # Load the sentence transformer. This model is small and runs quickly on
-    # CPU. If you wish to change the model, update the name here.
-    embedder = SentenceTransformer('all-MiniLM-L6-v2')
-    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
-
-    # Initialise Chroma with an on-disk persistent store. If the collection
-    # already exists and contains all documents, the add operation below will
-    # silently merge duplicates.
-    client = chromadb.Client(Settings(persist_directory=CHATBOT_DB_DIR, anonymized_telemetry=False))
-    collection = client.get_or_create_collection('chatbot')
-    ids = [f'doc_{i}' for i in range(len(docs))]
-    try:
-        # Attempt to query an existing document to see if the collection is
-        # populated. If this fails, we'll proceed to add all documents.
-        existing = collection.get(ids=ids[:1])
-        if not existing.get('documents'):
-            raise ValueError('No documents in collection')
-    except Exception:
-        collection.add(documents=docs, embeddings=embeddings, ids=ids)
 
-    _chatbot_embedder = embedder
-    _chatbot_collection = collection
 
 def get_chatbot_response(query: str) -> str:
-    """Generate a reply to the user's query using the knowledge base and a Hugging Face model.
-
-    This function performs a two-stage process to answer user questions. First
-    it ensures that the vector store and embedder are available via
-    ``init_chatbot()``, then embeds the query to retrieve the most relevant
-    context chunks from ``chatbot.txt`` using Chroma. Second, it calls
-    ``init_hf_model()`` to lazily load a conversational model from Hugging
-    Face. The retrieved context, together with a system instruction,
-    constitutes the prompt for the model. The model is then run to
-    generate an answer. If the user asks a question unrelated to the
-    Codingo platform, the system prompt instructs the model to refuse
-    politely.
-
-    Parameters
-    ----------
-    query: str
-        The user's input message.
-
-    Returns
-    -------
-    str
-        The assistant's reply.
-    """
-    # Ensure the embedding model and vector store are ready.
     init_chatbot()
     init_hf_model()
     embedder = _chatbot_embedder
     collection = _chatbot_collection
-    # Compute an embedding for the query and retrieve the top three matching
-    # context chunks. Chroma returns a list of documents for each query.
     query_embedding = embedder.encode([query])[0]
     results = collection.query(query_embeddings=[query_embedding], n_results=3)
-    retrieved_docs = results.get('documents', [[]])[0] if results else []
     context = "\n".join(retrieved_docs)
-    # Construct the system prompt. This instruction encourages the model to
-    # answer only questions related to the context and to decline otherwise.
     system_prompt = (
         "You are a helpful assistant for the Codingo website. "
-        "Only answer questions that are directly relevant to the context provided. "
-        "If the user asks anything unrelated, politely refuse by saying: "
-        "\"I'm only trained to answer questions about the Codingo platform.\""
     )
-    # Compose the complete prompt with context and user question. Including
-    # the system prompt inline helps guide smaller conversational models.
     prompt = f"{system_prompt}\n\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"
-    # Generate a response using the Hugging Face model. The global model
-    # variables are guaranteed to be initialised by ``init_hf_model()``.
-    model = _hf_model
-    tokenizer = _hf_tokenizer
-    device = model.device
-    # Encode the prompt and perform generation. ``generate`` will
-    # automatically use the model's device (CPU or GPU). We limit the
-    # response length to 200 tokens to keep answers concise.
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    output_ids = model.generate(
-        **inputs,
-        max_length=200,
-        num_beams=1,
-        do_sample=False,
-        early_stopping=True
-    )
-    reply = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # The reply may include the prompt prefix; extract the generated answer
-    # following the original prompt. If the model echoes the prompt, we
-    # remove the prompt part to return only the answer.
-    if reply.startswith(prompt):
-        reply = reply[len(prompt):]
-    return reply.strip()
 
 # Initialize Flask app
 app = Flask(
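
The removed init_chatbot/get_chatbot_response code above implements a small retrieval pipeline: split chatbot.txt into overlapping chunks, embed them with all-MiniLM-L6-v2, store them in a Chroma collection, and at query time embed the question and fetch the closest chunks. For reference, here is a minimal, self-contained sketch of that embed-and-retrieve flow; the placeholder chunks, the in-memory client and the chatbot_demo collection name are illustrative only and are not part of app.py (which persists its collection under CHATBOT_DB_DIR):

from sentence_transformers import SentenceTransformer
import chromadb

# Toy knowledge base; the real chunks come from CHATBOT_TXT_PATH.
faq_chunks = [
    "Placeholder FAQ entry about the Codingo platform.",
    "Another placeholder FAQ entry about using Codingo.",
]

embedder = SentenceTransformer("all-MiniLM-L6-v2")

client = chromadb.Client()  # ephemeral in-memory store for this demo
collection = client.get_or_create_collection("chatbot_demo")
collection.add(
    documents=faq_chunks,
    embeddings=[vec.tolist() for vec in embedder.encode(faq_chunks)],
    ids=[f"doc_{i}" for i in range(len(faq_chunks))],
)

query = "What is Codingo?"
results = collection.query(
    query_embeddings=[embedder.encode([query])[0].tolist()],
    n_results=2,
)
print(results["documents"][0])  # most relevant chunks, best match first

The same hunk as rewritten by this commit follows.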
 
 _chatbot_embedder = None
 _chatbot_collection = None
 
+def init_hf_model() -> None:
+    """Initialise the Hugging Face conversational model and tokenizer."""
+    global _hf_model, _hf_tokenizer
+    if _hf_model is not None and _hf_tokenizer is not None:
         return
+
+    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+    import torch
+
+    model_name = "facebook/blenderbot-400M-distill"
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
+
+    _hf_model = model
+    _hf_tokenizer = tokenizer
+
 
 def get_chatbot_response(query: str) -> str:
+    """Generate a reply to the user's query using Chroma + Hugging Face model."""
     init_chatbot()
     init_hf_model()
+
+    # Safety: prevent empty input
+    if not query or not query.strip():
+        return "Please type a question about the Codingo platform."
+
     embedder = _chatbot_embedder
     collection = _chatbot_collection
+    model = _hf_model
+    tokenizer = _hf_tokenizer
+    device = model.device
+
+    # Retrieve context from Chroma
     query_embedding = embedder.encode([query])[0]
     results = collection.query(query_embeddings=[query_embedding], n_results=3)
+    retrieved_docs = results.get("documents", [[]])[0] if results else []
     context = "\n".join(retrieved_docs)
+
+    # System instruction
     system_prompt = (
         "You are a helpful assistant for the Codingo website. "
+        "Only answer questions relevant to the context provided. "
+        "If unrelated, reply: 'I'm only trained to answer questions about the Codingo platform.'"
     )
+
     prompt = f"{system_prompt}\n\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"
+
+    # Safe tokenization with truncation to avoid CUDA indexing issues
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        truncation=True,
+        max_length=256,  # Prevents long inputs
+        padding=True
+    ).to(device)
+
+    try:
+        output_ids = model.generate(
+            **inputs,
+            max_length=200,
+            num_beams=3,
+            do_sample=False,
+            early_stopping=True
+        )
+        reply = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        if reply.startswith(prompt):
+            reply = reply[len(prompt):]
+        return reply.strip()
+    except Exception as e:
+        return f"Error generating response: {str(e)}"
 
 # Initialize Flask app
 app = Flask(
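
The rewritten get_chatbot_response truncates the prompt before generation (truncation=True, max_length=256) and then decodes the generated tokens. The standalone sketch below exercises the same load-and-generate pattern with facebook/blenderbot-400M-distill outside the Flask app; the placeholder prompt text, the 128-token input cap and max_new_tokens=60 are illustrative choices, not values taken from app.py:

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

model_name = "facebook/blenderbot-400M-distill"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

prompt = "Context:\nPlaceholder Codingo FAQ text.\n\nQuestion: What is Codingo?\n\nAnswer:"

# Truncate the encoded prompt; BlenderBot's encoder only accepts a short input window.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)

output_ids = model.generate(
    **inputs,
    max_new_tokens=60,  # bound the reply length independently of the input length
    num_beams=3,
    early_stopping=True,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Because BlenderBot is an encoder-decoder model, generate returns only the decoder output, so the reply.startswith(prompt) check in the new code acts as a safeguard rather than the usual path for stripping an echoed prompt.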