Commit 0bd189c
Parent(s): 9019090
updated
Browse files
- app.py +97 -43
- backend/templates/base.html +21 -0
- chatbot/requirements.txt +1 -2
- requirements.txt +7 -8
app.py
CHANGED
@@ -54,13 +54,59 @@ import shutil
 shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
 CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
 CHATBOT_DB_DIR = "/tmp/chroma_db"
-#
-#
-#
-#
-#
-
-
+# -----------------------------------------------------------------------------
+# Hugging Face model configuration
+#
+# The original chatbot implementation sent queries to the Groq API via the
+# OpenAI client. To remove that dependency we now load a small conversational
+# model from Hugging Face. ``HF_MODEL_NAME`` defines which model to use. The
+# default value, ``facebook/blenderbot-400M-distill``, provides a good balance
+# between quality and resource consumption and is available on Hugging Face
+# without requiring authentication. To swap in another conversational model
+# (e.g. ``microsoft/DialoGPT-medium``), update this constant accordingly. The
+# model and tokenizer are loaded lazily in ``init_hf_model()`` to avoid
+# impacting application startup time.
+HF_MODEL_NAME = "facebook/blenderbot-400M-distill"
+
+# Global Hugging Face model and tokenizer. These variables remain ``None``
+# until ``init_hf_model()`` is called. They are reused across all chatbot
+# requests to prevent repeatedly loading the large model into memory.
+_hf_model = None
+_hf_tokenizer = None
+
+def init_hf_model() -> None:
+    """Initialise the Hugging Face conversational model and tokenizer.
+
+    Loading large Transformer models can be expensive. This helper ensures
+    that the download and model initialisation happen only once. On
+    subsequent calls the function returns immediately if the model and
+    tokenizer are already loaded. The model is moved to the GPU if one is
+    available; otherwise it runs on the CPU. Imports of heavy dependencies
+    such as ``transformers`` and ``torch`` are performed inside this
+    function to keep the global import section lightweight.
+    """
+    global _hf_model, _hf_tokenizer
+    if _hf_model is not None and _hf_tokenizer is not None:
+        return
+    # Local imports to avoid pulling heavy dependencies during module import.
+    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+    import torch
+
+    # Determine the execution device. Prefer CUDA if available; otherwise
+    # fall back to the CPU. The application runs correctly on CPU-only
+    # systems, albeit with higher latency.
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # Load the tokenizer and model. The model weights are downloaded the
+    # first time this function runs. Hugging Face caches models under
+    # ``HF_HOME`` / ``TRANSFORMERS_CACHE``, which are set at the top of
+    # this file to a writable temporary directory.
+    tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_NAME)
+    model = AutoModelForSeq2SeqLM.from_pretrained(HF_MODEL_NAME)
+    model.to(device)
+
+    _hf_model = model
+    _hf_tokenizer = tokenizer
 
 # Global objects used by the chatbot. They remain ``None`` until
 # ``init_chatbot()`` runs. After initialisation, ``_chatbot_embedder`` holds
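Since ``init_hf_model()`` is idempotent, only the first chatbot request pays the model-loading cost; later requests reuse the cached globals. A standalone sketch of the same lazy-loading pattern, with illustrative names that are not part of the commit (assumes ``transformers`` and ``torch`` are installed):

    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
    import torch

    MODEL_NAME = "facebook/blenderbot-400M-distill"
    _model, _tokenizer = None, None

    def init_model():
        """Load the model and tokenizer once; subsequent calls are no-ops."""
        global _model, _tokenizer
        if _model is not None:
            return
        # Prefer the GPU when present; CPU works too, just slower.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)

    init_model()  # first call downloads and loads the weights
    init_model()  # second call returns immediately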
@@ -129,16 +175,18 @@ def init_chatbot() -> None:
     _chatbot_collection = collection
 
 def get_chatbot_response(query: str) -> str:
-    """Generate a reply to the user's query using the knowledge base and
-
-
-
-
-
-
-
-
+    """Generate a reply to the user's query using the knowledge base and a
+    Hugging Face model.
+
+    This function performs a two-stage process to answer user questions.
+    First it ensures that the vector store and embedder are available via
+    ``init_chatbot()``, then embeds the query to retrieve the most relevant
+    context chunks from ``chatbot.txt`` using Chroma. Second, it calls
+    ``init_hf_model()`` to lazily load a conversational model from Hugging
+    Face. The retrieved context, together with a system instruction, forms
+    the prompt for the model, which is then run to generate an answer. If
+    the user asks a question unrelated to the Codingo platform, the system
+    prompt instructs the model to refuse politely.
 
     Parameters
     ----------
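The retrieval stage described in this docstring can be exercised on its own. A minimal sketch, assuming ``chromadb`` and ``sentence-transformers`` are installed; the embedding model and collection name below are assumptions, since the body of ``init_chatbot()`` is outside this hunk:

    import chromadb
    from sentence_transformers import SentenceTransformer

    embedder = SentenceTransformer("all-MiniLM-L6-v2")       # assumed embedder
    client = chromadb.PersistentClient(path="/tmp/chroma_db")
    collection = client.get_or_create_collection("chatbot")  # assumed name

    # Embed the question and pull the three closest knowledge-base chunks.
    query_embedding = embedder.encode(["How do I apply for a job?"])[0]
    results = collection.query(query_embeddings=[query_embedding.tolist()],
                               n_results=3)
    context = "\n".join(results.get("documents", [[]])[0])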
@@ -150,45 +198,51 @@ def get_chatbot_response(query: str) -> str:
     str
         The assistant's reply.
     """
+    # Ensure the embedding model and vector store are ready.
     init_chatbot()
-
-    import openai
+    init_hf_model()
     embedder = _chatbot_embedder
     collection = _chatbot_collection
-
+    # Compute embedding for the query and retrieve the top three matching
+    # context chunks. Chroma returns a list of documents for each query.
     query_embedding = embedder.encode([query])[0]
     results = collection.query(query_embeddings=[query_embedding], n_results=3)
-    retrieved_docs = results
+    retrieved_docs = results.get('documents', [[]])[0] if results else []
     context = "\n".join(retrieved_docs)
-
+    # Construct the system prompt. This instruction encourages the model to
+    # answer only questions related to the context and to decline otherwise.
     system_prompt = (
         "You are a helpful assistant for the Codingo website. "
         "Only answer questions that are directly relevant to the context provided. "
         "If the user asks anything unrelated, politely refuse by saying: "
         "\"I'm only trained to answer questions about the Codingo platform.\""
     )
-
-
-
-    #
-    #
-
-
-
-
-
-
-
-
-        temperature=0.3,
+    # Compose the complete prompt with context and user question. Including
+    # the system prompt inline helps guide smaller conversational models.
+    prompt = f"{system_prompt}\n\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"
+    # Generate a response using the Hugging Face model. The global model
+    # variables are guaranteed to be initialised by ``init_hf_model()``.
+    model = _hf_model
+    tokenizer = _hf_tokenizer
+    device = model.device
+    # Encode the prompt and perform generation. ``generate`` will
+    # automatically use the model's device (CPU or GPU). We limit the
+    # response length to 200 tokens to keep answers concise.
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    output_ids = model.generate(
+        **inputs,
+        max_length=200,
+        num_beams=1,
+        do_sample=False,
+        early_stopping=True
     )
-
-
+    reply = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # The reply may include the prompt prefix; extract the generated answer
+    # following the original prompt. If the model echoes the prompt, we
+    # remove the prompt part to return only the answer.
+    if reply.startswith(prompt):
+        reply = reply[len(prompt):]
+    return reply.strip()
 
 # Initialize Flask app
 app = Flask(
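With both initialisers in place, the whole pipeline can be smoke-tested from a Python shell. A sketch assuming the module imports cleanly as ``app`` (note that importing it also runs the ``shutil.rmtree`` cleanup at the top of the file):

    from app import get_chatbot_response

    print(get_chatbot_response("What is Codingo?"))
    # Expected: a short answer grounded in chatbot.txt, or the refusal
    # sentence defined in the system prompt.

One caveat: BlenderBot models accept only a short input window (128 tokens for ``blenderbot-400M-distill``), so a prompt with a long retrieved context may exceed it; passing ``truncation=True`` to the tokenizer call would at least make the truncation explicit rather than risking warnings or errors at generation time.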
backend/templates/base.html
CHANGED
@@ -804,6 +804,7 @@
     position: fixed;
     bottom: 80px;
     right: 20px;
+    /* Default dimensions for larger screens */
     width: 300px;
     height: 400px;
     background: white;
@@ -829,6 +830,26 @@
     max-height: 300px;
   }
 
+  /* Responsive adjustments for small screens */
+  @media (max-width: 600px) {
+    #chatbot-box {
+      width: 90vw;
+      height: 60vh;
+      bottom: 70px;
+      right: 5vw;
+    }
+    #chat-messages {
+      max-height: calc(60vh - 100px);
+    }
+  }
+  @media (max-width: 400px) {
+    #chatbot-toggle {
+      bottom: 10px;
+      right: 10px;
+      padding: 10px 12px;
+    }
+  }
+
   #chat-input {
     border: none;
     border-top: 1px solid #ccc;
chatbot/requirements.txt
CHANGED
@@ -1,3 +1,2 @@
 flask
-flask-cors
-groq
+flask-cors
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
 
 
+
 flask
 flask_login
 flask_sqlalchemy
@@ -48,15 +49,13 @@ gunicorn
 python-dotenv
 
 # --- Chatbot Dependencies ---
-# The chatbot feature relies on a vector database
-#
-#
-#
-#
-#
+# The chatbot feature relies on a vector database for semantic search over
+# the knowledge base stored in ``chatbot/chatbot.txt``. ``chromadb`` provides
+# this capability. We removed the OpenAI dependency in favour of a local
+# Hugging Face model, so no openai package is required. ``flask-cors`` is
+# retained to allow cross-origin requests should the chat UI be decoupled in
+# the future.
 chromadb>=0.4.0
-# openai>=1.8.0
-openai==0.28
 flask-cors>=4.0.0
 
 # Audio format conversion (critical for WebM/WAV handling)
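A quick sanity check for the trimmed dependency set, run in a fresh virtual environment (a sketch, not part of the commit; ``transformers`` and ``torch`` are assumed to be provided by other requirements lines outside this hunk, since the new ``init_hf_model()`` path needs them):

    import chromadb       # vector store for the chatbot knowledge base
    import flask_cors     # CORS support if the chat UI is ever decoupled
    import transformers   # Hugging Face tokenizer/model loading
    import torch          # device selection and inference
    print(chromadb.__version__, transformers.__version__, torch.__version__)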