Spaces:

husseinelsaadi
/

Codingo

Paused

App Files Files Community

husseinelsaadi committed 17 days ago

Commit

d8529bc

1 Parent(s): 40ace38

chatbot integrated

Browse files

Files changed (3) hide show

app.py +176 -1
backend/templates/base.html +87 -0
requirements.txt +12 -0

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ safe_instance_path = "/tmp/flask_instance"
 # Create the safe instance path after imports
 os.makedirs(safe_instance_path, exist_ok=True)
-from flask import Flask, render_template, redirect, url_for, flash, request
 from flask_login import LoginManager, login_required, current_user
 from werkzeug.utils import secure_filename
 import sys
@@ -31,6 +31,158 @@ from backend.routes.interview_api import interview_api
 # Import additional utilities
 import re
 import json
 # Initialize Flask app
 app = Flask(
     __name__,
@@ -178,6 +330,29 @@ def my_applications():
     ).order_by(Application.date_applied.desc()).all()
     return render_template('my_applications.html', applications=applications)
 @app.route('/parse_resume', methods=['POST'])
 def parse_resume():
     file = request.files.get('resume')

 # Create the safe instance path after imports
 os.makedirs(safe_instance_path, exist_ok=True)
+from flask import Flask, render_template, redirect, url_for, flash, request, jsonify
 from flask_login import LoginManager, login_required, current_user
 from werkzeug.utils import secure_filename
 import sys
 # Import additional utilities
 import re
 import json
+# -----------------------------------------------------------------------------
+# Chatbot setup
+#
+# The chatbot feature uses a local vector database (Chroma) to search the
+# ``chatbot/chatbot.txt`` knowledge base and then calls the Groq API via the
+# OpenAI client.  To avoid the expensive model and database initialisation on
+# every request, we lazily load the embeddings and collection the first time
+# a chat query is processed.  Subsequent requests reuse the same global
+# objects.  See ``init_chatbot()`` and ``get_chatbot_response()`` below for
+# implementation details.
+# Paths for the chatbot knowledge base and persistent vector store.  We
+# compute these relative to the current file so that the app can be deployed
+# anywhere without needing to change configuration.  The ``chroma_db``
+# directory will be created automatically by the Chroma client if it does not
+# exist.
+CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
+CHATBOT_DB_DIR = os.path.join(current_dir, 'chatbot', 'chroma_db')
+# API credentials for Groq.  These values mirror those in the standalone
+# ``chatbot/chatbot.py`` script.  If you need to update your API key or
+# model name, modify these constants.  The API key is public in this
+# repository purely for demonstration purposes; in a real deployment it
+# should be stored securely (e.g. via environment variables or Secrets).
+GROQ_API_KEY = "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"
+GROQ_MODEL = "llama3-8b-8192"
+# Global objects used by the chatbot.  They remain ``None`` until
+# ``init_chatbot()`` runs.  After initialisation, ``_chatbot_embedder`` holds
+# the SentenceTransformer model and ``_chatbot_collection`` is the Chroma
+# collection with embedded knowledge base documents.  A separate import of
+# the OpenAI client is performed in ``get_chatbot_response()`` to avoid
+# unintentional import side effects at module import time.
+_chatbot_embedder = None
+_chatbot_collection = None
+def init_chatbot() -> None:
+    """Initialise the chatbot embedding model and vector database.
+    This function is designed to be idempotent: it only performs the heavy
+    initialisation steps once.  Subsequent calls will return immediately if
+    the global variables are already populated.  The knowledge base is read
+    from ``CHATBOT_TXT_PATH``, split into overlapping chunks and encoded
+    using a lightweight sentence transformer.  The resulting embeddings are
+    stored in a Chroma collection located at ``CHATBOT_DB_DIR``.  We set
+    ``anonymized_telemetry=False`` to prevent any external network calls from
+    the Chroma client.
+    """
+    global _chatbot_embedder, _chatbot_collection
+    if _chatbot_embedder is not None and _chatbot_collection is not None:
+        return
+    # Perform imports locally to avoid slowing down application startup.  These
+    # libraries are heavy and only needed when the chatbot is used.
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from sentence_transformers import SentenceTransformer
+    import chromadb
+    from chromadb.config import Settings
+    # Ensure the persist directory exists.  Chroma will create it if missing,
+    # but explicitly creating it avoids permission errors on some platforms.
+    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)
+    # Read the raw FAQ text and split into overlapping chunks to improve
+    # retrieval granularity.  The chunk size and overlap are tuned to
+    # accommodate the relatively small knowledge base.
+    with open(CHATBOT_TXT_PATH, encoding='utf-8') as f:
+        text = f.read()
+    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
+    docs = [doc.strip() for doc in splitter.split_text(text)]
+    # Load the sentence transformer.  This model is small and runs quickly on
+    # CPU.  If you wish to change the model, update the name here.
+    embedder = SentenceTransformer('all-MiniLM-L6-v2')
+    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
+    # Initialise Chroma with an on‑disk persistent store.  If the collection
+    # already exists and contains all documents, the add operation below will
+    # silently merge duplicates.
+    client = chromadb.Client(Settings(persist_directory=CHATBOT_DB_DIR, anonymized_telemetry=False))
+    collection = client.get_or_create_collection('chatbot')
+    ids = [f'doc_{i}' for i in range(len(docs))]
+    try:
+        # Attempt to query an existing document to see if the collection is
+        # populated.  If this fails, we'll proceed to add all documents.
+        existing = collection.get(ids=ids[:1])
+        if not existing.get('documents'):
+            raise ValueError('No documents in collection')
+    except Exception:
+        collection.add(documents=docs, embeddings=embeddings, ids=ids)
+    _chatbot_embedder = embedder
+    _chatbot_collection = collection
+def get_chatbot_response(query: str) -> str:
+    """Generate a reply to the user's query using the knowledge base and Groq API.
+    The function first calls ``init_chatbot()`` to ensure that the embedding
+    model and Chroma collection are loaded.  It then embeds the user's query
+    and retrieves the top three most relevant context chunks via a nearest
+    neighbour search.  These chunks are concatenated and passed to the
+    Groq API via the OpenAI client.  The system prompt constrains the model
+    to only answer questions about Codingo; for unrelated queries it will
+    politely decline to answer.  Any exceptions during the API call are
+    propagated to the caller.
+    Parameters
+    ----------
+    query: str
+        The user's input message.
+    Returns
+    -------
+    str
+        The assistant's reply.
+    """
+    init_chatbot()
+    # Local imports to avoid pulling heavy dependencies on module import.
+    import openai
+    embedder = _chatbot_embedder
+    collection = _chatbot_collection
+    query_embedding = embedder.encode([query])[0]
+    results = collection.query(query_embeddings=[query_embedding], n_results=3)
+    retrieved_docs = results['documents'][0]
+    context = "\n".join(retrieved_docs)
+    system_prompt = (
+        "You are a helpful assistant for the Codingo website. "
+        "Only answer questions that are directly relevant to the context provided. "
+        "If the user asks anything unrelated, politely refuse by saying: "
+        "\"I'm only trained to answer questions about the Codingo platform.\""
+    )
+    user_prompt = f"Context:\n{context}\n\nQuestion: {query}"
+    # Configure the OpenAI client to talk to the Groq API.  The base URL is
+    # set here rather than globally to avoid interfering with other parts of
+    # the application that might use OpenAI for different providers.
+    openai.api_key = GROQ_API_KEY
+    openai.api_base = "https://api.groq.com/openai/v1"
+    completion = openai.ChatCompletion.create(
+        model=GROQ_MODEL,
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+        max_tokens=200,
+        temperature=0.3,
+    )
+    return completion['choices'][0]['message']['content'].strip()
 # Initialize Flask app
 app = Flask(
     __name__,
     ).order_by(Application.date_applied.desc()).all()
     return render_template('my_applications.html', applications=applications)
+# -----------------------------------------------------------------------------
+# Chatbot API endpoint
+#
+# This route receives a JSON payload containing a ``message`` field from the
+# front‑end chat widget.  It validates the input, invokes the chatbot
+# response function and returns a JSON response.  Any errors are surfaced
+# as a 400 or 500 response with an ``error`` message field.
+@app.route('/chatbot', methods=['POST'])
+def chatbot_endpoint():
+    data = request.get_json(silent=True) or {}
+    user_input = str(data.get('message', '')).strip()
+    if not user_input:
+        return jsonify({"error": "Empty message"}), 400
+    try:
+        reply = get_chatbot_response(user_input)
+        return jsonify({"response": reply})
+    except Exception as exc:
+        # Log the exception to stderr for debugging in the console.  In a
+        # production setting you might want to log this to a proper logging
+        # facility instead.
+        print(f"Chatbot error: {exc}", file=sys.stderr)
+        return jsonify({"error": str(exc)}), 500
 @app.route('/parse_resume', methods=['POST'])
 def parse_resume():
     file = request.files.get('resume')

backend/templates/base.html CHANGED Viewed

@@ -859,5 +859,92 @@
         </div>
     </div>
 </footer>
 </body>
 </html>

         </div>
     </div>
 </footer>
+    {# -------------------------------------------------------------------------
+       Chatbot UI scripts and styles
+       The following script powers the floating chatbot widget located at the
+       bottom right of every page.  When the user clicks the 💬 button, the
+       widget toggles visibility.  Pressing Enter in the input box sends the
+       message to the `/chatbot` endpoint defined in ``app.py``.  Both user
+       and bot messages are appended to the conversation pane with simple
+       styling defined below.  Jinja's ``url_for`` helper is used to
+       dynamically generate the correct path to the endpoint at render time.
+    #}
+    <script type="text/javascript">
+      function toggleChatbot() {
+        const box = document.getElementById('chatbot-box');
+        if (!box) return;
+        // Toggle between flex (visible) and none (hidden)
+        box.style.display = (box.style.display === 'flex') ? 'none' : 'flex';
+      }
+      function sendChat(event) {
+        if (event.key === 'Enter') {
+          event.preventDefault();
+          const input = document.getElementById('chat-input');
+          const message = input.value.trim();
+          if (!message) return;
+          appendChatMessage(message, 'user');
+          input.value = '';
+          fetch("{{ url_for('chatbot_endpoint') }}", {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ message: message })
+          }).then(response => response.json())
+            .then(data => {
+              if (data.response) {
+                appendChatMessage(data.response, 'bot');
+              } else {
+                appendChatMessage(data.error || 'Error occurred.', 'bot');
+              }
+            }).catch(() => {
+              appendChatMessage('Network error.', 'bot');
+            });
+        }
+      }
+      function appendChatMessage(text, sender) {
+        const container = document.getElementById('chat-messages');
+        if (!container) return;
+        const wrapper = document.createElement('div');
+        wrapper.className = sender === 'user' ? 'user-message' : 'bot-message';
+        const bubble = document.createElement('div');
+        bubble.className = sender === 'user' ? 'user-bubble' : 'bot-bubble';
+        bubble.textContent = text;
+        wrapper.appendChild(bubble);
+        container.appendChild(wrapper);
+        container.scrollTop = container.scrollHeight;
+      }
+    </script>
+    <style>
+      /* Chat message styling for user and bot */
+      #chat-messages .user-message {
+        display: flex;
+        justify-content: flex-end;
+        margin-bottom: 8px;
+      }
+      #chat-messages .bot-message {
+        display: flex;
+        justify-content: flex-start;
+        margin-bottom: 8px;
+      }
+      #chat-messages .user-bubble {
+        background-color: #4caf50;
+        color: #ffffff;
+        padding: 8px 12px;
+        border-radius: 12px;
+        max-width: 80%;
+        word-wrap: break-word;
+      }
+      #chat-messages .bot-bubble {
+        background-color: #f1f0f0;
+        color: #000000;
+        padding: 8px 12px;
+        border-radius: 12px;
+        max-width: 80%;
+        word-wrap: break-word;
+      }
+    </style>
 </body>
 </html>

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 flask
 flask_login
 flask_sqlalchemy
@@ -46,6 +47,17 @@ edge-tts==6.1.2
 gunicorn
 python-dotenv
 # Audio format conversion (critical for WebM/WAV handling)
 pydub>=0.25.1

 flask
 flask_login
 flask_sqlalchemy
 gunicorn
 python-dotenv
+# --- Chatbot Dependencies ---
+# The chatbot feature relies on a vector database and external API calls via the
+# OpenAI client.  ``chromadb`` provides a simple embedding store for semantic
+# search over the knowledge base stored in ``chatbot/chatbot.txt``.  ``openai``
+# is required to communicate with the Groq API endpoint (which is compatible
+# with the OpenAI client).  ``flask-cors`` allows cross‑origin requests if we
+# decide to decouple the chat interface in the future.
+chromadb>=0.4.0
+openai>=1.8.0
+flask-cors>=4.0.0
 # Audio format conversion (critical for WebM/WAV handling)
 pydub>=0.25.1