Spaces:
Paused
Paused
Commit
·
d8529bc
1
Parent(s):
40ace38
chatbot integrated
Browse files- app.py +176 -1
- backend/templates/base.html +87 -0
- requirements.txt +12 -0
app.py
CHANGED
|
@@ -12,7 +12,7 @@ safe_instance_path = "/tmp/flask_instance"
|
|
| 12 |
# Create the safe instance path after imports
|
| 13 |
os.makedirs(safe_instance_path, exist_ok=True)
|
| 14 |
|
| 15 |
-
from flask import Flask, render_template, redirect, url_for, flash, request
|
| 16 |
from flask_login import LoginManager, login_required, current_user
|
| 17 |
from werkzeug.utils import secure_filename
|
| 18 |
import sys
|
|
@@ -31,6 +31,158 @@ from backend.routes.interview_api import interview_api
|
|
| 31 |
# Import additional utilities
|
| 32 |
import re
|
| 33 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# Initialize Flask app
|
| 35 |
app = Flask(
|
| 36 |
__name__,
|
|
@@ -178,6 +330,29 @@ def my_applications():
|
|
| 178 |
).order_by(Application.date_applied.desc()).all()
|
| 179 |
return render_template('my_applications.html', applications=applications)
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
@app.route('/parse_resume', methods=['POST'])
|
| 182 |
def parse_resume():
|
| 183 |
file = request.files.get('resume')
|
|
|
|
| 12 |
# Create the safe instance path after imports
|
| 13 |
os.makedirs(safe_instance_path, exist_ok=True)
|
| 14 |
|
| 15 |
+
from flask import Flask, render_template, redirect, url_for, flash, request, jsonify
|
| 16 |
from flask_login import LoginManager, login_required, current_user
|
| 17 |
from werkzeug.utils import secure_filename
|
| 18 |
import sys
|
|
|
|
| 31 |
# Import additional utilities
|
| 32 |
import re
|
| 33 |
import json
|
| 34 |
+
|
| 35 |
+
# -----------------------------------------------------------------------------
|
| 36 |
+
# Chatbot setup
|
| 37 |
+
#
|
| 38 |
+
# The chatbot feature uses a local vector database (Chroma) to search the
|
| 39 |
+
# ``chatbot/chatbot.txt`` knowledge base and then calls the Groq API via the
|
| 40 |
+
# OpenAI client. To avoid the expensive model and database initialisation on
|
| 41 |
+
# every request, we lazily load the embeddings and collection the first time
|
| 42 |
+
# a chat query is processed. Subsequent requests reuse the same global
|
| 43 |
+
# objects. See ``init_chatbot()`` and ``get_chatbot_response()`` below for
|
| 44 |
+
# implementation details.
|
| 45 |
+
|
| 46 |
+
# Paths for the chatbot knowledge base and persistent vector store. We
|
| 47 |
+
# compute these relative to the current file so that the app can be deployed
|
| 48 |
+
# anywhere without needing to change configuration. The ``chroma_db``
|
| 49 |
+
# directory will be created automatically by the Chroma client if it does not
|
| 50 |
+
# exist.
|
| 51 |
+
CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
|
| 52 |
+
CHATBOT_DB_DIR = os.path.join(current_dir, 'chatbot', 'chroma_db')
|
| 53 |
+
|
| 54 |
+
# API credentials for Groq. The key is read from the ``GROQ_API_KEY``
# environment variable so that no secret is committed to the repository;
# configure it as a deployment secret before using the chatbot. If it is not
# set, the constant is an empty string and requests to the Groq API will be
# rejected. ``GROQ_MODEL`` may optionally override the default model name.
#
# NOTE(review): the key that used to be hard-coded on this line is exposed in
# the commit history and should be revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama3-8b-8192")
|
| 61 |
+
|
| 62 |
+
# Global objects used by the chatbot. They remain ``None`` until
|
| 63 |
+
# ``init_chatbot()`` runs. After initialisation, ``_chatbot_embedder`` holds
|
| 64 |
+
# the SentenceTransformer model and ``_chatbot_collection`` is the Chroma
|
| 65 |
+
# collection with embedded knowledge base documents. A separate import of
|
| 66 |
+
# the OpenAI client is performed in ``get_chatbot_response()`` to avoid
|
| 67 |
+
# unintentional import side effects at module import time.
|
| 68 |
+
_chatbot_embedder = None
|
| 69 |
+
_chatbot_collection = None
|
| 70 |
+
|
| 71 |
+
def init_chatbot() -> None:
    """Initialise the chatbot embedding model and vector database.

    The function is idempotent: once both ``_chatbot_embedder`` and
    ``_chatbot_collection`` are populated it returns immediately.

    On the first call it reads the knowledge base from ``CHATBOT_TXT_PATH``,
    splits it into overlapping chunks, and opens (or creates) the ``chatbot``
    collection in an on-disk Chroma store at ``CHATBOT_DB_DIR``. The chunks
    are embedded and written only when the collection does not already
    contain the first chunk id, so a populated store is reused without
    re-encoding the knowledge base.

    Raises
    ------
    OSError
        If the knowledge base file cannot be read.
    ValueError
        If the knowledge base contains no text after splitting.
    """
    global _chatbot_embedder, _chatbot_collection
    if _chatbot_embedder is not None and _chatbot_collection is not None:
        return

    # Imported locally because these libraries are heavy and only needed
    # once the chatbot is actually used.
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from sentence_transformers import SentenceTransformer
    import chromadb
    from chromadb.config import Settings

    # Create the persist directory up front rather than relying on the
    # Chroma client to do so.
    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)

    with open(CHATBOT_TXT_PATH, encoding='utf-8') as f:
        text = f.read()
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
    stripped = (chunk.strip() for chunk in splitter.split_text(text))
    docs = [chunk for chunk in stripped if chunk]
    if not docs:
        raise ValueError(f'Chatbot knowledge base is empty: {CHATBOT_TXT_PATH}')

    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # ``PersistentClient`` is the on-disk client in chromadb >= 0.4; the
    # older ``Client(Settings(persist_directory=...))`` form no longer
    # persists data to disk on those versions.
    client = chromadb.PersistentClient(
        path=CHATBOT_DB_DIR,
        settings=Settings(anonymized_telemetry=False),
    )
    collection = client.get_or_create_collection('chatbot')
    ids = [f'doc_{i}' for i in range(len(docs))]

    # The presence of the first chunk id is used as the "already populated"
    # marker. NOTE(review): this does not detect edits to the knowledge base
    # text; delete CHATBOT_DB_DIR to force a rebuild.
    existing = collection.get(ids=ids[:1])
    if not existing.get('documents'):
        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
        # ``upsert`` tolerates ids left behind by a partially written store;
        # embeddings are converted to plain lists before being handed over.
        collection.upsert(documents=docs, embeddings=embeddings.tolist(), ids=ids)

    _chatbot_embedder = embedder
    _chatbot_collection = collection
|
| 127 |
+
|
| 128 |
+
def get_chatbot_response(query: str) -> str:
    """Generate a reply to the user's query using the knowledge base and Groq API.

    ``init_chatbot()`` is called first so that the embedding model and Chroma
    collection are loaded. The query is embedded, the three nearest
    knowledge-base chunks are retrieved and joined into a context string, and
    a chat completion is requested from Groq's OpenAI-compatible endpoint
    using ``GROQ_API_KEY`` and ``GROQ_MODEL``. The system prompt instructs
    the model to refuse questions unrelated to the provided context.

    Exceptions raised during initialisation, retrieval or the API call are
    propagated to the caller.

    Parameters
    ----------
    query: str
        The user's input message.

    Returns
    -------
    str
        The assistant's reply with surrounding whitespace removed; an empty
        string if the API returns no message content.
    """
    init_chatbot()
    # Imported locally to avoid pulling the dependency in at module import.
    from openai import OpenAI

    # Convert the embedding to a plain list before passing it to Chroma.
    query_embedding = _chatbot_embedder.encode([query])[0].tolist()
    results = _chatbot_collection.query(
        query_embeddings=[query_embedding],
        n_results=3,
    )
    retrieved_docs = results['documents'][0]
    context = "\n".join(retrieved_docs)

    system_prompt = (
        "You are a helpful assistant for the Codingo website. "
        "Only answer questions that are directly relevant to the context provided. "
        "If the user asks anything unrelated, politely refuse by saying: "
        "\"I'm only trained to answer questions about the Codingo platform.\""
    )
    user_prompt = f"Context:\n{context}\n\nQuestion: {query}"

    # A dedicated client instance carries the Groq key and base URL, so no
    # module-level OpenAI settings are modified. The pre-1.0 interface
    # (``openai.ChatCompletion`` / ``openai.api_base``) is not available in
    # the openai>=1.8.0 release required by requirements.txt.
    client = OpenAI(
        api_key=GROQ_API_KEY,
        base_url="https://api.groq.com/openai/v1",
    )
    completion = client.chat.completions.create(
        model=GROQ_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=200,
        temperature=0.3,
    )

    # ``content`` can be None, so fall back to an empty string.
    content = completion.choices[0].message.content
    return (content or "").strip()
|
| 186 |
# Initialize Flask app
|
| 187 |
app = Flask(
|
| 188 |
__name__,
|
|
|
|
| 330 |
).order_by(Application.date_applied.desc()).all()
|
| 331 |
return render_template('my_applications.html', applications=applications)
|
| 332 |
|
| 333 |
+
# -----------------------------------------------------------------------------
|
| 334 |
+
# Chatbot API endpoint
|
| 335 |
+
#
|
| 336 |
+
# This route receives a JSON payload containing a ``message`` field from the
|
| 337 |
+
# front‑end chat widget. It validates the input, invokes the chatbot
|
| 338 |
+
# response function and returns a JSON response. Any errors are surfaced
|
| 339 |
+
# as a 400 or 500 response with an ``error`` message field.
|
| 340 |
+
@app.route('/chatbot', methods=['POST'])
def chatbot_endpoint():
    """Handle a chat message posted by the front-end widget.

    Expects a JSON object with a ``message`` field. Returns
    ``{"response": <reply>}`` on success, ``{"error": ...}`` with status 400
    when the message is missing or blank, and ``{"error": ...}`` with status
    500 when generating the reply fails.
    """
    payload = request.get_json(silent=True)
    # A body that is not a JSON object (e.g. a list or a bare string) has no
    # ``message`` field; treat it like a missing message instead of failing.
    raw_message = payload.get('message') if isinstance(payload, dict) else None
    # ``None`` must not be stringified into the literal text "None".
    user_input = '' if raw_message is None else str(raw_message).strip()
    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    try:
        reply = get_chatbot_response(user_input)
    except Exception:
        # Request boundary: record the full traceback server-side, but do not
        # expose internal error details to the client.
        app.logger.exception("Chatbot error")
        return jsonify({
            "error": "The chatbot is temporarily unavailable. Please try again later."
        }), 500
    return jsonify({"response": reply})
|
| 355 |
+
|
| 356 |
@app.route('/parse_resume', methods=['POST'])
|
| 357 |
def parse_resume():
|
| 358 |
file = request.files.get('resume')
|
backend/templates/base.html
CHANGED
|
@@ -859,5 +859,92 @@
|
|
| 859 |
</div>
|
| 860 |
</div>
|
| 861 |
</footer>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 862 |
</body>
|
| 863 |
</html>
|
|
|
|
| 859 |
</div>
|
| 860 |
</div>
|
| 861 |
</footer>
|
| 862 |
+
|
| 863 |
+
{# -------------------------------------------------------------------------
|
| 864 |
+
Chatbot UI scripts and styles
|
| 865 |
+
|
| 866 |
+
The following script powers the floating chatbot widget located at the
|
| 867 |
+
bottom right of every page. When the user clicks the 💬 button, the
|
| 868 |
+
widget toggles visibility. Pressing Enter in the input box sends the
|
| 869 |
+
message to the `/chatbot` endpoint defined in ``app.py``. Both user
|
| 870 |
+
and bot messages are appended to the conversation pane with simple
|
| 871 |
+
styling defined below. Jinja's ``url_for`` helper is used to
|
| 872 |
+
dynamically generate the correct path to the endpoint at render time.
|
| 873 |
+
#}
|
| 874 |
+
<script type="text/javascript">
|
| 875 |
+
/**
 * Show or hide the floating chatbot panel.
 * The panel is visible when its inline display is 'flex'; any other value
 * is treated as hidden. Does nothing if the panel element is absent.
 */
function toggleChatbot() {
    const chatBox = document.getElementById('chatbot-box');
    if (chatBox === null) {
        return;
    }
    const isOpen = chatBox.style.display === 'flex';
    if (isOpen) {
        chatBox.style.display = 'none';
    } else {
        chatBox.style.display = 'flex';
    }
}
|
| 881 |
+
|
| 882 |
+
/**
 * Keyboard handler for the chat input box.
 * On Enter, posts the trimmed input text as JSON to the chatbot endpoint
 * and appends both the user's message and the reply (or an error text) to
 * the conversation pane.
 *
 * @param {KeyboardEvent} event - Key event from the chat input.
 */
function sendChat(event) {
    // Ignore other keys, and ignore an Enter that only confirms an IME
    // composition so that composed text is not sent prematurely.
    if (event.key !== 'Enter' || event.isComposing) return;
    event.preventDefault();

    const input = document.getElementById('chat-input');
    // Guard against a missing element, as the other widget functions do.
    if (!input) return;

    const message = input.value.trim();
    if (!message) return;

    appendChatMessage(message, 'user');
    input.value = '';

    fetch("{{ url_for('chatbot_endpoint') }}", {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ message: message })
    })
        .then(response => response.json())
        .then(data => {
            if (data.response) {
                appendChatMessage(data.response, 'bot');
            } else {
                appendChatMessage(data.error || 'Error occurred.', 'bot');
            }
        })
        .catch(() => {
            // Reached when the request fails or the body is not valid JSON.
            appendChatMessage('Network error.', 'bot');
        });
}
|
| 906 |
+
|
| 907 |
+
/**
 * Append one message bubble to the conversation pane and scroll to it.
 * The text is assigned via textContent, so it is rendered as plain text.
 * Does nothing if the conversation pane is absent.
 *
 * @param {string} text - Message text to display.
 * @param {string} sender - 'user' for the visitor; anything else is styled as the bot.
 */
function appendChatMessage(text, sender) {
    const pane = document.getElementById('chat-messages');
    if (pane === null) {
        return;
    }

    const role = (sender === 'user') ? 'user' : 'bot';

    const row = document.createElement('div');
    row.className = role + '-message';

    const bubbleEl = document.createElement('div');
    bubbleEl.className = role + '-bubble';
    bubbleEl.textContent = text;

    row.appendChild(bubbleEl);
    pane.appendChild(row);

    // Keep the newest message in view.
    pane.scrollTop = pane.scrollHeight;
}
|
| 919 |
+
</script>
|
| 920 |
+
<style>
|
| 921 |
+
/* Chat message styling for user and bot */
|
| 922 |
+
#chat-messages .user-message {
|
| 923 |
+
display: flex;
|
| 924 |
+
justify-content: flex-end;
|
| 925 |
+
margin-bottom: 8px;
|
| 926 |
+
}
|
| 927 |
+
#chat-messages .bot-message {
|
| 928 |
+
display: flex;
|
| 929 |
+
justify-content: flex-start;
|
| 930 |
+
margin-bottom: 8px;
|
| 931 |
+
}
|
| 932 |
+
#chat-messages .user-bubble {
|
| 933 |
+
background-color: #4caf50;
|
| 934 |
+
color: #ffffff;
|
| 935 |
+
padding: 8px 12px;
|
| 936 |
+
border-radius: 12px;
|
| 937 |
+
max-width: 80%;
|
| 938 |
+
word-wrap: break-word;
|
| 939 |
+
}
|
| 940 |
+
#chat-messages .bot-bubble {
|
| 941 |
+
background-color: #f1f0f0;
|
| 942 |
+
color: #000000;
|
| 943 |
+
padding: 8px 12px;
|
| 944 |
+
border-radius: 12px;
|
| 945 |
+
max-width: 80%;
|
| 946 |
+
word-wrap: break-word;
|
| 947 |
+
}
|
| 948 |
+
</style>
|
| 949 |
</body>
|
| 950 |
</html>
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
|
|
|
|
| 2 |
flask
|
| 3 |
flask_login
|
| 4 |
flask_sqlalchemy
|
|
@@ -46,6 +47,17 @@ edge-tts==6.1.2
|
|
| 46 |
gunicorn
|
| 47 |
python-dotenv
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# Audio format conversion (critical for WebM/WAV handling)
|
| 50 |
pydub>=0.25.1
|
| 51 |
|
|
|
|
| 1 |
|
| 2 |
+
|
| 3 |
flask
|
| 4 |
flask_login
|
| 5 |
flask_sqlalchemy
|
|
|
|
| 47 |
gunicorn
|
| 48 |
python-dotenv
|
| 49 |
|
| 50 |
+
# --- Chatbot Dependencies ---
|
| 51 |
+
# The chatbot feature relies on a vector database and external API calls via the
|
| 52 |
+
# OpenAI client. ``chromadb`` provides a simple embedding store for semantic
|
| 53 |
+
# search over the knowledge base stored in ``chatbot/chatbot.txt``. ``openai``
|
| 54 |
+
# is required to communicate with the Groq API endpoint (which is compatible
|
| 55 |
+
# with the OpenAI client). ``flask-cors`` allows cross‑origin requests if we
|
| 56 |
+
# decide to decouple the chat interface in the future.
|
| 57 |
+
chromadb>=0.4.0
|
| 58 |
+
openai>=1.8.0
|
| 59 |
+
flask-cors>=4.0.0
|
| 60 |
+
|
| 61 |
# Audio format conversion (critical for WebM/WAV handling)
|
| 62 |
pydub>=0.25.1
|
| 63 |
|