Spaces:
Paused
Paused
Commit
·
d8529bc
1
Parent(s):
40ace38
chatbot integrated
Browse files- app.py +176 -1
- backend/templates/base.html +87 -0
- requirements.txt +12 -0
app.py
CHANGED
@@ -12,7 +12,7 @@ safe_instance_path = "/tmp/flask_instance"
|
|
12 |
# Create the safe instance path after imports
|
13 |
os.makedirs(safe_instance_path, exist_ok=True)
|
14 |
|
15 |
-
from flask import Flask, render_template, redirect, url_for, flash, request
|
16 |
from flask_login import LoginManager, login_required, current_user
|
17 |
from werkzeug.utils import secure_filename
|
18 |
import sys
|
@@ -31,6 +31,158 @@ from backend.routes.interview_api import interview_api
|
|
31 |
# Import additional utilities
|
32 |
import re
|
33 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# Initialize Flask app
|
35 |
app = Flask(
|
36 |
__name__,
|
@@ -178,6 +330,29 @@ def my_applications():
|
|
178 |
).order_by(Application.date_applied.desc()).all()
|
179 |
return render_template('my_applications.html', applications=applications)
|
180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
@app.route('/parse_resume', methods=['POST'])
|
182 |
def parse_resume():
|
183 |
file = request.files.get('resume')
|
|
|
12 |
# Create the safe instance path after imports
|
13 |
os.makedirs(safe_instance_path, exist_ok=True)
|
14 |
|
15 |
+
from flask import Flask, render_template, redirect, url_for, flash, request, jsonify
|
16 |
from flask_login import LoginManager, login_required, current_user
|
17 |
from werkzeug.utils import secure_filename
|
18 |
import sys
|
|
|
31 |
# Import additional utilities
|
32 |
import re
|
33 |
import json
|
34 |
+
|
35 |
+
# -----------------------------------------------------------------------------
|
36 |
+
# Chatbot setup
|
37 |
+
#
|
38 |
+
# The chatbot feature uses a local vector database (Chroma) to search the
|
39 |
+
# ``chatbot/chatbot.txt`` knowledge base and then calls the Groq API via the
|
40 |
+
# OpenAI client. To avoid the expensive model and database initialisation on
|
41 |
+
# every request, we lazily load the embeddings and collection the first time
|
42 |
+
# a chat query is processed. Subsequent requests reuse the same global
|
43 |
+
# objects. See ``init_chatbot()`` and ``get_chatbot_response()`` below for
|
44 |
+
# implementation details.
|
45 |
+
|
46 |
+
# Paths for the chatbot knowledge base and persistent vector store. We
|
47 |
+
# compute these relative to the current file so that the app can be deployed
|
48 |
+
# anywhere without needing to change configuration. The ``chroma_db``
|
49 |
+
# directory will be created automatically by the Chroma client if it does not
|
50 |
+
# exist.
|
51 |
+
CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
|
52 |
+
CHATBOT_DB_DIR = os.path.join(current_dir, 'chatbot', 'chroma_db')
|
53 |
+
|
54 |
+
# API credentials for Groq. The key is read from the ``GROQ_API_KEY``
# environment variable (for example a deployment secret) so that no
# credential is committed to the repository. When the variable is unset the
# key is an empty string and calls to the Groq API will fail to authenticate.
# NOTE(review): a key was previously committed in this file; it should be
# treated as compromised and revoked with the provider.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# Model name sent to the Groq chat-completions endpoint. It can be overridden
# with the ``GROQ_MODEL`` environment variable; the default is unchanged.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama3-8b-8192")
|
61 |
+
|
62 |
+
# Global objects used by the chatbot. They remain ``None`` until
|
63 |
+
# ``init_chatbot()`` runs. After initialisation, ``_chatbot_embedder`` holds
|
64 |
+
# the SentenceTransformer model and ``_chatbot_collection`` is the Chroma
|
65 |
+
# collection with embedded knowledge base documents. A separate import of
|
66 |
+
# the OpenAI client is performed in ``get_chatbot_response()`` to avoid
|
67 |
+
# unintentional import side effects at module import time.
|
68 |
+
_chatbot_embedder = None
|
69 |
+
_chatbot_collection = None
|
70 |
+
|
71 |
+
def init_chatbot() -> None:
    """Initialise the chatbot embedding model and vector database.

    The function is idempotent: once both module-level globals
    (``_chatbot_embedder`` and ``_chatbot_collection``) are populated it
    returns immediately. On the first call it reads the knowledge base from
    ``CHATBOT_TXT_PATH``, splits it into overlapping chunks, encodes the
    chunks with a sentence transformer and stores them in a Chroma
    collection persisted under ``CHATBOT_DB_DIR``.

    Raises
    ------
    OSError
        If the knowledge base file cannot be read.
    ValueError
        If the knowledge base contains no text to index.
    """
    global _chatbot_embedder, _chatbot_collection
    if _chatbot_embedder is not None and _chatbot_collection is not None:
        return

    # Imported locally: these libraries are heavy and only needed once the
    # chatbot is actually used.
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from sentence_transformers import SentenceTransformer
    import chromadb
    from chromadb.config import Settings

    # Create the persist directory up front so a missing directory surfaces
    # here rather than inside the Chroma client.
    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)

    # Read the raw FAQ text and split it into overlapping chunks to improve
    # retrieval granularity. Whitespace-only chunks are dropped.
    with open(CHATBOT_TXT_PATH, encoding='utf-8') as f:
        text = f.read()
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
    docs = [chunk.strip() for chunk in splitter.split_text(text) if chunk.strip()]
    if not docs:
        raise ValueError(f'Chatbot knowledge base is empty: {CHATBOT_TXT_PATH}')

    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    # Convert the encoder output to plain lists before handing it to Chroma.
    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32).tolist()

    # ``PersistentClient`` is the on-disk client in chromadb >= 0.4 (the
    # version pinned in requirements.txt).
    client = chromadb.PersistentClient(
        path=CHATBOT_DB_DIR,
        settings=Settings(anonymized_telemetry=False),
    )
    collection = client.get_or_create_collection('chatbot')

    ids = [f'doc_{i}' for i in range(len(docs))]
    previous_count = collection.count()
    # Upsert instead of add: re-running against an existing store overwrites
    # the chunks in place, so edits to the knowledge base are picked up.
    collection.upsert(ids=ids, documents=docs, embeddings=embeddings)
    # Remove chunks left over from a previously longer knowledge base; ids
    # are sequential, so anything at or beyond len(docs) is stale.
    stale_ids = [f'doc_{i}' for i in range(len(docs), previous_count)]
    if stale_ids:
        collection.delete(ids=stale_ids)

    _chatbot_embedder = embedder
    _chatbot_collection = collection
|
127 |
+
|
128 |
+
def get_chatbot_response(query: str) -> str:
    """Generate a reply to the user's query using the knowledge base and Groq API.

    ``init_chatbot()`` is called first so the embedding model and Chroma
    collection are loaded. The query is embedded, the three nearest
    knowledge-base chunks are retrieved and joined into a context string,
    and the context plus question are sent to Groq's OpenAI-compatible
    chat-completions endpoint.

    Parameters
    ----------
    query: str
        The user's input message.

    Returns
    -------
    str
        The assistant's reply with surrounding whitespace removed; an empty
        string if the API returned no content.

    Raises
    ------
    RuntimeError
        If ``GROQ_API_KEY`` is empty.
    Exception
        Errors from initialisation, retrieval or the API call propagate to
        the caller.
    """
    init_chatbot()
    # Local import to avoid pulling the dependency in at module import time.
    from openai import OpenAI

    if not GROQ_API_KEY:
        raise RuntimeError("GROQ_API_KEY is not configured")

    # Convert the query vector to a plain list before passing it to Chroma.
    query_embedding = _chatbot_embedder.encode([query])[0].tolist()
    results = _chatbot_collection.query(query_embeddings=[query_embedding], n_results=3)
    retrieved_docs = results['documents'][0]
    context = "\n".join(retrieved_docs)

    system_prompt = (
        "You are a helpful assistant for the Codingo website. "
        "Only answer questions that are directly relevant to the context provided. "
        "If the user asks anything unrelated, politely refuse by saying: "
        "\"I'm only trained to answer questions about the Codingo platform.\""
    )
    user_prompt = f"Context:\n{context}\n\nQuestion: {query}"

    # Use a dedicated client instance (openai >= 1.0 API) pointed at Groq.
    # The key and base URL live on this instance only, so other OpenAI usage
    # in the process is unaffected. The legacy ``openai.ChatCompletion``
    # interface no longer exists in the 1.x releases pinned in requirements.
    client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
    completion = client.chat.completions.create(
        model=GROQ_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=200,
        temperature=0.3,
    )

    # ``content`` may be None; normalise to an empty string before stripping.
    content = completion.choices[0].message.content
    return (content or "").strip()
|
186 |
# Initialize Flask app
|
187 |
app = Flask(
|
188 |
__name__,
|
|
|
330 |
).order_by(Application.date_applied.desc()).all()
|
331 |
return render_template('my_applications.html', applications=applications)
|
332 |
|
333 |
+
# -----------------------------------------------------------------------------
|
334 |
+
# Chatbot API endpoint
|
335 |
+
#
|
336 |
+
# This route receives a JSON payload containing a ``message`` field from the
|
337 |
+
# front‑end chat widget. It validates the input, invokes the chatbot
|
338 |
+
# response function and returns a JSON response. Any errors are surfaced
|
339 |
+
# as a 400 or 500 response with an ``error`` message field.
|
340 |
+
@app.route('/chatbot', methods=['POST'])
def chatbot_endpoint():
    """Handle a chat message posted by the front-end widget.

    Expects a JSON object with a ``message`` field. Returns
    ``{"response": <reply>}`` on success, ``{"error": ...}`` with status 400
    when the message is missing or blank, and ``{"error": ...}`` with status
    500 when generating the reply fails.
    """
    data = request.get_json(silent=True)
    # A valid JSON body that is not an object (e.g. a list or a string) has
    # no ``message`` field; treat it like an empty payload.
    if not isinstance(data, dict):
        data = {}
    raw_message = data.get('message')
    # ``null`` must count as empty rather than becoming the text "None".
    user_input = '' if raw_message is None else str(raw_message).strip()
    if not user_input:
        return jsonify({"error": "Empty message"}), 400
    try:
        reply = get_chatbot_response(user_input)
    except Exception:
        # Log the full traceback server-side, but do not echo exception text
        # to the client: it can expose internal paths or provider details.
        app.logger.exception("Chatbot error")
        return jsonify({"error": "The chatbot is temporarily unavailable. Please try again later."}), 500
    return jsonify({"response": reply})
|
355 |
+
|
356 |
@app.route('/parse_resume', methods=['POST'])
|
357 |
def parse_resume():
|
358 |
file = request.files.get('resume')
|
backend/templates/base.html
CHANGED
@@ -859,5 +859,92 @@
|
|
859 |
</div>
|
860 |
</div>
|
861 |
</footer>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
862 |
</body>
|
863 |
</html>
|
|
|
859 |
</div>
|
860 |
</div>
|
861 |
</footer>
|
862 |
+
|
863 |
+
{# -------------------------------------------------------------------------
|
864 |
+
Chatbot UI scripts and styles
|
865 |
+
|
866 |
+
The following script powers the floating chatbot widget located at the
|
867 |
+
bottom right of every page. When the user clicks the 💬 button, the
|
868 |
+
widget toggles visibility. Pressing Enter in the input box sends the
|
869 |
+
message to the `/chatbot` endpoint defined in ``app.py``. Both user
|
870 |
+
and bot messages are appended to the conversation pane with simple
|
871 |
+
styling defined below. Jinja's ``url_for`` helper is used to
|
872 |
+
dynamically generate the correct path to the endpoint at render time.
|
873 |
+
#}
|
874 |
+
<script type="text/javascript">
|
875 |
+
// Show or hide the chatbot panel. The panel counts as open only when its
// inline display style is exactly 'flex'; any other value opens it.
function toggleChatbot() {
    const panel = document.getElementById('chatbot-box');
    if (panel) {
        const isOpen = panel.style.display === 'flex';
        panel.style.display = isOpen ? 'none' : 'flex';
    }
}
|
881 |
+
|
882 |
+
// Key handler for the chat input: on Enter, post the trimmed message to the
// chatbot endpoint and append both the user's message and the reply (or an
// error message) to the conversation pane.
function sendChat(event) {
    if (event.key !== 'Enter') return;
    event.preventDefault();
    const input = document.getElementById('chat-input');
    // Guard against a missing input element, matching the other helpers.
    if (!input) return;
    const message = input.value.trim();
    if (!message) return;
    appendChatMessage(message, 'user');
    input.value = '';
    fetch("{{ url_for('chatbot_endpoint') }}", {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ message: message })
    })
        .then(response => response.json())
        .then(data => {
            if (data.response) {
                appendChatMessage(data.response, 'bot');
            } else {
                // Error payloads carry an `error` field; fall back to a
                // generic message when it is absent.
                appendChatMessage(data.error || 'Error occurred.', 'bot');
            }
        })
        .catch(() => {
            // Reached on a failed request or a body that is not valid JSON.
            appendChatMessage('Network error.', 'bot');
        });
}
|
906 |
+
|
907 |
+
// Append one chat bubble to the conversation pane and scroll to the bottom.
// `sender` of 'user' selects the user styling; any other value is styled as
// the bot. The text is set via textContent, so it is never parsed as HTML.
function appendChatMessage(text, sender) {
    const pane = document.getElementById('chat-messages');
    if (!pane) return;

    const role = (sender === 'user') ? 'user' : 'bot';

    const row = document.createElement('div');
    row.className = role + '-message';

    const bubbleEl = document.createElement('div');
    bubbleEl.className = role + '-bubble';
    bubbleEl.textContent = text;

    row.appendChild(bubbleEl);
    pane.appendChild(row);
    pane.scrollTop = pane.scrollHeight;
}
|
919 |
+
</script>
|
920 |
+
<style>
|
921 |
+
/* Chat message styling for user and bot */
|
922 |
+
#chat-messages .user-message {
|
923 |
+
display: flex;
|
924 |
+
justify-content: flex-end;
|
925 |
+
margin-bottom: 8px;
|
926 |
+
}
|
927 |
+
#chat-messages .bot-message {
|
928 |
+
display: flex;
|
929 |
+
justify-content: flex-start;
|
930 |
+
margin-bottom: 8px;
|
931 |
+
}
|
932 |
+
#chat-messages .user-bubble {
|
933 |
+
background-color: #4caf50;
|
934 |
+
color: #ffffff;
|
935 |
+
padding: 8px 12px;
|
936 |
+
border-radius: 12px;
|
937 |
+
max-width: 80%;
|
938 |
+
word-wrap: break-word;
|
939 |
+
}
|
940 |
+
#chat-messages .bot-bubble {
|
941 |
+
background-color: #f1f0f0;
|
942 |
+
color: #000000;
|
943 |
+
padding: 8px 12px;
|
944 |
+
border-radius: 12px;
|
945 |
+
max-width: 80%;
|
946 |
+
word-wrap: break-word;
|
947 |
+
}
|
948 |
+
</style>
|
949 |
</body>
|
950 |
</html>
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
|
|
|
2 |
flask
|
3 |
flask_login
|
4 |
flask_sqlalchemy
|
@@ -46,6 +47,17 @@ edge-tts==6.1.2
|
|
46 |
gunicorn
|
47 |
python-dotenv
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
# Audio format conversion (critical for WebM/WAV handling)
|
50 |
pydub>=0.25.1
|
51 |
|
|
|
1 |
|
2 |
+
|
3 |
flask
|
4 |
flask_login
|
5 |
flask_sqlalchemy
|
|
|
47 |
gunicorn
|
48 |
python-dotenv
|
49 |
|
50 |
+
# --- Chatbot Dependencies ---
|
51 |
+
# The chatbot feature relies on a vector database and external API calls via the
|
52 |
+
# OpenAI client. ``chromadb`` provides a simple embedding store for semantic
|
53 |
+
# search over the knowledge base stored in ``chatbot/chatbot.txt``. ``openai``
|
54 |
+
# is required to communicate with the Groq API endpoint (which is compatible
|
55 |
+
# with the OpenAI client). ``flask-cors`` allows cross‑origin requests if we
|
56 |
+
# decide to decouple the chat interface in the future.
|
57 |
+
chromadb>=0.4.0
|
58 |
+
openai>=1.8.0
|
59 |
+
flask-cors>=4.0.0
|
60 |
+
|
61 |
# Audio format conversion (critical for WebM/WAV handling)
|
62 |
pydub>=0.25.1
|
63 |
|