husseinelsaadi commited on
Commit
d8529bc
·
1 Parent(s): 40ace38

chatbot integrated

Browse files
Files changed (3) hide show
  1. app.py +176 -1
  2. backend/templates/base.html +87 -0
  3. requirements.txt +12 -0
app.py CHANGED
@@ -12,7 +12,7 @@ safe_instance_path = "/tmp/flask_instance"
12
  # Create the safe instance path after imports
13
  os.makedirs(safe_instance_path, exist_ok=True)
14
 
15
- from flask import Flask, render_template, redirect, url_for, flash, request
16
  from flask_login import LoginManager, login_required, current_user
17
  from werkzeug.utils import secure_filename
18
  import sys
@@ -31,6 +31,158 @@ from backend.routes.interview_api import interview_api
31
  # Import additional utilities
32
  import re
33
  import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Initialize Flask app
35
  app = Flask(
36
  __name__,
@@ -178,6 +330,29 @@ def my_applications():
178
  ).order_by(Application.date_applied.desc()).all()
179
  return render_template('my_applications.html', applications=applications)
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  @app.route('/parse_resume', methods=['POST'])
182
  def parse_resume():
183
  file = request.files.get('resume')
 
12
  # Create the safe instance path after imports
13
  os.makedirs(safe_instance_path, exist_ok=True)
14
 
15
+ from flask import Flask, render_template, redirect, url_for, flash, request, jsonify
16
  from flask_login import LoginManager, login_required, current_user
17
  from werkzeug.utils import secure_filename
18
  import sys
 
31
  # Import additional utilities
32
  import re
33
  import json
34
+
35
# -----------------------------------------------------------------------------
# Chatbot setup
#
# The chatbot feature uses a local vector database (Chroma) to search the
# ``chatbot/chatbot.txt`` knowledge base and then calls the Groq API via the
# OpenAI client. To avoid the expensive model and database initialisation on
# every request, the embeddings and collection are loaded lazily the first
# time a chat query is processed; subsequent requests reuse the same global
# objects. See ``init_chatbot()`` and ``get_chatbot_response()`` below.

# Paths for the chatbot knowledge base and persistent vector store, computed
# relative to the current file so the app can be deployed anywhere without
# configuration changes. The ``chroma_db`` directory is created automatically
# if it does not exist.
CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
CHATBOT_DB_DIR = os.path.join(current_dir, 'chatbot', 'chroma_db')

# API credentials for Groq. The key is read from the environment rather than
# committed to the repository: a key checked into source control must be
# treated as compromised and rotated. Set GROQ_API_KEY (e.g. via a .env file
# loaded by python-dotenv, or your platform's secrets manager) before using
# the chatbot; GROQ_MODEL may be overridden the same way.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama3-8b-8192")

# Global objects used by the chatbot. They remain ``None`` until
# ``init_chatbot()`` runs. After initialisation, ``_chatbot_embedder`` holds
# the SentenceTransformer model and ``_chatbot_collection`` is the Chroma
# collection with the embedded knowledge-base documents. The OpenAI client
# is imported inside ``get_chatbot_response()`` to avoid import side effects
# at module load time.
_chatbot_embedder = None
_chatbot_collection = None
70
+
71
def init_chatbot() -> None:
    """Initialise the chatbot embedding model and vector database.

    Idempotent: the heavy initialisation runs only once; later calls return
    immediately once the module-level globals are populated. The knowledge
    base is read from ``CHATBOT_TXT_PATH``, split into overlapping chunks,
    encoded with a lightweight sentence transformer, and stored in a
    persistent Chroma collection under ``CHATBOT_DB_DIR``. Telemetry is
    disabled so the Chroma client makes no external network calls.

    NOTE(review): this is not thread-safe -- two concurrent first requests
    may both run the initialisation. That is harmless (the loser simply
    overwrites the globals with equivalent objects) but wasteful; guard with
    a ``threading.Lock`` if startup cost becomes a problem.
    """
    global _chatbot_embedder, _chatbot_collection
    if _chatbot_embedder is not None and _chatbot_collection is not None:
        return

    # Local imports: these libraries are heavy and only needed when the
    # chatbot is actually used, so keep them off the app startup path.
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from sentence_transformers import SentenceTransformer
    import chromadb
    from chromadb.config import Settings

    # Chroma creates the directory if missing, but creating it explicitly
    # avoids permission errors on some platforms.
    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)

    # Read the raw FAQ text and split into overlapping chunks to improve
    # retrieval granularity; sizes are tuned for the small knowledge base.
    with open(CHATBOT_TXT_PATH, encoding='utf-8') as f:
        text = f.read()
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
    docs = [doc.strip() for doc in splitter.split_text(text)]

    # Small model that runs quickly on CPU.
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)

    # BUGFIX: requirements.txt pins ``chromadb>=0.4.0``, which removed the
    # legacy ``Settings(persist_directory=...)`` configuration used before.
    # ``PersistentClient`` is the supported way to get an on-disk store.
    client = chromadb.PersistentClient(
        path=CHATBOT_DB_DIR,
        settings=Settings(anonymized_telemetry=False),
    )
    collection = client.get_or_create_collection('chatbot')

    # Only (re)embed when the collection is empty: ``count()`` is cheaper and
    # clearer than probing for one document and catching the failure.
    if collection.count() == 0:
        ids = [f'doc_{i}' for i in range(len(docs))]
        collection.add(documents=docs, embeddings=embeddings, ids=ids)

    _chatbot_embedder = embedder
    _chatbot_collection = collection
127
+
128
def get_chatbot_response(query: str) -> str:
    """Generate a reply to the user's query using the knowledge base and Groq API.

    Ensures the embedding model and Chroma collection are loaded via
    ``init_chatbot()``, embeds the query, retrieves the three most relevant
    context chunks via nearest-neighbour search, and sends the context plus
    question to the Groq API through the OpenAI v1 client. The system prompt
    constrains the model to Codingo-related questions; unrelated queries are
    politely refused. Exceptions from the API call propagate to the caller.

    Parameters
    ----------
    query: str
        The user's input message.

    Returns
    -------
    str
        The assistant's reply.
    """
    init_chatbot()
    # BUGFIX: requirements.txt pins ``openai>=1.8.0``. The legacy module-level
    # API (``openai.api_base`` / ``openai.ChatCompletion.create`` and
    # dict-style response access) was removed in openai v1 and raises at
    # runtime, so we use the v1 ``OpenAI`` client instead. The import stays
    # local to avoid pulling the dependency at module import time.
    from openai import OpenAI

    embedder = _chatbot_embedder
    collection = _chatbot_collection

    query_embedding = embedder.encode([query])[0]
    results = collection.query(query_embeddings=[query_embedding], n_results=3)
    retrieved_docs = results['documents'][0]
    context = "\n".join(retrieved_docs)

    system_prompt = (
        "You are a helpful assistant for the Codingo website. "
        "Only answer questions that are directly relevant to the context provided. "
        "If the user asks anything unrelated, politely refuse by saying: "
        "\"I'm only trained to answer questions about the Codingo platform.\""
    )
    user_prompt = f"Context:\n{context}\n\nQuestion: {query}"

    # A dedicated client instance keeps the Groq base URL from leaking into
    # other parts of the application that may use OpenAI for other providers.
    client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")

    completion = client.chat.completions.create(
        model=GROQ_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=200,
        temperature=0.3,
    )

    return completion.choices[0].message.content.strip()
186
  # Initialize Flask app
187
  app = Flask(
188
  __name__,
 
330
  ).order_by(Application.date_applied.desc()).all()
331
  return render_template('my_applications.html', applications=applications)
332
 
333
# -----------------------------------------------------------------------------
# Chatbot API endpoint
#
# Receives a JSON payload with a ``message`` field from the front-end chat
# widget, validates it, delegates to ``get_chatbot_response()`` and returns
# the reply as JSON. Failures surface as a 400 (empty input) or 500 (chatbot
# error) response carrying an ``error`` field.
@app.route('/chatbot', methods=['POST'])
def chatbot_endpoint():
    payload = request.get_json(silent=True) or {}
    user_input = str(payload.get('message', '')).strip()
    if not user_input:
        return jsonify({"error": "Empty message"}), 400
    try:
        reply = get_chatbot_response(user_input)
        return jsonify({"response": reply})
    except Exception as exc:
        # Surface the failure on stderr for console debugging; a proper
        # logging facility would be preferable in production.
        print(f"Chatbot error: {exc}", file=sys.stderr)
        return jsonify({"error": str(exc)}), 500
355
+
356
  @app.route('/parse_resume', methods=['POST'])
357
  def parse_resume():
358
  file = request.files.get('resume')
backend/templates/base.html CHANGED
@@ -859,5 +859,92 @@
859
  </div>
860
  </div>
861
  </footer>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
862
  </body>
863
  </html>
 
859
  </div>
860
  </div>
861
  </footer>
862
+
863
+ {# -------------------------------------------------------------------------
864
+ Chatbot UI scripts and styles
865
+
866
+ The following script powers the floating chatbot widget located at the
867
+ bottom right of every page. When the user clicks the 💬 button, the
868
+ widget toggles visibility. Pressing Enter in the input box sends the
869
+ message to the `/chatbot` endpoint defined in ``app.py``. Both user
870
+ and bot messages are appended to the conversation pane with simple
871
+ styling defined below. Jinja's ``url_for`` helper is used to
872
+ dynamically generate the correct path to the endpoint at render time.
873
+ #}
874
+ <script type="text/javascript">
875
+ function toggleChatbot() {
876
+ const box = document.getElementById('chatbot-box');
877
+ if (!box) return;
878
+ // Toggle between flex (visible) and none (hidden)
879
+ box.style.display = (box.style.display === 'flex') ? 'none' : 'flex';
880
+ }
881
+
882
+ function sendChat(event) {
883
+ if (event.key === 'Enter') {
884
+ event.preventDefault();
885
+ const input = document.getElementById('chat-input');
886
+ const message = input.value.trim();
887
+ if (!message) return;
888
+ appendChatMessage(message, 'user');
889
+ input.value = '';
890
+ fetch("{{ url_for('chatbot_endpoint') }}", {
891
+ method: 'POST',
892
+ headers: { 'Content-Type': 'application/json' },
893
+ body: JSON.stringify({ message: message })
894
+ }).then(response => response.json())
895
+ .then(data => {
896
+ if (data.response) {
897
+ appendChatMessage(data.response, 'bot');
898
+ } else {
899
+ appendChatMessage(data.error || 'Error occurred.', 'bot');
900
+ }
901
+ }).catch(() => {
902
+ appendChatMessage('Network error.', 'bot');
903
+ });
904
+ }
905
+ }
906
+
907
+ function appendChatMessage(text, sender) {
908
+ const container = document.getElementById('chat-messages');
909
+ if (!container) return;
910
+ const wrapper = document.createElement('div');
911
+ wrapper.className = sender === 'user' ? 'user-message' : 'bot-message';
912
+ const bubble = document.createElement('div');
913
+ bubble.className = sender === 'user' ? 'user-bubble' : 'bot-bubble';
914
+ bubble.textContent = text;
915
+ wrapper.appendChild(bubble);
916
+ container.appendChild(wrapper);
917
+ container.scrollTop = container.scrollHeight;
918
+ }
919
+ </script>
920
<style>
  /* Chat message styling for user and bot. Each message is a flex row so
     the bubble can be pushed to the right (user) or left (bot). */
  #chat-messages .user-message {
    display: flex;
    justify-content: flex-end;
    margin-bottom: 8px;
  }
  #chat-messages .bot-message {
    display: flex;
    justify-content: flex-start;
    margin-bottom: 8px;
  }
  /* Green bubble for the user's own messages. max-width plus word-wrap
     keeps long unbroken strings from overflowing the chat pane. */
  #chat-messages .user-bubble {
    background-color: #4caf50;
    color: #ffffff;
    padding: 8px 12px;
    border-radius: 12px;
    max-width: 80%;
    word-wrap: break-word;
  }
  /* Neutral grey bubble for bot replies, mirroring the user bubble. */
  #chat-messages .bot-bubble {
    background-color: #f1f0f0;
    color: #000000;
    padding: 8px 12px;
    border-radius: 12px;
    max-width: 80%;
    word-wrap: break-word;
  }
</style>
949
  </body>
950
  </html>
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
 
 
2
  flask
3
  flask_login
4
  flask_sqlalchemy
@@ -46,6 +47,17 @@ edge-tts==6.1.2
46
  gunicorn
47
  python-dotenv
48
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Audio format conversion (critical for WebM/WAV handling)
50
  pydub>=0.25.1
51
 
 
1
 
2
+
3
  flask
4
  flask_login
5
  flask_sqlalchemy
 
47
  gunicorn
48
  python-dotenv
49
 
50
+ # --- Chatbot dependencies ---
+ # ``chromadb`` provides the persistent vector store used for semantic search
+ # over the knowledge base in ``chatbot/chatbot.txt``. ``openai`` (the v1
+ # client) communicates with the Groq API endpoint, which is OpenAI-compatible.
+ # ``flask-cors`` allows cross-origin requests should the chat interface ever
+ # be decoupled from this app.
57
+ chromadb>=0.4.0
58
+ openai>=1.8.0
59
+ flask-cors>=4.0.0
60
+
61
  # Audio format conversion (critical for WebM/WAV handling)
62
  pydub>=0.25.1
63