Commit 29cfacc (parent: 89511c3): "updated"

Files changed:
- app.py (+158, -161)
- requirements.txt (+7, -10)
app.py
CHANGED
@@ -54,23 +54,24 @@ import shutil
 shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
 CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
 CHATBOT_DB_DIR = "/tmp/chroma_db"
+
 # -----------------------------------------------------------------------------
 # Hugging Face model configuration
 #
 # The chatbot uses a small conversational model hosted on Hugging Face. To
 # allow easy experimentation, the model name can be overridden via the
 # ``HF_CHATBOT_MODEL`` environment variable. If unset, we fall back to
-# ``
-#
-HF_MODEL_NAME = os.getenv("HF_CHATBOT_MODEL", "
+# ``microsoft/DialoGPT-medium`` which provides better conversational quality
+# than blenderbot for our use case.
+HF_MODEL_NAME = os.getenv("HF_CHATBOT_MODEL", "microsoft/DialoGPT-medium")

 # Global Hugging Face model and tokenizer. These variables remain ``None``
 # until ``init_hf_model()`` is called. They are reused across all chatbot
 # requests to prevent repeatedly loading the large model into memory.
-_hf_model
-_hf_tokenizer
+_hf_model = None
+_hf_tokenizer = None

-def init_hf_model() -> None:
+def init_hf_model():
     """
     Initialise the Hugging Face conversational model and tokenizer.

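For reference, a minimal sketch of how the override above is expected to behave; the ``DialoGPT-small`` value is purely illustrative:

    import os

    # When the variable is set, it wins over the hard-coded default.
    os.environ["HF_CHATBOT_MODEL"] = "microsoft/DialoGPT-small"
    assert os.getenv("HF_CHATBOT_MODEL", "microsoft/DialoGPT-medium") == "microsoft/DialoGPT-small"

    # When it is unset, the default applies.
    del os.environ["HF_CHATBOT_MODEL"]
    assert os.getenv("HF_CHATBOT_MODEL", "microsoft/DialoGPT-medium") == "microsoft/DialoGPT-medium"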
@@ -83,25 +84,30 @@ def init_hf_model() -> None:
     if _hf_model is not None and _hf_tokenizer is not None:
         return

-    from transformers import
+    from transformers import AutoModelForCausalLM, AutoTokenizer
     import torch

     model_name = HF_MODEL_NAME
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    print(f"Loading model {model_name} on device {device}")

-    # Load tokenizer and model from Hugging Face
-    # specifying ``use_auth_token`` here since the default models are
-    # publicly accessible. Should you wish to use a private model, set
-    # HF_HOME/HF_TOKEN environment variables accordingly.
+    # Load tokenizer and model from Hugging Face
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model =
+    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+
+    # Set pad token to eos token if not set
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token

     _hf_model = model
     _hf_tokenizer = tokenizer
+    print(f"Model loaded successfully on {device}")
+
 _chatbot_embedder = None
 _chatbot_collection = None

-def init_chatbot() -> None:
+def init_chatbot():
     """Initialise the Chroma vector DB with chatbot.txt content."""
     global _chatbot_embedder, _chatbot_collection
     if _chatbot_embedder is not None and _chatbot_collection is not None:
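The pad-token patch in this hunk matters because DialoGPT reuses GPT-2's tokenizer, which defines an end-of-sequence token but no pad token; any padded batch would otherwise fail. A minimal sketch of the load-and-patch step in isolation, assuming ``transformers`` and ``torch`` are installed:

    from transformers import AutoModelForCausalLM, AutoTokenizer
    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
    model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium").to(device)

    # GPT-2-family tokenizers ship without a pad token; reuse eos for padding.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token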
@@ -115,81 +121,130 @@ def init_chatbot() -> None:

     os.makedirs(CHATBOT_DB_DIR, exist_ok=True)

-
-
+    # Read and parse the chatbot knowledge base
+    try:
+        with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
+            text = f.read()
+    except FileNotFoundError:
+        print(f"Warning: {CHATBOT_TXT_PATH} not found, using default content")
+        text = """
+        Codingo is an AI-powered recruitment platform designed to streamline job applications,
+        candidate screening, and hiring. We make hiring smarter, faster, and fairer through
+        automation and intelligent recommendations.
+        """
+
+    # Split text into chunks for vector search
     splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
-    docs = [doc.strip() for doc in splitter.split_text(text)]
+    docs = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]

+    # Initialize embedder
     embedder = SentenceTransformer("all-MiniLM-L6-v2")
     embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)

-
+    # Initialize Chroma client
+    client = chromadb.Client(Settings(
+        persist_directory=CHATBOT_DB_DIR,
+        anonymized_telemetry=False,
+        is_persistent=True
+    ))
+
+    # Get or create collection
     collection = client.get_or_create_collection("chatbot")
-
+
+    # Check if collection is empty and populate if needed
     try:
-        existing = collection.get(
+        existing = collection.get(limit=1)
         if not existing.get("documents"):
             raise ValueError("Empty Chroma DB")
     except Exception:
-
+        # Add documents to collection
+        ids = [f"doc_{i}" for i in range(len(docs))]
+        collection.add(
+            documents=docs,
+            embeddings=embeddings.tolist(),
+            ids=ids
+        )
+        print(f"Added {len(docs)} documents to Chroma DB")

     _chatbot_embedder = embedder
     _chatbot_collection = collection

-
 def get_chatbot_response(query: str) -> str:
     """Generate a reply to the user's query using Chroma + Hugging Face model."""
-    init_chatbot()
-    init_hf_model()
-
-    # Safety: prevent empty input
-    if not query or not query.strip():
-        return "Please type a question about the Codingo platform."
-
-    embedder = _chatbot_embedder
-    collection = _chatbot_collection
-    model = _hf_model
-    tokenizer = _hf_tokenizer
-    device = model.device
-
-    # Retrieve context from Chroma
-    query_embedding = embedder.encode([query])[0]
-    results = collection.query(query_embeddings=[query_embedding], n_results=3)
-    retrieved_docs = results.get("documents", [[]])[0] if results else []
-    context = "\n".join(retrieved_docs)
-
-    # System instruction
-    system_prompt = (
-        "You are a helpful assistant for the Codingo website. "
-        "Only answer questions relevant to the context provided. "
-        "If unrelated, reply: 'I'm only trained to answer questions about the Codingo platform.'"
-    )
-
-    prompt = f"{system_prompt}\n\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"
-
-    # ✅ Safe tokenization with truncation to avoid CUDA indexing issues
-    inputs = tokenizer(
-        prompt,
-        return_tensors="pt",
-        truncation=True,
-        max_length=256,  # Prevents long inputs
-        padding=True
-    ).to(device)
-
     try:
-
-
-
-
-
-
+        init_chatbot()
+        init_hf_model()
+
+        # Safety: prevent empty input
+        if not query or not query.strip():
+            return "Please type a question about the Codingo platform."
+
+        embedder = _chatbot_embedder
+        collection = _chatbot_collection
+        model = _hf_model
+        tokenizer = _hf_tokenizer
+        device = model.device
+
+        # Retrieve context from Chroma
+        query_embedding = embedder.encode([query])[0]
+        results = collection.query(
+            query_embeddings=[query_embedding.tolist()],
+            n_results=3
+        )
+        retrieved_docs = results.get("documents", [[]])[0] if results else []
+        context = "\n".join(retrieved_docs[:3])  # Limit context to top 3 results
+
+        # Build conversational prompt
+        system_instruction = (
+            "You are LUNA AI, a helpful assistant for the Codingo recruitment platform. "
+            "Use the provided context to answer questions about Codingo. "
+            "If the question is not related to Codingo, politely redirect the conversation. "
+            "Keep responses concise and friendly."
         )
-
-
-
-
+
+        # Format prompt for DialoGPT
+        prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser: {query}\nLUNA AI:"
+
+        # Tokenize with proper truncation
+        inputs = tokenizer.encode(
+            prompt,
+            return_tensors="pt",
+            truncation=True,
+            max_length=512,
+            padding=True
+        ).to(device)
+
+        # Generate response
+        with torch.no_grad():
+            output_ids = model.generate(
+                inputs,
+                max_length=inputs.shape[1] + 150,
+                num_beams=3,
+                do_sample=True,
+                temperature=0.7,
+                pad_token_id=tokenizer.eos_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                early_stopping=True
+            )
+
+        # Decode response
+        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+        # Extract only the bot's response
+        if "LUNA AI:" in response:
+            response = response.split("LUNA AI:")[-1].strip()
+        elif prompt in response:
+            response = response.replace(prompt, "").strip()
+
+        # Fallback if response is empty
+        if not response:
+            response = "I'm here to help you with questions about the Codingo platform. What would you like to know?"
+
+        return response
+
     except Exception as e:
-
+        print(f"Chatbot error: {str(e)}")
+        return "I'm having trouble processing your request. Please try again or ask about Codingo's features, job matching, or how to use the platform."

 # Initialize Flask app
 app = Flask(
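The rewritten ``get_chatbot_response`` is retrieval-augmented generation in miniature: embed the query, fetch the nearest chunks from Chroma, and condition generation on them. The retrieval half can be exercised on its own; a sketch, assuming the collection built by ``init_chatbot`` above (the query string is illustrative):

    import chromadb
    from chromadb.config import Settings
    from sentence_transformers import SentenceTransformer

    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    client = chromadb.Client(Settings(
        persist_directory="/tmp/chroma_db",
        anonymized_telemetry=False,
        is_persistent=True
    ))
    collection = client.get_or_create_collection("chatbot")

    query_embedding = embedder.encode(["What does Codingo do?"])[0]
    results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
    print(results["documents"][0])  # the three closest chunks, best first

On the generation half, note that ``tokenizer.encode`` returns input IDs only, so no attention mask reaches ``generate``; since pad and eos share an ID here, transformers may warn about that at runtime.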
@@ -197,37 +252,20 @@ app = Flask(
     static_folder='backend/static',
     static_url_path='/static',
     template_folder='backend/templates',
-    instance_path=safe_instance_path
+    instance_path=safe_instance_path
 )

 app.config['SECRET_KEY'] = 'saadi'

-# -----------------------------------------------------------------------------
 # Cookie configuration for Hugging Face Spaces
-#
-# When running this app inside an iframe (as is typical on Hugging Face Spaces),
-# browsers will drop cookies that have the default SameSite policy of ``Lax``.
-# This prevents the Flask session cookie from being stored and means that
-# ``login_user()`` will appear to have no effect – the user will be redirected
-# back to the home page but remain anonymous. By explicitly setting the
-# SameSite policy to ``None`` and enabling the ``Secure`` flag, we allow the
-# session and remember cookies to be sent even when the app is embedded in an
-# iframe. Without these settings the sign‑up and login flows work locally
-# but silently fail in Spaces, causing the "redirect to home page without
-# anything" behaviour reported by users.
 app.config['SESSION_COOKIE_SAMESITE'] = 'None'
 app.config['SESSION_COOKIE_SECURE'] = True
 app.config['REMEMBER_COOKIE_SAMESITE'] = 'None'
 app.config['REMEMBER_COOKIE_SECURE'] = True

 # Configure the database connection
-# Use /tmp directory for database in Hugging Face Spaces
-# Note: Data will be lost when the space restarts
 app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:////tmp/codingo.db'
 app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
-from flask_wtf.csrf import CSRFProtect
-
-# csrf = CSRFProtect(app)

 # Create necessary directories in writable locations
 os.makedirs('/tmp/static/audio', exist_ok=True)
@@ -249,7 +287,7 @@ def load_user(user_id):
 app.register_blueprint(auth_bp)
 app.register_blueprint(interview_api, url_prefix="/api")

-# Routes
+# Routes
 @app.route('/')
 def index():
     return render_template('index.html')
@@ -269,30 +307,17 @@ def job_detail(job_id):
 def apply(job_id):
     job = Job.query.get_or_404(job_id)
     if request.method == 'POST':
-        # Retrieve the uploaded resume file from the request. The ``name``
-        # attribute in the HTML form is ``resume``.
         file = request.files.get('resume')
-        # Use our safe upload helper to store the resume. ``filepath``
-        # contains the location where the file was saved so that recruiters
-        # can download it later. Resume parsing has been disabled, so
-        # ``features`` will always be an empty dictionary.
         features, error, filepath = handle_resume_upload(file)

-        # If there was an error saving the resume, notify the user. We no
-        # longer attempt to parse the resume contents, so the manual fields
-        # collected below will form the entire feature set.
         if error:
             flash("Resume upload failed. Please try again.", "danger")
             return render_template('apply.html', job=job)

-        # Collect the manually entered fields for skills, experience and education.
-        # Users can separate entries with commas, semicolons or newlines; we
-        # normalise the input into lists of trimmed strings.
         def parse_entries(raw_value: str):
             import re
             entries = []
             if raw_value:
-                # Split on commas, semicolons or newlines
                 for item in re.split(r'[\n,;]+', raw_value):
                     item = item.strip()
                     if item:
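For illustration, the splitting rule that ``parse_entries`` keeps applying (equivalent to its loop, condensed to one line):

    import re

    raw = "Python, Flask;  SQL\n\nDocker;"
    entries = [item.strip() for item in re.split(r'[\n,;]+', raw) if item.strip()]
    print(entries)  # ['Python', 'Flask', 'SQL', 'Docker']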
@@ -309,10 +334,6 @@ def apply(job_id):
             "education": parse_entries(education_input)
         }

-        # Prepare the application record. We ignore the empty ``features``
-        # returned by ``handle_resume_upload`` and instead persist the
-        # manually collected attributes. The extracted_features column
-        # expects a JSON string; json.dumps handles proper serialization.
         application = Application(
             job_id=job_id,
             user_id=current_user.id,
@@ -338,43 +359,33 @@ def my_applications():
     ).order_by(Application.date_applied.desc()).all()
     return render_template('my_applications.html', applications=applications)

-# -----------------------------------------------------------------------------
 # Chatbot API endpoint
-#
-# This route receives a JSON payload containing a ``message`` field from the
-# front‑end chat widget. It validates the input, invokes the chatbot
-# response function and returns a JSON response. Any errors are surfaced
-# as a 400 or 500 response with an ``error`` message field.
 @app.route('/chatbot', methods=['POST'])
 def chatbot_endpoint():
-
-    user_input = str(data.get('message', '')).strip()
-    if not user_input:
-        return jsonify({"error": "Empty message"}), 400
+    """Handle chatbot queries from the frontend."""
     try:
+        data = request.get_json(silent=True) or {}
+        user_input = str(data.get('message', '')).strip()
+
+        if not user_input:
+            return jsonify({"error": "Empty message"}), 400
+
+        # Get chatbot response
         reply = get_chatbot_response(user_input)
         return jsonify({"response": reply})
+
     except Exception as exc:
-
-
-        # facility instead.
-        print(f"Chatbot error: {exc}", file=sys.stderr)
-        return jsonify({"error": str(exc)}), 500
+        print(f"Chatbot endpoint error: {exc}", file=sys.stderr)
+        return jsonify({"error": "I'm having trouble right now. Please try again."}), 500

 @app.route('/parse_resume', methods=['POST'])
 def parse_resume():
     file = request.files.get('resume')
     features, error, filepath = handle_resume_upload(file)

-    # If the upload failed, return an error. Parsing is no longer
-    # supported, so we do not attempt to inspect the resume contents.
     if error:
         return {"error": "Error processing resume. Please try again."}, 400

-    # If no features were extracted (the normal case now), respond with
-    # empty fields rather than an error. This preserves the API
-    # contract expected by any front‑end code that might call this
-    # endpoint.
     if not features:
         return {
             "name": "",
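The endpoint's contract is unchanged by this hunk: POST a JSON body with a ``message`` field and receive ``{"response": ...}`` on success, or ``{"error": ...}`` with a 400 or 500 status. A hypothetical client call against a local run (7860 is the default port set at the bottom of the file):

    import requests

    resp = requests.post(
        "http://localhost:7860/chatbot",
        json={"message": "How do I apply for a job?"},  # illustrative question
    )
    print(resp.status_code, resp.json())

What did change is the failure mode: the 500 body now carries a friendly canned message instead of the raw exception text.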
@@ -386,8 +397,6 @@ def parse_resume():
             "summary": ""
         }, 200

-    # Should features contain values (unlikely in the new implementation),
-    # pass them through to the client.
     response = {
         "name": features.get('name', ''),
         "email": features.get('email', ''),
@@ -415,30 +424,20 @@ def interview_page(job_id):
     cv_data = json.loads(application.extracted_features)
     return render_template("interview.html", job=job, cv=cv_data)

-
-# -----------------------------------------------------------------------------
-# Recruiter job posting route
-#
-# Authenticated users with a recruiter or admin role can access this page to
-# create new job listings. Posted jobs are associated with the current
-# recruiter via the ``recruiter_id`` foreign key on the ``Job`` model.
 @app.route('/post_job', methods=['GET', 'POST'])
 @login_required
 def post_job():
-    # Only allow recruiters and admins to post jobs
     if current_user.role not in ('recruiter', 'admin'):
         flash('You do not have permission to post jobs.', 'warning')
         return redirect(url_for('jobs'))

     if request.method == 'POST':
-        # Extract fields from the form
         role_title = request.form.get('role', '').strip()
         description = request.form.get('description', '').strip()
         seniority = request.form.get('seniority', '').strip()
         skills_input = request.form.get('skills', '').strip()
         company = request.form.get('company', '').strip()

-        # Validate required fields
         errors = []
         if not role_title:
             errors.append('Job title is required.')
@@ -456,12 +455,9 @@ def post_job():
             flash(err, 'danger')
             return render_template('post_job.html')

-        # Normalise the skills input into a JSON encoded list. Users can
-        # separate entries with commas, semicolons or newlines.
         skills_list = [s.strip() for s in re.split(r'[\n,;]+', skills_input) if s.strip()]
         skills_json = json.dumps(skills_list)

-        # Create and persist the new job
         new_job = Job(
             role=role_title,
             description=description,
@@ -476,52 +472,35 @@ def post_job():
         flash('Job posted successfully!', 'success')
         return redirect(url_for('jobs'))

-    # GET request returns the form
     return render_template('post_job.html')

-
-# -----------------------------------------------------------------------------
-# Recruiter dashboard route
-#
-# Displays a list of candidates who applied to jobs posted by the current
-# recruiter. Candidates are sorted by a simple skill match score computed
-# against the job requirements. A placeholder download button is provided
-# for future PDF report functionality.
 @app.route('/dashboard')
 @login_required
 def dashboard():
-    # Only recruiters and admins can view the dashboard
     if current_user.role not in ('recruiter', 'admin'):
         flash('You do not have permission to access the dashboard.', 'warning')
         return redirect(url_for('index'))

-    # Fetch jobs posted by the current recruiter
     posted_jobs = Job.query.filter_by(recruiter_id=current_user.id).all()
     job_ids = [job.id for job in posted_jobs]

     candidates_with_scores = []
     if job_ids:
-        # Fetch applications associated with these job IDs
         candidate_apps = Application.query.filter(Application.job_id.in_(job_ids)).all()

-        # Helper to compute a match score based on skills overlap
         def compute_score(application):
             try:
-                # Extract candidate skills from stored JSON
                 candidate_features = json.loads(application.extracted_features) if application.extracted_features else {}
                 candidate_skills = candidate_features.get('skills', [])
-                # Retrieve the job's required skills and parse from JSON
                 job_skills = json.loads(application.job.skills) if application.job and application.job.skills else []
                 if not job_skills:
-                    return ('Medium', 2)
+                    return ('Medium', 2)

-                # Compute case‑insensitive intersection
                 candidate_set = {s.lower() for s in candidate_skills}
                 job_set = {s.lower() for s in job_skills}
                 common = candidate_set & job_set
                 ratio = len(common) / len(job_set) if job_set else 0

-                # Map ratio to qualitative score
                 if ratio >= 0.75:
                     return ('Excellent', 4)
                 elif ratio >= 0.5:
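A worked example of the overlap ratio retained here (the tiers between 'Excellent' and the 'Medium' fallback sit on lines outside this hunk, so they are not reproduced):

    candidate_skills = ["Python", "flask", "SQL"]
    job_skills = ["Python", "Flask", "Docker", "SQL"]

    candidate_set = {s.lower() for s in candidate_skills}  # {'python', 'flask', 'sql'}
    job_set = {s.lower() for s in job_skills}              # four required skills
    ratio = len(candidate_set & job_set) / len(job_set)    # 3 / 4 = 0.75 -> 'Excellent'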
@@ -533,7 +512,6 @@ def dashboard():
             except Exception:
                 return ('Medium', 2)

-        # Build a list of candidate applications with computed scores
         for app_record in candidate_apps:
             score_label, score_value = compute_score(app_record)
             candidates_with_scores.append({
@@ -542,15 +520,34 @@ def dashboard():
                 'score_value': score_value
             })

-    # Sort candidates from highest to lowest score
     candidates_with_scores.sort(key=lambda item: item['score_value'], reverse=True)

     return render_template('dashboard.html', candidates=candidates_with_scores)

 if __name__ == '__main__':
     print("Starting Codingo application...")
+
+    # Import torch to check GPU availability
+    try:
+        import torch
+        if torch.cuda.is_available():
+            print(f"GPU Available: {torch.cuda.get_device_name(0)}")
+            print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
+        else:
+            print("No GPU available, using CPU")
+    except ImportError:
+        print("PyTorch not installed, chatbot will use CPU")
+
     with app.app_context():
         db.create_all()
+        # Pre-initialize chatbot on startup for faster first response
+        print("Initializing chatbot...")
+        try:
+            init_chatbot()
+            init_hf_model()
+            print("Chatbot initialized successfully")
+        except Exception as e:
+            print(f"Chatbot initialization warning: {e}")

     # Use port from environment or default to 7860
     port = int(os.environ.get('PORT', 7860))
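One caveat on the warm-up added above: it lives under ``if __name__ == '__main__':``, which never executes when the app is served by gunicorn (also listed in requirements.txt), so in that deployment the first request would still pay the full load cost. A sketch of a module-level alternative, if that matters:

    # At module scope this also runs when a WSGI server imports the app;
    # get_chatbot_response() still initialises lazily if the warm-up fails.
    try:
        init_chatbot()
        init_hf_model()
    except Exception as exc:
        print(f"Chatbot warm-up failed; deferring to first request: {exc}")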
requirements.txt
CHANGED
@@ -28,15 +28,12 @@ cohere==5.16.1
 # Vector DB
 qdrant-client==1.14.3

-# PDF & DOCX parsing (removed; resume parsing is no longer supported)
-
 # Audio processing
 ffmpeg-python==0.2.0
 inputimeout==1.0.4
 evaluate==0.4.5
 accelerate==0.29.3
 huggingface_hub==0.20.3
-# textract removed; no resume parsing
 bitsandbytes
 faster-whisper==0.10.0
 edge-tts==6.1.2
@@ -46,17 +43,17 @@ gunicorn
 python-dotenv

 # --- Chatbot Dependencies ---
-#
-# the knowledge base stored in ``chatbot/chatbot.txt``. ``chromadb`` provides
-# this capability. We removed the OpenAI dependency in favour of a local
-# Hugging Face model, so no openai package is required. ``flask-cors`` is
-# retained to allow cross‑origin requests should the chat UI be decoupled in
-# the future.
+# Vector database for semantic search
 chromadb>=0.4.0
+# CORS support for potential future decoupling
 flask-cors>=4.0.0

 # Audio format conversion (critical for WebM/WAV handling)
 pydub>=0.25.1

 # Better error handling for API calls
-requests>=2.31.0
+requests>=2.31.0
+
+# Additional dependencies for improved chatbot functionality
+# Note: We're using DialoGPT which requires transformers (already included above)
+# No OpenAI dependency needed - using Hugging Face models instead
|