Codingo / app.py
husseinelsaadi's picture
chatbot integrated
d8529bc
raw
history blame
22.6 kB
import os
import sys
# Hugging Face safe cache
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface/hub"
# Force Flask instance path to a writable temporary folder
safe_instance_path = "/tmp/flask_instance"
# Create the safe instance path up front so it exists before the Flask app is constructed
os.makedirs(safe_instance_path, exist_ok=True)
from flask import Flask, render_template, redirect, url_for, flash, request, jsonify
from flask_login import LoginManager, login_required, current_user
from werkzeug.utils import secure_filename
import sys
import json
from datetime import datetime
# Adjust sys.path for import flexibility
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(current_dir)
# Import and initialize DB
from backend.models.database import db, Job, Application, init_db
from backend.models.user import User
from backend.routes.auth import auth_bp, handle_resume_upload
from backend.routes.interview_api import interview_api
# Import additional utilities
import re
import json
# -----------------------------------------------------------------------------
# Chatbot setup
#
# The chatbot feature uses a local vector database (Chroma) to search the
# ``chatbot/chatbot.txt`` knowledge base and then calls the Groq API via the
# OpenAI client. To avoid the expensive model and database initialisation on
# every request, we lazily load the embeddings and collection the first time
# a chat query is processed. Subsequent requests reuse the same global
# objects. See ``init_chatbot()`` and ``get_chatbot_response()`` below for
# implementation details.
# Locations of the chatbot knowledge base (a plain-text file) and of the
# directory used for the Chroma vector store. Both are anchored at this
# file's directory, so the app can be deployed anywhere without changing
# configuration. ``init_chatbot()`` creates the ``chroma_db`` directory on
# demand if it is missing.
CHATBOT_TXT_PATH, CHATBOT_DB_DIR = (
    os.path.join(current_dir, 'chatbot', leaf_name)
    for leaf_name in ('chatbot.txt', 'chroma_db')
)
# API credentials for Groq. These values mirror those in the standalone
# ``chatbot/chatbot.py`` script. Both settings can be overridden through the
# ``GROQ_API_KEY`` / ``GROQ_MODEL`` environment variables (for example a
# Hugging Face Space secret); the literals below are only used as fallbacks
# so that existing deployments keep working unchanged.
#
# SECURITY: the fallback key is committed to the repository and must be
# treated as compromised. Rotate it and supply the real key through the
# environment instead of editing this file.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or "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"
GROQ_MODEL = os.environ.get("GROQ_MODEL") or "llama3-8b-8192"

# Global objects used by the chatbot. They remain ``None`` until
# ``init_chatbot()`` runs. After initialisation, ``_chatbot_embedder`` holds
# the SentenceTransformer model and ``_chatbot_collection`` is the Chroma
# collection with embedded knowledge base documents. The OpenAI client is
# imported inside ``get_chatbot_response()`` rather than here to avoid
# import side effects at module import time.
_chatbot_embedder = None
_chatbot_collection = None
def init_chatbot() -> None:
    """Initialise the chatbot embedding model and vector database.

    The function is idempotent within a process: once both module-level
    globals are populated, later calls return immediately. On the first call
    it reads the knowledge base from ``CHATBOT_TXT_PATH``, splits it into
    overlapping chunks, loads the ``all-MiniLM-L6-v2`` sentence transformer
    and opens (or creates) the ``chatbot`` Chroma collection configured with
    ``CHATBOT_DB_DIR``. The chunks are embedded and added only when the
    collection does not already contain the first chunk id, so an already
    populated store is not re-encoded.

    Raises
    ------
    OSError
        If the knowledge base file cannot be read or the store directory
        cannot be created.
    Exception
        Any error raised by the embedding model or the Chroma client while
        loading or adding documents is propagated.
    """
    global _chatbot_embedder, _chatbot_collection
    if _chatbot_embedder is not None and _chatbot_collection is not None:
        return

    # Perform imports locally to avoid slowing down application startup. These
    # libraries are heavy and only needed when the chatbot is used.
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from sentence_transformers import SentenceTransformer
    import chromadb
    from chromadb.config import Settings

    # Create the store directory explicitly so a missing directory surfaces
    # here as a clear filesystem error.
    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)

    # Read the raw FAQ text and split it into overlapping chunks to improve
    # retrieval granularity.
    with open(CHATBOT_TXT_PATH, encoding='utf-8') as f:
        text = f.read()
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
    docs = [doc.strip() for doc in splitter.split_text(text)]
    ids = [f'doc_{i}' for i in range(len(docs))]

    # Load the sentence transformer. To change the model, update the name here.
    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # NOTE(review): depending on the installed Chroma version, a plain
    # ``Client(Settings(persist_directory=...))`` may be in-memory only;
    # confirm whether ``chromadb.PersistentClient`` is required for the data
    # to survive restarts.
    client = chromadb.Client(Settings(persist_directory=CHATBOT_DB_DIR, anonymized_telemetry=False))
    collection = client.get_or_create_collection('chatbot')

    # Probe for the first chunk id to decide whether the collection already
    # holds the knowledge base. Any failure of the probe is treated as
    # "not populated" so that we fall through to adding the documents.
    try:
        existing = collection.get(ids=ids[:1])
        already_populated = bool(existing.get('documents'))
    except Exception:
        already_populated = False

    if not already_populated:
        # Encoding is the expensive step, so it is done only when the
        # documents actually have to be inserted.
        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
        collection.add(documents=docs, embeddings=embeddings, ids=ids)

    _chatbot_embedder = embedder
    _chatbot_collection = collection
def get_chatbot_response(query: str) -> str:
    """Generate a reply to the user's query using the knowledge base and Groq API.

    The function first calls ``init_chatbot()`` to ensure that the embedding
    model and Chroma collection are loaded. It then embeds the query,
    retrieves the three nearest knowledge-base chunks, joins them into a
    context string and sends that context plus the question to the Groq
    chat-completion endpoint through the legacy ``openai.ChatCompletion``
    interface. The system prompt instructs the model to refuse questions
    unrelated to Codingo.

    Parameters
    ----------
    query: str
        The user's input message.

    Returns
    -------
    str
        The assistant's reply with surrounding whitespace removed.

    Raises
    ------
    Exception
        Errors from initialisation, retrieval or the API call are propagated
        to the caller.
    """
    init_chatbot()
    # Local import to avoid pulling a heavy dependency on module import.
    import openai

    query_embedding = _chatbot_embedder.encode([query])[0]
    results = _chatbot_collection.query(query_embeddings=[query_embedding], n_results=3)
    # ``documents`` holds one result list per query embedding; we sent one.
    retrieved_docs = results['documents'][0]
    context = "\n".join(retrieved_docs)

    system_prompt = (
        "You are a helpful assistant for the Codingo website. "
        "Only answer questions that are directly relevant to the context provided. "
        "If the user asks anything unrelated, politely refuse by saying: "
        "\"I'm only trained to answer questions about the Codingo platform.\""
    )
    user_prompt = f"Context:\n{context}\n\nQuestion: {query}"

    # Pass the Groq credentials and base URL with this request only. Assigning
    # ``openai.api_key`` / ``openai.api_base`` would change module-wide state
    # and redirect every other OpenAI call in the process to Groq.
    completion = openai.ChatCompletion.create(
        api_key=GROQ_API_KEY,
        api_base="https://api.groq.com/openai/v1",
        model=GROQ_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=200,
        temperature=0.3,
    )
    return completion['choices'][0]['message']['content'].strip()
# Initialize Flask app
app = Flask(
    __name__,
    static_folder='backend/static',
    static_url_path='/static',
    template_folder='backend/templates',
    instance_path=safe_instance_path  # ✅ points to writable '/tmp/flask_instance'
)
# The key that signs session cookies is read from the ``SECRET_KEY``
# environment variable when it is set. The literal fallback keeps existing
# deployments working but is short and public, so it should not be relied on
# outside of local development.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or 'saadi'

# -----------------------------------------------------------------------------
# Cookie configuration for Hugging Face Spaces
#
# When running this app inside an iframe (as is typical on Hugging Face Spaces),
# browsers will drop cookies that have the default SameSite policy of ``Lax``.
# This prevents the Flask session cookie from being stored and means that
# ``login_user()`` will appear to have no effect - the user will be redirected
# back to the home page but remain anonymous. By explicitly setting the
# SameSite policy to ``None`` and enabling the ``Secure`` flag, we allow the
# session and remember cookies to be sent even when the app is embedded in an
# iframe. Without these settings the sign-up and login flows work locally
# but silently fail in Spaces, causing the "redirect to home page without
# anything" behaviour reported by users.
app.config['SESSION_COOKIE_SAMESITE'] = 'None'
app.config['SESSION_COOKIE_SECURE'] = True
app.config['REMEMBER_COOKIE_SAMESITE'] = 'None'
app.config['REMEMBER_COOKIE_SECURE'] = True

# Configure the database connection
# Use /tmp directory for database in Hugging Face Spaces
# Note: Data will be lost when the space restarts
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:////tmp/codingo.db'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

# NOTE(review): CSRF protection is imported but currently not enabled.
from flask_wtf.csrf import CSRFProtect
# csrf = CSRFProtect(app)

# Create necessary directories in writable locations
os.makedirs('/tmp/static/audio', exist_ok=True)
os.makedirs('/tmp/temp', exist_ok=True)

# Initialize DB with app
init_db(app)

# Flask-Login setup: anonymous users hitting a protected view are sent to
# the ``auth.login`` endpoint.
login_manager = LoginManager()
login_manager.login_view = 'auth.login'
login_manager.init_app(app)
@login_manager.user_loader
def load_user(user_id):
    """Return the ``User`` for a session-stored id, or ``None`` if there is none.

    Flask-Login passes the id exactly as it was stored in the session. An id
    that cannot be converted to an integer (for example from a tampered or
    stale cookie) is treated as "no such user" instead of raising, so the
    request continues as anonymous rather than failing with a server error.
    """
    try:
        primary_key = int(user_id)
    except (TypeError, ValueError):
        return None
    return db.session.get(User, primary_key)
# Register blueprints: ``auth_bp`` (from ``backend.routes.auth``) is mounted
# without a prefix, while the interview API routes are served under ``/api``.
app.register_blueprint(auth_bp)
app.register_blueprint(interview_api, url_prefix="/api")
# Application routes
@app.route('/')
def index():
    """Render the landing page template (``index.html``)."""
    return render_template('index.html')
@app.route('/jobs')
def jobs():
    """Render the job board listing every posting, most recent first."""
    newest_first = Job.date_posted.desc()
    return render_template('jobs.html', jobs=Job.query.order_by(newest_first).all())
@app.route('/job/<int:job_id>')
def job_detail(job_id):
    """Render one job posting; an unknown ``job_id`` yields a 404 response."""
    return render_template('job_detail.html', job=Job.query.get_or_404(job_id))
@app.route('/apply/<int:job_id>', methods=['GET', 'POST'])
@login_required
def apply(job_id):
    """Show the application form for a job and store a submitted application.

    GET renders ``apply.html``. POST saves the uploaded ``resume`` file via
    ``handle_resume_upload``, collects the manually typed skills, experience
    and education, stores them as JSON on a new ``Application`` row for the
    current user and redirects to the job list. An unknown ``job_id`` yields
    a 404 response.
    """
    job = Job.query.get_or_404(job_id)
    if request.method != 'POST':
        return render_template('apply.html', job=job)

    # The features returned by the upload helper are not used here; only the
    # error flag and the stored file path are.
    _features, upload_error, resume_path = handle_resume_upload(request.files.get('resume'))
    if upload_error:
        flash("Resume upload failed. Please try again.", "danger")
        return render_template('apply.html', job=job)

    def parse_entries(raw_value: str):
        """Split free text on commas, semicolons or newlines into trimmed, non-empty items."""
        pieces = (piece.strip() for piece in re.split(r'[\n,;]+', raw_value or ''))
        return [piece for piece in pieces if piece]

    # The form field names double as the keys of the stored feature dictionary.
    manual_features = {
        field: parse_entries(request.form.get(field, ''))
        for field in ("skills", "experience", "education")
    }

    # ``extracted_features`` is stored as a JSON string.
    db.session.add(Application(
        job_id=job_id,
        user_id=current_user.id,
        name=current_user.username,
        email=current_user.email,
        resume_path=resume_path,
        extracted_features=json.dumps(manual_features),
    ))
    db.session.commit()
    flash('Your application has been submitted successfully!', 'success')
    return redirect(url_for('jobs'))
@app.route('/my_applications')
@login_required
def my_applications():
    """List the current user's applications, most recently submitted first."""
    own_applications = Application.query.filter_by(user_id=current_user.id)
    newest_first = own_applications.order_by(Application.date_applied.desc())
    return render_template('my_applications.html', applications=newest_first.all())
# -----------------------------------------------------------------------------
# Chatbot API endpoint
#
# This route receives a JSON payload containing a ``message`` field from the
# front‑end chat widget. It validates the input, invokes the chatbot
# response function and returns a JSON response. Any errors are surfaced
# as a 400 or 500 response with an ``error`` message field.
@app.route('/chatbot', methods=['POST'])
def chatbot_endpoint():
    """Answer a chat message posted as JSON ``{"message": ...}``.

    Returns ``{"response": <reply>}`` on success, a 400 with
    ``{"error": "Empty message"}`` when the body is not a JSON object or has
    no usable message, and a 500 with an ``error`` field when generating the
    reply fails.
    """
    payload = request.get_json(silent=True)
    # Only a JSON object can carry a ``message`` key. Other valid JSON bodies
    # (lists, strings, numbers) are treated like a missing message instead of
    # failing on ``.get``.
    message = payload.get('message') if isinstance(payload, dict) else None
    # An explicit ``null`` must not be stringified into the literal "None".
    user_input = '' if message is None else str(message).strip()
    if not user_input:
        return jsonify({"error": "Empty message"}), 400
    try:
        reply = get_chatbot_response(user_input)
        return jsonify({"response": reply})
    except Exception as exc:
        # Log the exception to stderr for debugging in the console. In a
        # production setting you might want to log this to a proper logging
        # facility instead.
        print(f"Chatbot error: {exc}", file=sys.stderr)
        # NOTE(review): the raw exception text is returned to the client;
        # consider a generic message if internal details must not leak.
        return jsonify({"error": str(exc)}), 500
@app.route('/parse_resume', methods=['POST'])
def parse_resume():
    """Store an uploaded resume and report whatever features the helper returned.

    Responds with 400 and an ``error`` field when ``handle_resume_upload``
    reports a failure. Otherwise responds with 200 and a fixed set of profile
    fields; any field missing from the helper's result (or all of them, when
    the result is empty) is filled with an empty string or empty list.
    """
    features, error, _filepath = handle_resume_upload(request.files.get('resume'))
    if error:
        return {"error": "Error processing resume. Please try again."}, 400

    # An empty result is treated like a dictionary with no keys, so one code
    # path produces both the "blank" and the "pass-through" response.
    extracted = features if features else {}
    blank_profile = (
        ("name", ""),
        ("email", ""),
        ("mobile_number", ""),
        ("skills", []),
        ("experience", []),
        ("education", []),
        ("summary", ""),
    )
    return {field: extracted.get(field, default) for field, default in blank_profile}, 200
@app.route("/interview/<int:job_id>")
@login_required
def interview_page(job_id):
    """Render the interview page for a job the current user has applied to.

    An unknown ``job_id`` yields a 404 response. Users without an application
    for this job, or whose application has no stored features, are redirected
    to the job detail page with a warning.
    """
    job = Job.query.get_or_404(job_id)
    application = Application.query.filter_by(user_id=current_user.id, job_id=job_id).first()
    if application and application.extracted_features:
        # The stored features are a JSON string; decode them for the template.
        cv_data = json.loads(application.extracted_features)
        return render_template("interview.html", job=job, cv=cv_data)
    flash("Please apply for this job and upload your resume first.", "warning")
    return redirect(url_for('job_detail', job_id=job_id))
# -----------------------------------------------------------------------------
# Recruiter job posting route
#
# Authenticated users with a recruiter or admin role can access this page to
# create new job listings. Posted jobs are associated with the current
# recruiter via the ``recruiter_id`` foreign key on the ``Job`` model.
@app.route('/post_job', methods=['GET', 'POST'])
@login_required
def post_job():
    """Show the job posting form and create a job from a valid submission.

    Only users whose role is ``recruiter`` or ``admin`` may use this view;
    everyone else is redirected to the job list with a warning. On POST every
    field is required; each missing field produces its own flashed error and
    the form is rendered again. A valid submission is stored with the current
    user as recruiter and redirects to the job list.
    """
    if current_user.role not in ('recruiter', 'admin'):
        flash('You do not have permission to post jobs.', 'warning')
        return redirect(url_for('jobs'))
    if request.method != 'POST':
        return render_template('post_job.html')

    # Form field name -> message flashed when that field is left blank. The
    # order here is the order in which errors are reported.
    required_fields = (
        ('role', 'Job title is required.'),
        ('description', 'Job description is required.'),
        ('seniority', 'Seniority level is required.'),
        ('skills', 'Skills are required.'),
        ('company', 'Company name is required.'),
    )
    submitted = {name: request.form.get(name, '').strip() for name, _ in required_fields}
    problems = [message for name, message in required_fields if not submitted[name]]
    if problems:
        for message in problems:
            flash(message, 'danger')
        return render_template('post_job.html')

    # Skills may be separated by commas, semicolons or newlines; they are
    # stored as a JSON encoded list of trimmed, non-empty entries.
    trimmed_skills = map(str.strip, re.split(r'[\n,;]+', submitted['skills']))
    skills_json = json.dumps(list(filter(None, trimmed_skills)))

    db.session.add(Job(
        role=submitted['role'],
        description=submitted['description'],
        seniority=submitted['seniority'],
        skills=skills_json,
        company=submitted['company'],
        recruiter_id=current_user.id,
    ))
    db.session.commit()
    flash('Job posted successfully!', 'success')
    return redirect(url_for('jobs'))
# -----------------------------------------------------------------------------
# Recruiter dashboard route
#
# Displays a list of candidates who applied to jobs posted by the current
# recruiter. Candidates are sorted by a simple skill match score computed
# against the job requirements. A placeholder download button is provided
# for future PDF report functionality.
@app.route('/dashboard')
@login_required
def dashboard():
    """Show applicants to the current recruiter's jobs, best skill match first.

    Only users whose role is ``recruiter`` or ``admin`` may view the page;
    everyone else is redirected to the home page with a warning. Each
    application is given a label and numeric rank based on how many of the
    job's skills the candidate listed.
    """
    if current_user.role not in ('recruiter', 'admin'):
        flash('You do not have permission to access the dashboard.', 'warning')
        return redirect(url_for('index'))

    # (minimum overlap ratio, label, rank), checked from best to worst.
    score_bands = ((0.75, 'Excellent', 4), (0.5, 'Good', 3), (0.25, 'Medium', 2))
    # Used when the job lists no skills or the stored data cannot be scored.
    fallback_score = ('Medium', 2)

    def compute_score(application):
        """Return ``(label, rank)`` for the share of job skills the candidate has."""
        try:
            raw_features = application.extracted_features
            candidate_features = json.loads(raw_features) if raw_features else {}
            candidate_skills = candidate_features.get('skills', [])
            job = application.job
            job_skills = json.loads(job.skills) if job and job.skills else []
            if not job_skills:
                return fallback_score
            # Compare case-insensitively.
            offered = {skill.lower() for skill in candidate_skills}
            wanted = {skill.lower() for skill in job_skills}
            ratio = len(offered & wanted) / len(wanted) if wanted else 0
            for threshold, label, rank in score_bands:
                if ratio >= threshold:
                    return (label, rank)
            return ('Poor', 1)
        except Exception:
            return fallback_score

    job_ids = [job.id for job in Job.query.filter_by(recruiter_id=current_user.id).all()]
    candidates_with_scores = []
    if job_ids:
        candidate_apps = Application.query.filter(Application.job_id.in_(job_ids)).all()
        scored = [(record, compute_score(record)) for record in candidate_apps]
        # The sort is stable, so equally ranked candidates keep query order.
        candidates_with_scores = sorted(
            (
                {'application': record, 'score_label': label, 'score_value': rank}
                for record, (label, rank) in scored
            ),
            key=lambda entry: entry['score_value'],
            reverse=True,
        )
    return render_template('dashboard.html', candidates=candidates_with_scores)
if __name__ == '__main__':
    print("Starting Codingo application...")
    # Make sure all tables exist before serving requests.
    with app.app_context():
        db.create_all()
    # Use port from environment or default to 7860
    port = int(os.environ.get('PORT', 7860))
    # The server listens on all interfaces, so the interactive debugger must
    # not be enabled unconditionally: it lets anyone who can reach the port
    # run arbitrary code. Opt in explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=port)