Spaces:

husseinelsaadi
/

Codingo

Paused

App Files Files Community

Codingo / backend /services /report_generator.py

husseinelsaadi

updated

b822cf0 13 days ago

raw

history blame

23.9 kB

	"""Utilities for assembling and exporting interview reports.

	This module provides two primary helpers used by the recruiter dashboard:

	``generate_llm_interview_report(application)``
	Given a candidate's ``Application`` record, assemble a plain-text report
	summarising the interview. The report covers the candidate's profile,
	the job details and a skills match score computed from the stored
	skill lists. When the application carries an ``interview_log`` (a JSON
	list of question/answer entries, each with an optional ``evaluation``),
	the per-question transcript, scores and feedback are appended;
	otherwise the report notes that no interview log data was recorded.

	``create_pdf_report(report_text)``
	Convert the text report into a PDF. The implementation leverages
	Matplotlib's PDF backend (available by default) to avoid heavyweight
	dependencies such as ReportLab or WeasyPrint, which are absent from
	the runtime environment. The text is parsed back into structured
	fields and drawn as a summary page (header, overall score, skills
	match and the start of the transcript); further interview questions
	are rendered by a continuation-page helper.
	"""

	from __future__ import annotations
	import json
	from io import BytesIO
	import textwrap
	from typing import List, Dict, Any, Tuple
	import matplotlib.pyplot as plt
	from matplotlib.backends.backend_pdf import PdfPages
	import matplotlib.patches as mpatches
	from matplotlib.patches import Rectangle, FancyBboxPatch
	from datetime import datetime


	def _decode_json_field(raw, fallback):
	    """Decode a JSON text column, returning ``fallback`` when empty or invalid."""
	    if not raw:
	        return fallback
	    if isinstance(raw, (str, bytes, bytearray)):
	        try:
	            return json.loads(raw)
	        except (TypeError, ValueError):
	            return fallback
	    # Already-decoded values (e.g. from a JSON column type) pass through.
	    return raw


	def _skill_names(value) -> List[str]:
	    """Return the non-empty entries of ``value`` as stripped strings."""
	    if not isinstance(value, (list, tuple)):
	        return []
	    names = [str(item).strip() for item in value if item is not None]
	    return [name for name in names if name]


	def _one_line(value, default: str = 'N/A') -> str:
	    """Return ``value`` as text with line breaks collapsed to single spaces."""
	    if value is None:
	        return default
	    parts = (part.strip() for part in str(value).splitlines())
	    return ' '.join(part for part in parts if part)


	def generate_llm_interview_report(application) -> str:
	    """Generate a human-readable interview report for a candidate.

	    The report lists the candidate's name and email, the job role and
	    company, the application date, a skills match summary and, when
	    ``application.interview_log`` holds entries, a per-question transcript
	    with score and feedback.

	    The output is line-oriented (one ``Label: value`` field per line).
	    Questions, answers, scores and feedback are therefore flattened to a
	    single line each so that embedded line breaks cannot split a field or
	    introduce lines that look like other fields.

	    Parameters
	    ----------
	    application : backend.models.database.Application
	        The application record. The attributes read are ``name``,
	        ``email``, ``job`` (with ``role``, ``company`` and JSON ``skills``),
	        ``date_applied``, ``extracted_features`` (JSON with a ``skills``
	        list) and ``interview_log`` (JSON list of entries).

	    Returns
	    -------
	    str
	        A multi-line string containing the report contents.
	    """
	    job = application.job

	    # Extract candidate skills and job skills; malformed data yields [].
	    features = _decode_json_field(getattr(application, 'extracted_features', None), {})
	    if not isinstance(features, dict):
	        features = {}
	    candidate_skills: List[str] = _skill_names(features.get('skills'))
	    job_skills: List[str] = _skill_names(
	        _decode_json_field(getattr(job, 'skills', None) if job else None, [])
	    )

	    # Compare case-insensitively and avoid dividing by zero when the job
	    # lists no skills.
	    candidate_set = {s.lower() for s in candidate_skills}
	    job_set = {s.lower() for s in job_skills}
	    common = candidate_set & job_set
	    ratio = len(common) / len(job_set) if job_set else 0.0

	    if ratio >= 0.75:
	        score_label = 'Excellent'
	    elif ratio >= 0.5:
	        score_label = 'Good'
	    elif ratio >= 0.25:
	        score_label = 'Medium'
	    else:
	        score_label = 'Poor'

	    # Assemble report lines
	    lines: List[str] = [
	        'Interview Report',
	        '=================',
	        '',
	        f'Candidate Name: {application.name}',
	        f'Candidate Email: {application.email}',
	    ]
	    if job:
	        lines.append(f'Job Applied: {job.role}')
	        lines.append(f'Company: {job.company}')
	    else:
	        lines.append('Job Applied: N/A')
	        lines.append('Company: N/A')

	    # Format date_applied if available
	    try:
	        date_str = application.date_applied.strftime('%Y-%m-%d') if application.date_applied else 'N/A'
	    except (AttributeError, TypeError, ValueError):
	        date_str = 'N/A'
	    lines.append(f'Date Applied: {date_str}')
	    lines.append('')

	    lines.append('Skills Match Summary:')
	    formatted_job_skills = ', '.join(job_skills) if job_skills else 'N/A'
	    formatted_candidate_skills = ', '.join(candidate_skills) if candidate_skills else 'N/A'
	    formatted_common = ', '.join(sorted(common)) if common else 'None'

	    lines.append(f' Required Skills: {formatted_job_skills}')
	    lines.append(f' Candidate Skills: {formatted_candidate_skills}')
	    lines.append(f' Skills in Common: {formatted_common}')
	    lines.append(f' Match Ratio: {ratio * 100:.0f}%')
	    lines.append(f' Score: {score_label}')
	    lines.append('')

	    lines.append('Interview Transcript & Evaluation:')
	    qa_log = _decode_json_field(getattr(application, 'interview_log', None), [])
	    # Skip entries that are not objects instead of aborting the transcript.
	    entries = [e for e in qa_log if isinstance(e, dict)] if isinstance(qa_log, list) else []

	    if not entries:
	        lines.append("No interview log data recorded.")

	    for idx, entry in enumerate(entries, 1):
	        question = _one_line(entry.get("question"))
	        answer = _one_line(entry.get("answer"))

	        # A zero salary expectation means the candidate declined to answer.
	        if "salary" in question.lower() and answer in ("0$", "0", "$0"):
	            answer = "Prefer not to disclose"

	        # ``evaluation`` may be missing or null for unevaluated answers.
	        evaluation = entry.get("evaluation")
	        if not isinstance(evaluation, dict):
	            evaluation = {}

	        lines.append('')
	        lines.append(f"Question {idx}: {question}")
	        lines.append(f"Answer: {answer}")
	        lines.append(f"Score: {_one_line(evaluation.get('score'))}")
	        lines.append(f"Feedback: {_one_line(evaluation.get('feedback'))}")

	    return '\n'.join(lines)


	def create_pdf_report(report_text: str) -> BytesIO:
	    """Render a text report as a PDF and return it as an in-memory buffer.

	    ``report_text`` is parsed with ``_parse_report_text``. The first page
	    shows the header, candidate details, overall score, skills match bar
	    and as many of the first three Q&A entries as fit. Every entry that
	    was not drawn on the first page is handed to
	    ``_create_transcript_page`` with the matching question number.

	    Parameters
	    ----------
	    report_text : str
	        Report text in the ``Label: value`` line format.

	    Returns
	    -------
	    BytesIO
	        Buffer holding the PDF bytes, positioned at offset 0.
	    """
	    buffer = BytesIO()

	    # A4 dimensions in inches (210mm x 297mm)
	    A4_WIDTH = 8.27
	    A4_HEIGHT = 11.69

	    # Margins in inches
	    LEFT_MARGIN = 0.75
	    RIGHT_MARGIN = 0.75
	    TOP_MARGIN = 0.75
	    BOTTOM_MARGIN = 0.75

	    CONTENT_WIDTH = A4_WIDTH - LEFT_MARGIN - RIGHT_MARGIN

	    # Professional color scheme - single accent color
	    ACCENT_COLOR = '#1e40af'  # Dark blue
	    TEXT_COLOR = '#111827'  # Dark gray/black
	    LIGHT_GRAY = '#f8fafc'  # Light background
	    BORDER_COLOR = '#e2e8f0'  # Light border

	    # Upper bound on Q&A entries previewed on the first page.
	    MAX_QA_ON_FIRST_PAGE = 3

	    def clip(text: str, limit: int) -> str:
	        """Truncate ``text`` to ``limit`` characters, marking the cut with '...'."""
	        return f"{text[:limit]}{'...' if len(text) > limit else ''}"

	    report_data = _parse_report_text(report_text)
	    qa_log = report_data['qa_log']
	    # Number of Q&A entries actually drawn on page 1; the rest are carried
	    # over so that no entry is skipped between the two sections.
	    shown_on_first_page = 0

	    with PdfPages(buffer) as pdf:
	        # Page 1: Header, Candidate Info, and Skills Summary
	        fig = plt.figure(figsize=(A4_WIDTH, A4_HEIGHT))
	        try:
	            fig.patch.set_facecolor('white')

	            # Axis in page inches so that positions below read as inches.
	            ax = fig.add_subplot(111)
	            ax.set_xlim(0, A4_WIDTH)
	            ax.set_ylim(0, A4_HEIGHT)
	            ax.axis('off')

	            # Current Y position (start from top)
	            y_pos = A4_HEIGHT - TOP_MARGIN

	            # === HEADER SECTION ===
	            ax.text(LEFT_MARGIN, y_pos, 'INTERVIEW ASSESSMENT REPORT',
	                    fontsize=20, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

	            current_date = datetime.now().strftime('%B %d, %Y')
	            ax.text(A4_WIDTH - RIGHT_MARGIN, y_pos, current_date,
	                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif',
	                    horizontalalignment='right')

	            y_pos -= 0.8

	            # === CANDIDATE INFO AND OVERALL SCORE ===
	            overall_score = _calculate_overall_score(report_data)
	            score_color = _get_score_color(overall_score['label'])

	            # Score box on the right
	            score_box_width = 2.5
	            score_box_height = 1.8
	            score_x = A4_WIDTH - RIGHT_MARGIN - score_box_width

	            score_rect = FancyBboxPatch(
	                (score_x, y_pos - score_box_height), score_box_width, score_box_height,
	                boxstyle="round,pad=0.1",
	                facecolor=LIGHT_GRAY,
	                edgecolor=ACCENT_COLOR,
	                linewidth=2
	            )
	            ax.add_patch(score_rect)

	            # Large score percentage
	            ax.text(score_x + score_box_width / 2, y_pos - 0.6, f"{overall_score['percentage']:.0f}%",
	                    fontsize=32, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif',
	                    horizontalalignment='center', verticalalignment='center')

	            # Score label
	            ax.text(score_x + score_box_width / 2, y_pos - 1.2, 'OVERALL SCORE',
	                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif',
	                    horizontalalignment='center', fontweight='bold')

	            ax.text(score_x + score_box_width / 2, y_pos - 1.5, overall_score['label'].upper(),
	                    fontsize=14, fontweight='bold', color=score_color, fontfamily='sans-serif',
	                    horizontalalignment='center')

	            # Candidate information on the left
	            ax.text(LEFT_MARGIN, y_pos - 0.3, report_data['candidate_name'],
	                    fontsize=18, fontweight='bold', color=TEXT_COLOR, fontfamily='sans-serif')

	            ax.text(LEFT_MARGIN, y_pos - 0.7, f"{report_data['job_role']} • {report_data['company']}",
	                    fontsize=12, color=TEXT_COLOR, fontfamily='sans-serif')

	            ax.text(LEFT_MARGIN, y_pos - 1.0, f"Email: {report_data['candidate_email']}",
	                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')

	            ax.text(LEFT_MARGIN, y_pos - 1.3, f"Application Date: {report_data['date_applied']}",
	                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')

	            y_pos -= 2.5

	            # === SKILLS MATCH SUMMARY ===
	            ax.text(LEFT_MARGIN, y_pos, 'SKILLS MATCH SUMMARY',
	                    fontsize=14, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

	            # Underline
	            ax.plot([LEFT_MARGIN, LEFT_MARGIN + 3], [y_pos - 0.1, y_pos - 0.1],
	                    color=ACCENT_COLOR, linewidth=2)

	            y_pos -= 0.5

	            skills_data = report_data['skills_match']

	            # Skills match percentage bar
	            bar_width = CONTENT_WIDTH - 1
	            bar_height = 0.3

	            # Background bar
	            bg_rect = Rectangle((LEFT_MARGIN + 0.5, y_pos - bar_height), bar_width, bar_height,
	                                facecolor=LIGHT_GRAY, edgecolor=BORDER_COLOR)
	            ax.add_patch(bg_rect)

	            # Progress bar; clamp so a malformed ratio cannot overflow the bar.
	            fill_ratio = max(0.0, min(100.0, skills_data['ratio']))
	            progress_width = bar_width * (fill_ratio / 100)
	            progress_rect = Rectangle((LEFT_MARGIN + 0.5, y_pos - bar_height), progress_width, bar_height,
	                                      facecolor=ACCENT_COLOR, edgecolor='none')
	            ax.add_patch(progress_rect)

	            # The label sits at the bar's centre: white is only readable once
	            # the fill reaches it, otherwise it lies on the light background.
	            bar_label_color = 'white' if fill_ratio >= 50 else TEXT_COLOR
	            ax.text(LEFT_MARGIN + 0.5 + bar_width / 2, y_pos - bar_height / 2,
	                    f"{skills_data['ratio']:.0f}% Skills Match",
	                    fontsize=11, fontweight='bold', color=bar_label_color, fontfamily='sans-serif',
	                    horizontalalignment='center', verticalalignment='center')

	            y_pos -= 0.8

	            # Skills details (simplified)
	            ax.text(LEFT_MARGIN, y_pos, f"Required Skills: {clip(skills_data['required'], 80)}",
	                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')

	            y_pos -= 0.3

	            ax.text(LEFT_MARGIN, y_pos, f"Candidate Skills: {clip(skills_data['candidate'], 80)}",
	                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')

	            y_pos -= 0.3

	            ax.text(LEFT_MARGIN, y_pos, f"Matching Skills: {clip(skills_data['common'], 80)}",
	                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')

	            y_pos -= 0.8

	            # === INTERVIEW TRANSCRIPT PREVIEW ===
	            if qa_log:
	                ax.text(LEFT_MARGIN, y_pos, 'INTERVIEW TRANSCRIPT',
	                        fontsize=14, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

	                # Underline
	                ax.plot([LEFT_MARGIN, LEFT_MARGIN + 3], [y_pos - 0.1, y_pos - 0.1],
	                        color=ACCENT_COLOR, linewidth=2)

	                y_pos -= 0.5

	                for number, qa in enumerate(qa_log[:MAX_QA_ON_FIRST_PAGE], 1):
	                    # Stop when the remaining space is too small; the entry
	                    # is then rendered on the continuation page instead.
	                    if y_pos < BOTTOM_MARGIN + 2:
	                        break

	                    # Question
	                    ax.text(LEFT_MARGIN, y_pos, f"Q{number}: {clip(qa['question'], 90)}",
	                            fontsize=11, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

	                    y_pos -= 0.4

	                    # Answer
	                    answer_text = qa['answer']
	                    if "salary" in qa['question'].lower() and answer_text in ("0$", "0", "$0"):
	                        answer_text = "Prefer not to disclose"

	                    wrapped_answer = textwrap.fill(answer_text, width=85)
	                    for line in wrapped_answer.split('\n')[:2]:  # Max 2 lines
	                        ax.text(LEFT_MARGIN + 0.3, y_pos, line,
	                                fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')
	                        y_pos -= 0.25

	                    # Evaluation
	                    eval_color = _get_score_color(qa['score'])
	                    ax.text(LEFT_MARGIN + 0.3, y_pos, f"Evaluation: {qa['score']}",
	                            fontsize=10, fontweight='bold', color=eval_color, fontfamily='sans-serif')

	                    y_pos -= 0.6
	                    shown_on_first_page = number

	            # NOTE(review): bbox_inches='tight' crops the saved page to the
	            # drawn content, so the page size may differ from the A4 figure
	            # size - confirm this is intended.
	            pdf.savefig(fig, bbox_inches='tight', pad_inches=0)
	        finally:
	            # Always release the figure; pyplot keeps a reference to every
	            # open figure until it is closed.
	            plt.close(fig)

	        # === REMAINING TRANSCRIPT ===
	        remaining = qa_log[shown_on_first_page:]
	        if remaining:
	            _create_transcript_page(pdf, remaining, A4_WIDTH, A4_HEIGHT,
	                                    LEFT_MARGIN, RIGHT_MARGIN, TOP_MARGIN, BOTTOM_MARGIN,
	                                    ACCENT_COLOR, TEXT_COLOR, start_index=shown_on_first_page + 1)

	    buffer.seek(0)
	    return buffer


	def _parse_report_text(report_text: str) -> Dict[str, Any]:
	"""Parse the text report into structured data."""
	lines = report_text.split('\n')
	data = {
	'candidate_name': 'N/A',
	'candidate_email': 'N/A',
	'job_role': 'N/A',
	'company': 'N/A',
	'date_applied': 'N/A',
	'skills_match': {
	'required': 'N/A',
	'candidate': 'N/A',
	'common': 'N/A',
	'ratio': 0,
	'score': 'N/A'
	},
	'qa_log': []
	}

	current_question = None

	for line in lines:
	line = line.strip()
	if line.startswith('Candidate Name:'):
	data['candidate_name'] = line.split(':', 1)[1].strip()
	elif line.startswith('Candidate Email:'):
	data['candidate_email'] = line.split(':', 1)[1].strip()
	elif line.startswith('Job Applied:'):
	data['job_role'] = line.split(':', 1)[1].strip()
	elif line.startswith('Company:'):
	data['company'] = line.split(':', 1)[1].strip()
	elif line.startswith('Date Applied:'):
	data['date_applied'] = line.split(':', 1)[1].strip()
	elif line.startswith('Required Skills:'):
	data['skills_match']['required'] = line.split(':', 1)[1].strip()
	elif line.startswith('Candidate Skills:'):
	data['skills_match']['candidate'] = line.split(':', 1)[1].strip()
	elif line.startswith('Skills in Common:'):
	data['skills_match']['common'] = line.split(':', 1)[1].strip()
	elif line.startswith('Match Ratio:'):
	try:
	data['skills_match']['ratio'] = float(line.split(':')[1].strip().rstrip('%'))
	except:
	data['skills_match']['ratio'] = 0
	elif line.startswith('Score:') and 'skills_match' in str(data):
	data['skills_match']['score'] = line.split(':', 1)[1].strip()
	elif line.startswith('Question'):
	if current_question:
	data['qa_log'].append(current_question)
	current_question = {
	'question': line.split(':', 1)[1].strip() if ':' in line else line,
	'answer': '',
	'score': '',
	'feedback': ''
	}
	elif line.startswith('Answer:') and current_question:
	current_question['answer'] = line.split(':', 1)[1].strip()
	elif line.startswith('Score:') and current_question:
	current_question['score'] = line.split(':', 1)[1].strip()
	elif line.startswith('Feedback:') and current_question:
	current_question['feedback'] = line.split(':', 1)[1].strip()

	if current_question:
	data['qa_log'].append(current_question)

	return data


	def _calculate_overall_score(report_data: Dict[str, Any]) -> Dict[str, Any]:
	"""Calculate overall score from skills match and QA scores."""
	# Skills match contributes 40%
	skills_ratio = report_data['skills_match']['ratio'] / 100

	# QA scores contribute 60%
	qa_scores = []
	for qa in report_data['qa_log']:
	score_text = qa['score'].lower()
	if 'excellent' in score_text or '5' in score_text or '10' in score_text:
	qa_scores.append(1.0)
	elif 'good' in score_text or '4' in score_text or '8' in score_text or '9' in score_text:
	qa_scores.append(0.8)
	elif 'satisfactory' in score_text or 'medium' in score_text or '3' in score_text or '6' in score_text or '7' in score_text:
	qa_scores.append(0.6)
	elif 'needs improvement' in score_text or 'poor' in score_text or '2' in score_text or '4' in score_text or '5' in score_text:
	qa_scores.append(0.4)
	else:
	qa_scores.append(0.2)

	qa_average = sum(qa_scores) / len(qa_scores) if qa_scores else 0.5

	# Calculate weighted average
	overall = (skills_ratio * 0.4) + (qa_average * 0.6)
	percentage = overall * 100

	if overall >= 0.8:
	label = 'Excellent'
	elif overall >= 0.65:
	label = 'Good'
	elif overall >= 0.45:
	label = 'Satisfactory'
	else:
	label = 'Needs Improvement'

	return {'percentage': percentage, 'label': label}


	def _get_score_color(score_label: str) -> str:
	"""Get color based on score label."""
	score_label = score_label.lower()
	if 'excellent' in score_label:
	return '#059669' # Green
	elif 'good' in score_label:
	return '#2563eb' # Blue
	elif 'medium' in score_label or 'satisfactory' in score_label:
	return '#d97706' # Orange
	else:
	return '#dc2626' # Red


	def _create_transcript_page(pdf, qa_log: List[Dict], page_width: float, page_height: float,
	                            left_margin: float, right_margin: float, top_margin: float, bottom_margin: float,
	                            accent_color: str, text_color: str, start_index: int = 1):
	    """Render the remaining interview transcript, adding pages as needed.

	    Entries are drawn top to bottom. When the next entry no longer fits, the
	    current page is saved and a new continuation page is started, so no
	    entry of ``qa_log`` is dropped. At least one page is always written,
	    even for an empty ``qa_log``.

	    Parameters
	    ----------
	    pdf
	        Open ``PdfPages`` object that receives the page(s).
	    qa_log : list of dict
	        Entries with ``question``, ``answer``, ``score`` and ``feedback``.
	    page_width, page_height : float
	        Page size in inches.
	    left_margin, right_margin, top_margin, bottom_margin : float
	        Page margins in inches.
	    accent_color, text_color : str
	        Colours for headings/questions and for body text.
	    start_index : int
	        Question number shown for the first entry of ``qa_log``.
	    """
	    content_width = page_width - left_margin - right_margin
	    entries = list(qa_log)
	    total = len(entries)
	    cursor = 0  # index of the next entry to draw

	    while True:
	        fig = plt.figure(figsize=(page_width, page_height))
	        try:
	            fig.patch.set_facecolor('white')
	            ax = fig.add_subplot(111)
	            ax.set_xlim(0, page_width)
	            ax.set_ylim(0, page_height)
	            ax.axis('off')

	            # Start from top
	            y_pos = page_height - top_margin

	            # Page header
	            ax.text(left_margin, y_pos, 'INTERVIEW TRANSCRIPT (CONTINUED)',
	                    fontsize=14, fontweight='bold', color=accent_color, fontfamily='sans-serif')

	            # Underline
	            ax.plot([left_margin, left_margin + 4], [y_pos - 0.1, y_pos - 0.1],
	                    color=accent_color, linewidth=2)

	            y_pos -= 0.8

	            placed_on_page = 0
	            while cursor < total:
	                # Out of room: continue on a fresh page. The first entry of
	                # a page is always drawn so that the loop makes progress.
	                if placed_on_page and y_pos < bottom_margin + 1.5:
	                    break

	                qa = entries[cursor]

	                # Question
	                question_text = f"Q{start_index + cursor}: {qa['question']}"
	                for line in textwrap.fill(question_text, width=85).split('\n'):
	                    ax.text(left_margin, y_pos, line,
	                            fontsize=11, fontweight='bold', color=accent_color, fontfamily='sans-serif')
	                    y_pos -= 0.3

	                y_pos -= 0.1

	                # Answer
	                answer_text = qa['answer']
	                if "salary" in qa['question'].lower() and answer_text in ("0$", "0", "$0"):
	                    answer_text = "Prefer not to disclose"

	                for line in textwrap.fill(answer_text, width=80).split('\n')[:3]:  # Max 3 lines per answer
	                    ax.text(left_margin + 0.3, y_pos, line,
	                            fontsize=10, color=text_color, fontfamily='sans-serif')
	                    y_pos -= 0.25

	                # Evaluation
	                eval_color = _get_score_color(qa['score'])
	                ax.text(left_margin + 0.3, y_pos, f"Evaluation: {qa['score']}",
	                        fontsize=10, fontweight='bold', color=eval_color, fontfamily='sans-serif')

	                y_pos -= 0.2

	                # Feedback (if available and space permits)
	                if qa['feedback'] and qa['feedback'] != 'N/A' and y_pos > bottom_margin + 0.8:
	                    feedback_text = f"Feedback: {qa['feedback']}"
	                    for line in textwrap.fill(feedback_text, width=75).split('\n')[:2]:  # Max 2 lines
	                        ax.text(left_margin + 0.3, y_pos, line,
	                                fontsize=9, color='#6b7280', fontfamily='sans-serif', style='italic')
	                        y_pos -= 0.2

	                y_pos -= 0.4

	                cursor += 1
	                placed_on_page += 1

	                # Add separator line between questions
	                if cursor < total and y_pos > bottom_margin + 1:
	                    ax.plot([left_margin + 0.5, left_margin + content_width - 0.5],
	                            [y_pos + 0.1, y_pos + 0.1],
	                            color='#e5e7eb', linewidth=0.5, linestyle='--')
	                    y_pos -= 0.3

	            # Save page
	            pdf.savefig(fig, bbox_inches='tight', pad_inches=0)
	        finally:
	            # Always release the figure; pyplot keeps a reference to every
	            # open figure until it is closed.
	            plt.close(fig)

	        if cursor >= total:
	            break


	# Public API of this module; the ``_``-prefixed functions above are internal helpers.
	__all__ = ['generate_llm_interview_report', 'create_pdf_report']