Spaces:

husseinelsaadi
/

Codingo

Paused

App Files Files Community

Codingo / backend /services /report_generator.py

husseinelsaadi

updated

e0470f3 10 days ago

raw

history blame contribute delete

24.5 kB


	from __future__ import annotations
	import json
	from io import BytesIO
	import textwrap
	from typing import List, Dict, Any, Tuple
	import matplotlib.pyplot as plt
	from matplotlib.backends.backend_pdf import PdfPages
	import matplotlib.patches as mpatches
	from matplotlib.patches import Rectangle, FancyBboxPatch
	from datetime import datetime


	def _decode_json_field(raw, fallback):
	    """Decode a JSON text value, returning *fallback* when it is empty or invalid."""
	    if not raw:
	        return fallback
	    try:
	        return json.loads(raw)
	    except (TypeError, ValueError, RecursionError):
	        # TypeError: not str/bytes; ValueError: malformed JSON (JSONDecodeError).
	        return fallback


	def _clean_skill_list(value) -> List[str]:
	    """Coerce decoded JSON into a list of stripped, non-empty skill strings."""
	    if isinstance(value, str):
	        # A bare string would otherwise be iterated character by character.
	        value = value.split(',')
	    if not isinstance(value, list):
	        return []
	    stripped = (str(item).strip() for item in value if item is not None)
	    return [skill for skill in stripped if skill]


	def _format_interview_log(raw_log) -> List[str]:
	    """Render the stored question/answer log as report lines."""
	    decoded = _decode_json_field(raw_log, [])
	    # Only dict entries carry question/answer data; anything else is ignored.
	    entries = [item for item in decoded if isinstance(item, dict)] if isinstance(decoded, list) else []
	    if not entries:
	        return ["No interview log data recorded."]

	    rendered: List[str] = []
	    for idx, entry in enumerate(entries, 1):
	        question = entry.get("question")
	        question = "N/A" if question is None else str(question)
	        answer = entry.get("answer")
	        answer = "N/A" if answer is None else str(answer)

	        # A zero salary answer is a placeholder for "declined to answer".
	        if "salary" in question.lower() and answer.strip() in ("0$", "0", "$0"):
	            answer = "Prefer not to disclose"

	        evaluation = entry.get("evaluation")
	        if not isinstance(evaluation, dict):
	            # Missing or null evaluation: report N/A instead of failing.
	            evaluation = {}

	        rendered.append(f"\nQuestion {idx}: {question}")
	        rendered.append(f"Answer: {answer}")
	        rendered.append(f"Score: {evaluation.get('score', 'N/A')}")
	        rendered.append(f"Feedback: {evaluation.get('feedback', 'N/A')}")
	    return rendered


	def generate_llm_interview_report(application) -> str:
	    """Generate a human-readable interview report for a candidate.

	    The report contains the candidate's name and email, the job role and
	    company, the application date, a skills-match summary and the
	    interview transcript with per-question score and feedback.

	    Parameters
	    ----------
	    application
	        Object exposing ``name``, ``email``, ``job`` (with ``role``,
	        ``company`` and a JSON ``skills`` string), ``extracted_features``
	        (JSON object with a ``skills`` list), ``date_applied`` and
	        ``interview_log`` (JSON list of question/answer dicts).
	        Empty or malformed JSON fields are treated as "no data".

	    Returns
	    -------
	    str
	        The report as a newline-joined string.
	    """
	    features = _decode_json_field(getattr(application, 'extracted_features', None), {})
	    if not isinstance(features, dict):
	        # Valid JSON of the wrong shape (e.g. a list) carries no skills mapping.
	        features = {}
	    candidate_skills = _clean_skill_list(features.get('skills'))

	    job = getattr(application, 'job', None) or None
	    job_skills = _clean_skill_list(_decode_json_field(getattr(job, 'skills', None), []))

	    # Compare case-insensitively; an empty requirement list yields a 0% match
	    # rather than a division by zero.
	    candidate_set = {skill.lower() for skill in candidate_skills}
	    job_set = {skill.lower() for skill in job_skills}
	    common = candidate_set & job_set
	    ratio = len(common) / len(job_set) if job_set else 0.0

	    if ratio >= 0.75:
	        score_label = 'Excellent'
	    elif ratio >= 0.5:
	        score_label = 'Good'
	    elif ratio >= 0.25:
	        score_label = 'Medium'
	    else:
	        score_label = 'Poor'

	    applied = getattr(application, 'date_applied', None)
	    try:
	        date_str = applied.strftime('%Y-%m-%d') if applied else 'N/A'
	    except (AttributeError, TypeError, ValueError):
	        # Value is not a date/datetime (or cannot be formatted).
	        date_str = 'N/A'

	    lines: List[str] = [
	        'Interview Report',
	        '=================',
	        '',
	        f'Candidate Name: {application.name}',
	        f'Candidate Email: {application.email}',
	    ]
	    if job:
	        lines.append(f'Job Applied: {job.role}')
	        lines.append(f'Company: {job.company}')
	    else:
	        lines.append('Job Applied: N/A')
	        lines.append('Company: N/A')
	    lines.append(f'Date Applied: {date_str}')
	    lines.append('')

	    # Skills are listed as stored (whitespace-trimmed); the common set is
	    # the lower-cased intersection, sorted for stable output.
	    lines.append('Skills Match Summary:')
	    lines.append(f" Required Skills: {', '.join(job_skills) if job_skills else 'N/A'}")
	    lines.append(f" Candidate Skills: {', '.join(candidate_skills) if candidate_skills else 'N/A'}")
	    lines.append(f" Skills in Common: {', '.join(sorted(common)) if common else 'None'}")
	    lines.append(f' Match Ratio: {ratio * 100:.0f}%')
	    lines.append(f' Score: {score_label}')
	    lines.append('')

	    lines.append('Interview Transcript & Evaluation:')
	    try:
	        lines.extend(_format_interview_log(application.interview_log))
	    except Exception as e:
	        # Deliberate best-effort boundary: a broken log must not prevent
	        # the rest of the report from being produced.
	        lines.append(f"Error loading interview log: {e}")

	    return '\n'.join(lines)


	def _draw_wrapped_text(ax, text, x, y, wrap_width, line_step, **text_kwargs) -> float:
	    """Draw *text* wrapped to *wrap_width* characters, one line per ``ax.text`` call.

	    Returns the y position after the last drawn line.
	    """
	    for segment in textwrap.wrap(text, width=wrap_width):
	        ax.text(x, y, segment, **text_kwargs)
	        y -= line_step
	    return y


	def create_pdf_report(report_text: str) -> BytesIO:
	    """Render a text report as a PDF and return it as an in-memory buffer.

	    The text is parsed with ``_parse_report_text``. The first page shows a
	    header, candidate details, the overall score box, the skills-match
	    summary and up to three transcript entries. Remaining entries are
	    handed to ``_create_transcript_page``.

	    Parameters
	    ----------
	    report_text : str
	        Report text in the format emitted by
	        ``generate_llm_interview_report``.

	    Returns
	    -------
	    BytesIO
	        Buffer holding the PDF bytes, positioned at offset 0.
	    """
	    buffer = BytesIO()

	    # Nominal A4 dimensions in inches (210mm x 297mm). They are used as the
	    # figure size and as the axes data range.
	    # NOTE(review): pages are saved with bbox_inches='tight', which crops
	    # to the drawn content, so the resulting page size can differ from A4.
	    A4_WIDTH = 8.27
	    A4_HEIGHT = 11.69

	    # Margins, in the same units as the page dimensions
	    LEFT_MARGIN = 0.75
	    RIGHT_MARGIN = 0.75
	    TOP_MARGIN = 0.75
	    BOTTOM_MARGIN = 0.75

	    CONTENT_WIDTH = A4_WIDTH - LEFT_MARGIN - RIGHT_MARGIN

	    # Colour scheme - single accent colour
	    ACCENT_COLOR = '#1e40af'  # Dark blue
	    TEXT_COLOR = '#111827'  # Dark gray/black
	    LIGHT_GRAY = '#f8fafc'  # Light background
	    BORDER_COLOR = '#e2e8f0'  # Light border

	    body_font = dict(fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')
	    section_font = dict(fontsize=14, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

	    report_data = _parse_report_text(report_text)
	    qa_log = report_data['qa_log']
	    # Number of transcript entries actually drawn on the first page.
	    qa_count_on_page1 = 0

	    with PdfPages(buffer) as pdf:
	        fig = plt.figure(figsize=(A4_WIDTH, A4_HEIGHT))
	        try:
	            fig.patch.set_facecolor('white')

	            ax = fig.add_subplot(111)
	            ax.set_xlim(0, A4_WIDTH)
	            ax.set_ylim(0, A4_HEIGHT)
	            ax.axis('off')

	            # Drawing cursor; starts at the top and moves downwards.
	            y_pos = A4_HEIGHT - TOP_MARGIN

	            # === HEADER SECTION ===
	            ax.text(LEFT_MARGIN, y_pos, 'INTERVIEW ASSESSMENT REPORT',
	                    fontsize=20, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

	            current_date = datetime.now().strftime('%B %d, %Y')
	            ax.text(A4_WIDTH - RIGHT_MARGIN, y_pos, current_date,
	                    horizontalalignment='right', **body_font)

	            y_pos -= 0.8

	            # === CANDIDATE INFO AND OVERALL SCORE ===
	            overall_score = _calculate_overall_score(report_data)
	            score_color = _get_score_color(overall_score['label'])

	            # Score box on the right
	            score_box_width = 2.5
	            score_box_height = 1.8
	            score_x = A4_WIDTH - RIGHT_MARGIN - score_box_width
	            score_center_x = score_x + score_box_width / 2

	            score_rect = FancyBboxPatch(
	                (score_x, y_pos - score_box_height), score_box_width, score_box_height,
	                boxstyle="round,pad=0.1",
	                facecolor=LIGHT_GRAY,
	                edgecolor=ACCENT_COLOR,
	                linewidth=2
	            )
	            ax.add_patch(score_rect)

	            ax.text(score_center_x, y_pos - 0.6, f"{overall_score['percentage']:.0f}%",
	                    fontsize=32, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif',
	                    horizontalalignment='center', verticalalignment='center')

	            ax.text(score_center_x, y_pos - 1.2, 'OVERALL SCORE',
	                    horizontalalignment='center', fontweight='bold', **body_font)

	            ax.text(score_center_x, y_pos - 1.5, overall_score['label'].upper(),
	                    fontsize=14, fontweight='bold', color=score_color, fontfamily='sans-serif',
	                    horizontalalignment='center')

	            # Candidate information on the left
	            ax.text(LEFT_MARGIN, y_pos - 0.3, report_data['candidate_name'],
	                    fontsize=18, fontweight='bold', color=TEXT_COLOR, fontfamily='sans-serif')

	            ax.text(LEFT_MARGIN, y_pos - 0.7, f"{report_data['job_role']} • {report_data['company']}",
	                    fontsize=12, color=TEXT_COLOR, fontfamily='sans-serif')

	            ax.text(LEFT_MARGIN, y_pos - 1.0, f"Email: {report_data['candidate_email']}", **body_font)

	            ax.text(LEFT_MARGIN, y_pos - 1.3, f"Application Date: {report_data['date_applied']}", **body_font)

	            y_pos -= 2.5

	            # === SKILLS MATCH SUMMARY ===
	            ax.text(LEFT_MARGIN, y_pos, 'SKILLS MATCH SUMMARY', **section_font)
	            ax.plot([LEFT_MARGIN, LEFT_MARGIN + 3], [y_pos - 0.1, y_pos - 0.1],
	                    color=ACCENT_COLOR, linewidth=2)

	            y_pos -= 0.5

	            skills_data = report_data['skills_match']
	            match_pct = skills_data['ratio']

	            bar_width = CONTENT_WIDTH - 1
	            bar_height = 0.3
	            bar_x = LEFT_MARGIN + 0.5

	            # Background track
	            ax.add_patch(Rectangle((bar_x, y_pos - bar_height), bar_width, bar_height,
	                                   facecolor=LIGHT_GRAY, edgecolor=BORDER_COLOR))

	            # Filled part; clamp so malformed ratios cannot draw outside the track.
	            progress_width = bar_width * (min(max(match_pct, 0), 100) / 100)
	            ax.add_patch(Rectangle((bar_x, y_pos - bar_height), progress_width, bar_height,
	                                   facecolor=ACCENT_COLOR, edgecolor='none'))

	            # The label is centred on the track. White is only legible once the
	            # filled part reaches the centre; otherwise use the dark text colour.
	            bar_label_color = 'white' if match_pct >= 50 else TEXT_COLOR
	            ax.text(bar_x + bar_width / 2, y_pos - bar_height / 2,
	                    f"{match_pct:.0f}% Skills Match",
	                    fontsize=11, fontweight='bold', color=bar_label_color, fontfamily='sans-serif',
	                    horizontalalignment='center', verticalalignment='center')

	            y_pos -= 0.8

	            # Skills details, each wrapped to 85 characters
	            y_pos = _draw_wrapped_text(ax, f"Required Skills: {skills_data['required']}",
	                                       LEFT_MARGIN, y_pos, 85, 0.3, **body_font)
	            y_pos -= 0.3

	            y_pos = _draw_wrapped_text(ax, f"Candidate Skills: {skills_data['candidate']}",
	                                       LEFT_MARGIN, y_pos, 85, 0.3, **body_font)
	            y_pos -= 0.3

	            y_pos = _draw_wrapped_text(ax, f"Matching Skills: {skills_data['common']}",
	                                       LEFT_MARGIN, y_pos, 85, 0.3, **body_font)
	            y_pos -= 0.8

	            # === INTERVIEW TRANSCRIPT PREVIEW ===
	            if qa_log:
	                ax.text(LEFT_MARGIN, y_pos, 'INTERVIEW TRANSCRIPT', **section_font)
	                ax.plot([LEFT_MARGIN, LEFT_MARGIN + 3], [y_pos - 0.1, y_pos - 0.1],
	                        color=ACCENT_COLOR, linewidth=2)

	                y_pos -= 0.5

	                # At most three entries go on the first page, fewer if space runs out.
	                for qa in qa_log[:3]:
	                    # 2.2 approximates the height of one question/answer/evaluation block.
	                    if y_pos < BOTTOM_MARGIN + 2.2:
	                        break

	                    question_text = f"Q{qa_count_on_page1 + 1}: {qa['question']}"
	                    y_pos = _draw_wrapped_text(ax, question_text, LEFT_MARGIN, y_pos, 85, 0.25,
	                                               fontsize=11, fontweight='bold',
	                                               color=ACCENT_COLOR, fontfamily='sans-serif')
	                    y_pos -= 0.15  # extra spacing after question block

	                    # Mask a zero salary answer, which stands for "not disclosed".
	                    answer_text = qa['answer']
	                    if "salary" in qa['question'].lower() and answer_text in ("0$", "0", "$0"):
	                        answer_text = "Prefer not to disclose"

	                    # Preview only: at most two lines (an empty answer still takes one line).
	                    for segment in (textwrap.wrap(answer_text, width=85) or [''])[:2]:
	                        ax.text(LEFT_MARGIN + 0.3, y_pos, segment, **body_font)
	                        y_pos -= 0.25

	                    eval_color = _get_score_color(qa['score'])
	                    ax.text(LEFT_MARGIN + 0.3, y_pos, f"Evaluation: {qa['score']}",
	                            fontsize=10, fontweight='bold', color=eval_color, fontfamily='sans-serif')
	                    y_pos -= 0.6

	                    qa_count_on_page1 += 1

	            pdf.savefig(fig, bbox_inches='tight', pad_inches=0)
	        finally:
	            # Always release the figure: pyplot keeps a reference to every open
	            # figure, so skipping this on error would leak memory.
	            plt.close(fig)

	        # === REMAINING TRANSCRIPT ===
	        # Continue from the first entry not drawn above, keeping the numbering.
	        if len(qa_log) > qa_count_on_page1:
	            _create_transcript_page(
	                pdf,
	                qa_log[qa_count_on_page1:],
	                A4_WIDTH, A4_HEIGHT,
	                LEFT_MARGIN, RIGHT_MARGIN, TOP_MARGIN, BOTTOM_MARGIN,
	                ACCENT_COLOR, TEXT_COLOR,
	                start_index=qa_count_on_page1 + 1
	            )

	    buffer.seek(0)
	    return buffer


	def _parse_report_text(report_text: str) -> Dict[str, Any]:
	    """Parse the text report into structured data.

	    Header and skills lines are only recognised before the first
	    ``Question`` line. Inside the transcript, a line that is not an
	    ``Answer:``, ``Score:`` or ``Feedback:`` line is treated as a
	    continuation of the field being read, so multi-line answers are kept.

	    Parameters
	    ----------
	    report_text : str
	        Report text in the format emitted by
	        ``generate_llm_interview_report``. ``None`` is treated as empty.

	    Returns
	    -------
	    dict
	        Keys ``candidate_name``, ``candidate_email``, ``job_role``,
	        ``company``, ``date_applied``, ``skills_match`` (``required``,
	        ``candidate``, ``common``, ``ratio``, ``score``) and ``qa_log``
	        (list of dicts with ``question``, ``answer``, ``score``,
	        ``feedback``). Missing values keep their defaults.
	    """
	    header_fields = {
	        'Candidate Name:': 'candidate_name',
	        'Candidate Email:': 'candidate_email',
	        'Job Applied:': 'job_role',
	        'Company:': 'company',
	        'Date Applied:': 'date_applied',
	    }
	    skills_fields = {
	        'Required Skills:': 'required',
	        'Candidate Skills:': 'candidate',
	        'Skills in Common:': 'common',
	    }

	    data: Dict[str, Any] = {
	        'candidate_name': 'N/A',
	        'candidate_email': 'N/A',
	        'job_role': 'N/A',
	        'company': 'N/A',
	        'date_applied': 'N/A',
	        'skills_match': {
	            'required': 'N/A',
	            'candidate': 'N/A',
	            'common': 'N/A',
	            'ratio': 0,
	            'score': 'N/A'
	        },
	        'qa_log': []
	    }
	    skills = data['skills_match']

	    def value_after_colon(text: str) -> str:
	        # Split on the first colon only so values may contain colons themselves.
	        return text.split(':', 1)[1].strip()

	    current_question = None
	    # Transcript field that continuation lines are appended to, if any.
	    open_field = None

	    for raw_line in (report_text or '').split('\n'):
	        line = raw_line.strip()
	        if not line:
	            continue

	        if line.startswith('Question'):
	            if current_question is not None:
	                data['qa_log'].append(current_question)
	            current_question = {
	                'question': value_after_colon(line) if ':' in line else line,
	                'answer': '',
	                'score': '',
	                'feedback': ''
	            }
	            open_field = 'question'
	            continue

	        if current_question is None:
	            # Header section: candidate details and skills summary.
	            matched = False
	            for prefix, key in header_fields.items():
	                if line.startswith(prefix):
	                    data[key] = value_after_colon(line)
	                    matched = True
	                    break
	            if matched:
	                continue
	            for prefix, key in skills_fields.items():
	                if line.startswith(prefix):
	                    skills[key] = value_after_colon(line)
	                    matched = True
	                    break
	            if matched:
	                continue
	            if line.startswith('Match Ratio:'):
	                try:
	                    skills['ratio'] = float(value_after_colon(line).rstrip('%'))
	                except ValueError:
	                    skills['ratio'] = 0
	            elif line.startswith('Score:'):
	                # Before any question, a Score line is the skills-match score.
	                skills['score'] = value_after_colon(line)
	            continue

	        # Transcript section: fields of the most recent question.
	        if line.startswith('Answer:'):
	            current_question['answer'] = value_after_colon(line)
	            open_field = 'answer'
	        elif line.startswith('Score:'):
	            current_question['score'] = value_after_colon(line)
	            open_field = None
	        elif line.startswith('Feedback:'):
	            current_question['feedback'] = value_after_colon(line)
	            open_field = 'feedback'
	        elif open_field is not None:
	            # Continuation of a multi-line question, answer or feedback.
	            current_question[open_field] = f"{current_question[open_field]} {line}".strip()

	    if current_question is not None:
	        data['qa_log'].append(current_question)

	    return data


	def _qa_score_level(score_text: str):
	    """Translate one per-question score string into a level between 0.2 and 1.0.

	    Returns ``None`` for unscored entries (empty, "N/A", "None") so they can
	    be left out of the average. Unrecognised text maps to 0.2.
	    """
	    import re  # local import: this module does not import re at file level

	    text = score_text.strip().lower()
	    if text in ('', 'n/a', 'na', 'none'):
	        return None

	    # Word labels take precedence over any digits in the same string.
	    keyword_levels = (
	        (('excellent',), 1.0),
	        (('good',), 0.8),
	        (('satisfactory', 'medium'), 0.6),
	        (('needs improvement', 'poor'), 0.4),
	    )
	    for keywords, level in keyword_levels:
	        if any(keyword in text for keyword in keywords):
	            return level

	    # Numeric scores: "8/10", "3 / 5" or a bare number such as "4".
	    match = re.search(r'(\d+(?:\.\d+)?)(?:\s*/\s*(\d+(?:\.\d+)?))?', text)
	    if match is None:
	        return 0.2
	    value = float(match.group(1))
	    if match.group(2) is not None:
	        scale = float(match.group(2))
	    elif value <= 5:
	        scale = 5.0  # bare 0-5 is read on a five-point scale
	    elif value <= 10:
	        scale = 10.0  # bare 6-10 is read on a ten-point scale
	    elif value <= 100:
	        scale = 100.0  # larger bare numbers are read as a percentage
	    else:
	        return 0.2
	    if scale <= 0:
	        return 0.2

	    fraction = value / scale
	    for threshold in (1.0, 0.8, 0.6, 0.4):
	        if fraction >= threshold:
	            return threshold
	    return 0.2


	def _calculate_overall_score(report_data: Dict[str, Any]) -> Dict[str, Any]:
	    """Calculate the overall score from the skills match and the Q&A scores.

	    The skills-match ratio contributes 40% and the average per-question
	    level 60%. Questions without a score are ignored. If no question has
	    a score, a neutral 0.5 is used for the Q&A part.

	    Parameters
	    ----------
	    report_data : dict
	        Parsed report with ``skills_match['ratio']`` (a percentage) and
	        ``qa_log`` (list of dicts with a ``score`` string).

	    Returns
	    -------
	    dict
	        ``{'percentage': float, 'label': str}`` where the label is one of
	        ``Excellent``, ``Good``, ``Satisfactory`` or ``Needs Improvement``.
	    """
	    skills_ratio = report_data['skills_match']['ratio'] / 100

	    levels = (_qa_score_level(str(qa.get('score') or '')) for qa in report_data['qa_log'])
	    qa_scores = [level for level in levels if level is not None]
	    qa_average = sum(qa_scores) / len(qa_scores) if qa_scores else 0.5

	    overall = (skills_ratio * 0.4) + (qa_average * 0.6)

	    if overall >= 0.8:
	        label = 'Excellent'
	    elif overall >= 0.65:
	        label = 'Good'
	    elif overall >= 0.45:
	        label = 'Satisfactory'
	    else:
	        label = 'Needs Improvement'

	    return {'percentage': overall * 100, 'label': label}


	def _get_score_color(score_label: str) -> str:
	    """Return the hex colour used to display a score label.

	    Matching is case-insensitive and by substring, checked in order:
	    "excellent" (green), "good" (blue), "medium"/"satisfactory" (orange).
	    Any other label is shown in red.
	    """
	    palette = (
	        (('excellent',), '#059669'),  # Green
	        (('good',), '#2563eb'),  # Blue
	        (('medium', 'satisfactory'), '#d97706'),  # Orange
	    )
	    lowered = score_label.lower()
	    for keywords, hex_color in palette:
	        if any(keyword in lowered for keyword in keywords):
	            return hex_color
	    return '#dc2626'  # Red


	def _create_transcript_page(pdf, qa_log: List[Dict], page_width: float, page_height: float,
	                            left_margin: float, right_margin: float, top_margin: float, bottom_margin: float,
	                            accent_color: str, text_color: str, start_index: int = 1):
	    """Append the remaining interview transcript to *pdf*.

	    Entries are drawn top to bottom. When the next entry would not fit, the
	    page is saved and a new one is started, so no entry is dropped. An
	    empty *qa_log* adds no page.

	    Parameters
	    ----------
	    pdf
	        Open ``PdfPages`` object the pages are saved to.
	    qa_log : list of dict
	        Entries with ``question``, ``answer``, ``score`` and ``feedback``.
	    page_width, page_height : float
	        Figure size, also used as the axes data range.
	    left_margin, right_margin, top_margin, bottom_margin : float
	        Margins in the same units as the page size.
	    accent_color, text_color : str
	        Colours for headings/questions and for answer text.
	    start_index : int
	        Number shown for the first entry in *qa_log*.
	    """
	    content_width = page_width - left_margin - right_margin
	    pending = list(qa_log)
	    next_number = start_index

	    while pending:
	        fig = plt.figure(figsize=(page_width, page_height))
	        drawn = 0
	        try:
	            fig.patch.set_facecolor('white')
	            ax = fig.add_subplot(111)
	            ax.set_xlim(0, page_width)
	            ax.set_ylim(0, page_height)
	            ax.axis('off')

	            # Drawing cursor; starts at the top and moves downwards.
	            y_pos = page_height - top_margin

	            # Page header with underline
	            ax.text(left_margin, y_pos, 'INTERVIEW TRANSCRIPT (CONTINUED)',
	                    fontsize=14, fontweight='bold', color=accent_color, fontfamily='sans-serif')
	            ax.plot([left_margin, left_margin + 4], [y_pos - 0.1, y_pos - 0.1],
	                    color=accent_color, linewidth=2)

	            y_pos -= 0.8

	            for qa in pending:
	                if drawn:
	                    # A further entry needs the separator gap (0.3) plus roughly
	                    # 1.5 for the block itself; otherwise start a new page.
	                    # The first entry of a page is always drawn, so every page
	                    # makes progress.
	                    if y_pos - 0.3 < bottom_margin + 1.5:
	                        break
	                    ax.plot([left_margin + 0.5, left_margin + content_width - 0.5],
	                            [y_pos + 0.1, y_pos + 0.1],
	                            color='#e5e7eb', linewidth=0.5, linestyle='--')
	                    y_pos -= 0.3

	                # Question
	                question_text = f"Q{next_number + drawn}: {qa['question']}"
	                for segment in textwrap.wrap(question_text, width=85):
	                    ax.text(left_margin, y_pos, segment,
	                            fontsize=11, fontweight='bold', color=accent_color, fontfamily='sans-serif')
	                    y_pos -= 0.3

	                y_pos -= 0.1

	                # Answer; a zero salary answer stands for "not disclosed".
	                answer_text = qa['answer']
	                if "salary" in qa['question'].lower() and answer_text in ("0$", "0", "$0"):
	                    answer_text = "Prefer not to disclose"

	                # Max 3 lines per answer (an empty answer still takes one line).
	                for segment in (textwrap.wrap(answer_text, width=80) or [''])[:3]:
	                    ax.text(left_margin + 0.3, y_pos, segment,
	                            fontsize=10, color=text_color, fontfamily='sans-serif')
	                    y_pos -= 0.25

	                # Evaluation
	                eval_color = _get_score_color(qa['score'])
	                ax.text(left_margin + 0.3, y_pos, f"Evaluation: {qa['score']}",
	                        fontsize=10, fontweight='bold', color=eval_color, fontfamily='sans-serif')

	                y_pos -= 0.2

	                # Feedback (if available and space permits), max 2 lines
	                if qa['feedback'] and qa['feedback'] != 'N/A' and y_pos > bottom_margin + 0.8:
	                    feedback_text = f"Feedback: {qa['feedback']}"
	                    for segment in textwrap.wrap(feedback_text, width=75)[:2]:
	                        ax.text(left_margin + 0.3, y_pos, segment,
	                                fontsize=9, color='#6b7280', fontfamily='sans-serif', style='italic')
	                        y_pos -= 0.2

	                y_pos -= 0.4
	                drawn += 1

	            pdf.savefig(fig, bbox_inches='tight', pad_inches=0)
	        finally:
	            # Always release the figure: pyplot keeps a reference to every open
	            # figure, so skipping this on error would leak memory.
	            plt.close(fig)

	        pending = pending[drawn:]
	        next_number += drawn


	# Public API of this module: the text report generator and the PDF
	# renderer. The underscore-prefixed helpers above are internal.
	__all__ = ['generate_llm_interview_report', 'create_pdf_report']