# Codingo / backend / services / report_generator.py
# (page-header residue from the hosting site: "husseinelsaadi's picture", "updated", commit e0470f3)
from __future__ import annotations
import json
from io import BytesIO
import textwrap
from typing import List, Dict, Any, Tuple
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.patches import Rectangle, FancyBboxPatch
from datetime import datetime
def _as_skill_list(raw: Any) -> List[str]:
    """Coerce a decoded JSON value into a list of skill names.

    A list is returned with ``None`` entries dropped and non-string entries
    converted with ``str``; string entries are kept verbatim.  A bare string
    is treated as a comma-separated list.  Any other value yields ``[]``.
    """
    if isinstance(raw, str):
        return [part.strip() for part in raw.split(',') if part.strip()]
    if isinstance(raw, list):
        return [item if isinstance(item, str) else str(item)
                for item in raw if item is not None]
    return []


def _format_interview_entry(index: int, entry: Dict[str, Any]) -> List[str]:
    """Return the four report lines (question, answer, score, feedback) for one entry."""
    question = entry.get("question")
    question = "N/A" if question is None else str(question)
    answer = entry.get("answer")
    answer = "N/A" if answer is None else str(answer)

    # A zero salary answer is reported as a refusal to disclose.
    if "salary" in question.lower() and answer in ("0$", "0", "$0"):
        answer = "Prefer not to disclose"

    # ``"evaluation": null`` (or any non-dict value) is treated as "no evaluation"
    # instead of raising and aborting the whole transcript.
    evaluation = entry.get("evaluation")
    if not isinstance(evaluation, dict):
        evaluation = {}

    return [
        f"\nQuestion {index}: {question}",
        f"Answer: {answer}",
        f"Score: {evaluation.get('score', 'N/A')}",
        f"Feedback: {evaluation.get('feedback', 'N/A')}",
    ]


def generate_llm_interview_report(application) -> str:
    """Generate a human-readable interview report for a candidate.

    The report contains the candidate's name and email, the job role and
    company, the application date, a skills-match summary (required skills,
    candidate skills, overlap, match ratio and a label) and the interview
    transcript with per-question score and feedback.

    Parameters
    ----------
    application
        Object exposing ``name``, ``email``, ``job`` (with ``role``,
        ``company`` and a JSON ``skills`` string), ``date_applied``,
        ``extracted_features`` (JSON string with a ``skills`` key) and
        ``interview_log`` (JSON list of ``{"question", "answer",
        "evaluation": {"score", "feedback"}}`` dicts).

    Returns
    -------
    str
        A multi-line string containing the report contents.  Malformed or
        missing JSON never raises; the affected section falls back to
        ``N/A`` / "No interview log data recorded.".
    """
    # --- Skills -----------------------------------------------------------
    # Broad ``except`` is deliberate: attribute access on the application
    # object and JSON decoding are both best-effort here.
    try:
        raw_features = application.extracted_features
        candidate_features = json.loads(raw_features) if raw_features else {}
    except Exception:
        candidate_features = {}
    if not isinstance(candidate_features, dict):
        candidate_features = {}
    candidate_skills: List[str] = _as_skill_list(candidate_features.get('skills'))

    try:
        has_job_skills = application.job and application.job.skills
        job_skills: List[str] = (
            _as_skill_list(json.loads(application.job.skills)) if has_job_skills else []
        )
    except Exception:
        job_skills = []

    # Compare case-insensitively; an empty requirement list gives a 0% match
    # rather than a division by zero.
    candidate_set = {s.strip().lower() for s in candidate_skills}
    job_set = {s.strip().lower() for s in job_skills}
    common = candidate_set & job_set
    ratio = len(common) / len(job_set) if job_set else 0.0

    if ratio >= 0.75:
        score_label = 'Excellent'
    elif ratio >= 0.5:
        score_label = 'Good'
    elif ratio >= 0.25:
        score_label = 'Medium'
    else:
        score_label = 'Poor'

    # --- Header -----------------------------------------------------------
    lines: List[str] = [
        'Interview Report',
        '=================',
        '',
        f'Candidate Name: {application.name}',
        f'Candidate Email: {application.email}',
    ]
    if application.job:
        lines.append(f'Job Applied: {application.job.role}')
        lines.append(f'Company: {application.job.company}')
    else:
        lines.append('Job Applied: N/A')
        lines.append('Company: N/A')

    try:
        date_str = application.date_applied.strftime('%Y-%m-%d') if application.date_applied else 'N/A'
    except Exception:
        date_str = 'N/A'
    lines.append(f'Date Applied: {date_str}')
    lines.append('')

    # --- Skills match summary ---------------------------------------------
    formatted_job_skills = ', '.join(job_skills) if job_skills else 'N/A'
    formatted_candidate_skills = ', '.join(candidate_skills) if candidate_skills else 'N/A'
    formatted_common = ', '.join(sorted(common)) if common else 'None'
    lines.extend([
        'Skills Match Summary:',
        f' Required Skills: {formatted_job_skills}',
        f' Candidate Skills: {formatted_candidate_skills}',
        f' Skills in Common: {formatted_common}',
        f' Match Ratio: {ratio * 100:.0f}%',
        f' Score: {score_label}',
        '',
    ])

    # --- Transcript -------------------------------------------------------
    lines.append('Interview Transcript & Evaluation:')
    try:
        raw_log = application.interview_log
        try:
            qa_log = json.loads(raw_log) if raw_log else []
        except Exception:
            qa_log = []
        # Only dict entries of a JSON list can be rendered; anything else is ignored.
        entries = [e for e in qa_log if isinstance(e, dict)] if isinstance(qa_log, list) else []
        if entries:
            for idx, entry in enumerate(entries, 1):
                lines.extend(_format_interview_entry(idx, entry))
        else:
            lines.append("No interview log data recorded.")
    except Exception as e:
        lines.append(f"Error loading interview log: {e}")

    return '\n'.join(lines)
def create_pdf_report(report_text: str) -> BytesIO:
    """Render a plain-text interview report as an A4 PDF.

    The text is parsed with ``_parse_report_text``.  The first page shows the
    report header, candidate details, the overall score box, the skills-match
    summary and up to three interview questions (fewer if space runs out).
    Remaining questions are passed to ``_create_transcript_page``.

    Parameters
    ----------
    report_text : str
        Report text in the line-oriented ``Label: value`` format that
        ``_parse_report_text`` understands.

    Returns
    -------
    BytesIO
        In-memory PDF, rewound to position 0.
    """
    buffer = BytesIO()

    # A4 page size in inches (210 mm x 297 mm).  The axes below span the full
    # figure with matching data limits, so every coordinate is in inches.
    A4_WIDTH = 8.27
    A4_HEIGHT = 11.69
    LEFT_MARGIN = 0.75
    RIGHT_MARGIN = 0.75
    TOP_MARGIN = 0.75
    BOTTOM_MARGIN = 0.75
    CONTENT_WIDTH = A4_WIDTH - LEFT_MARGIN - RIGHT_MARGIN

    # Single accent colour plus neutral tones.
    ACCENT_COLOR = '#1e40af'   # dark blue
    TEXT_COLOR = '#111827'     # near black
    LIGHT_GRAY = '#f8fafc'     # box / bar background
    BORDER_COLOR = '#e2e8f0'   # bar outline

    report_data = _parse_report_text(report_text)
    qa_log = report_data['qa_log']
    # Number of Q&A entries actually drawn on page 1; the rest go to later pages.
    qa_count_on_page1 = 0

    def draw_wrapped(ax, text, x, y, step, max_lines=None, **style):
        """Draw ``text`` wrapped at 85 characters, one line per ``step``; return the new y."""
        rows = textwrap.wrap(text, width=85)
        if max_lines is not None:
            rows = rows[:max_lines]
        for row in rows:
            ax.text(x, y, row, fontfamily='sans-serif', **style)
            y -= step
        return y

    with PdfPages(buffer) as pdf:
        fig = plt.figure(figsize=(A4_WIDTH, A4_HEIGHT))
        try:
            fig.patch.set_facecolor('white')
            # Full-figure axes: the default subplot would inset the drawing
            # area and break the inch-based coordinates.
            ax = fig.add_axes([0, 0, 1, 1])
            ax.set_xlim(0, A4_WIDTH)
            ax.set_ylim(0, A4_HEIGHT)
            ax.axis('off')

            y_pos = A4_HEIGHT - TOP_MARGIN

            # === HEADER ===
            ax.text(LEFT_MARGIN, y_pos, 'INTERVIEW ASSESSMENT REPORT',
                    fontsize=20, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')
            current_date = datetime.now().strftime('%B %d, %Y')
            ax.text(A4_WIDTH - RIGHT_MARGIN, y_pos, current_date,
                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif',
                    horizontalalignment='right')
            y_pos -= 0.8

            # === OVERALL SCORE BOX (right) ===
            overall_score = _calculate_overall_score(report_data)
            score_color = _get_score_color(overall_score['label'])
            score_box_width = 2.5
            score_box_height = 1.8
            score_x = A4_WIDTH - RIGHT_MARGIN - score_box_width
            score_center_x = score_x + score_box_width / 2

            ax.add_patch(FancyBboxPatch(
                (score_x, y_pos - score_box_height), score_box_width, score_box_height,
                boxstyle="round,pad=0.1",
                facecolor=LIGHT_GRAY,
                edgecolor=ACCENT_COLOR,
                linewidth=2,
            ))
            ax.text(score_center_x, y_pos - 0.6, f"{overall_score['percentage']:.0f}%",
                    fontsize=32, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif',
                    horizontalalignment='center', verticalalignment='center')
            ax.text(score_center_x, y_pos - 1.2, 'OVERALL SCORE',
                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif',
                    horizontalalignment='center', fontweight='bold')
            ax.text(score_center_x, y_pos - 1.5, overall_score['label'].upper(),
                    fontsize=14, fontweight='bold', color=score_color, fontfamily='sans-serif',
                    horizontalalignment='center')

            # === CANDIDATE INFO (left) ===
            ax.text(LEFT_MARGIN, y_pos - 0.3, report_data['candidate_name'],
                    fontsize=18, fontweight='bold', color=TEXT_COLOR, fontfamily='sans-serif')
            # Role and company are separated explicitly so they do not run together.
            ax.text(LEFT_MARGIN, y_pos - 0.7,
                    f"{report_data['job_role']} | {report_data['company']}",
                    fontsize=12, color=TEXT_COLOR, fontfamily='sans-serif')
            ax.text(LEFT_MARGIN, y_pos - 1.0, f"Email: {report_data['candidate_email']}",
                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')
            ax.text(LEFT_MARGIN, y_pos - 1.3, f"Application Date: {report_data['date_applied']}",
                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')
            y_pos -= 2.5

            # === SKILLS MATCH SUMMARY ===
            ax.text(LEFT_MARGIN, y_pos, 'SKILLS MATCH SUMMARY',
                    fontsize=14, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')
            ax.plot([LEFT_MARGIN, LEFT_MARGIN + 3], [y_pos - 0.1, y_pos - 0.1],
                    color=ACCENT_COLOR, linewidth=2)
            y_pos -= 0.5

            skills_data = report_data['skills_match']
            bar_x = LEFT_MARGIN + 0.5
            bar_width = CONTENT_WIDTH - 1
            bar_height = 0.3

            ax.add_patch(Rectangle((bar_x, y_pos - bar_height), bar_width, bar_height,
                                   facecolor=LIGHT_GRAY, edgecolor=BORDER_COLOR))
            # Clamp so an out-of-range ratio cannot draw outside the bar.
            clamped_ratio = min(max(skills_data['ratio'], 0), 100)
            progress_width = bar_width * (clamped_ratio / 100)
            ax.add_patch(Rectangle((bar_x, y_pos - bar_height), progress_width, bar_height,
                                   facecolor=ACCENT_COLOR, edgecolor='none'))

            # White text is only legible on the filled part.  When the fill is
            # too short to hold the label, draw it in dark text to the right.
            ratio_label = f"{skills_data['ratio']:.0f}% Skills Match"
            label_room = 2.0  # inches of fill needed to contain the label
            if progress_width >= label_room:
                ax.text(bar_x + progress_width / 2, y_pos - bar_height / 2, ratio_label,
                        fontsize=11, fontweight='bold', color='white', fontfamily='sans-serif',
                        horizontalalignment='center', verticalalignment='center')
            else:
                ax.text(bar_x + progress_width + 0.1, y_pos - bar_height / 2, ratio_label,
                        fontsize=11, fontweight='bold', color=TEXT_COLOR, fontfamily='sans-serif',
                        horizontalalignment='left', verticalalignment='center')
            y_pos -= 0.8

            y_pos = draw_wrapped(ax, f"Required Skills: {skills_data['required']}",
                                 LEFT_MARGIN, y_pos, 0.3, fontsize=10, color=TEXT_COLOR)
            y_pos -= 0.3
            y_pos = draw_wrapped(ax, f"Candidate Skills: {skills_data['candidate']}",
                                 LEFT_MARGIN, y_pos, 0.3, fontsize=10, color=TEXT_COLOR)
            y_pos -= 0.3
            y_pos = draw_wrapped(ax, f"Matching Skills: {skills_data['common']}",
                                 LEFT_MARGIN, y_pos, 0.3, fontsize=10, color=TEXT_COLOR)
            y_pos -= 0.8

            # === INTERVIEW TRANSCRIPT PREVIEW ===
            if qa_log:
                ax.text(LEFT_MARGIN, y_pos, 'INTERVIEW TRANSCRIPT',
                        fontsize=14, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')
                ax.plot([LEFT_MARGIN, LEFT_MARGIN + 3], [y_pos - 0.1, y_pos - 0.1],
                        color=ACCENT_COLOR, linewidth=2)
                y_pos -= 0.5

                # At most three entries on page 1, and only while roughly 2.2
                # inches (question + answer + evaluation + spacing) remain.
                for qa in qa_log[:3]:
                    if y_pos < BOTTOM_MARGIN + 2.2:
                        break

                    y_pos = draw_wrapped(ax, f"Q{qa_count_on_page1 + 1}: {qa['question']}",
                                         LEFT_MARGIN, y_pos, 0.25,
                                         fontsize=11, fontweight='bold', color=ACCENT_COLOR)
                    y_pos -= 0.15  # gap between question and answer

                    answer_text = qa['answer']
                    if "salary" in qa['question'].lower() and answer_text in ("0$", "0", "$0"):
                        answer_text = "Prefer not to disclose"
                    y_pos = draw_wrapped(ax, answer_text, LEFT_MARGIN + 0.3, y_pos, 0.25,
                                         max_lines=2, fontsize=10, color=TEXT_COLOR)

                    ax.text(LEFT_MARGIN + 0.3, y_pos, f"Evaluation: {qa['score']}",
                            fontsize=10, fontweight='bold',
                            color=_get_score_color(qa['score']), fontfamily='sans-serif')
                    y_pos -= 0.6
                    qa_count_on_page1 += 1

            # No ``bbox_inches='tight'``: it would crop the page to its content
            # and the result would no longer be A4.
            pdf.savefig(fig)
        finally:
            # Release the pyplot-managed figure even if drawing failed.
            plt.close(fig)

        # === REMAINING TRANSCRIPT ===
        # Continue from the first entry that was not drawn on page 1, numbering
        # from where page 1 stopped.
        if len(qa_log) > qa_count_on_page1:
            _create_transcript_page(
                pdf,
                qa_log[qa_count_on_page1:],
                A4_WIDTH, A4_HEIGHT,
                LEFT_MARGIN, RIGHT_MARGIN, TOP_MARGIN, BOTTOM_MARGIN,
                ACCENT_COLOR, TEXT_COLOR,
                start_index=qa_count_on_page1 + 1,
            )

    buffer.seek(0)
    return buffer
def _parse_report_text(report_text: str) -> Dict[str, Any]:
"""Parse the text report into structured data."""
lines = report_text.split('\n')
data = {
'candidate_name': 'N/A',
'candidate_email': 'N/A',
'job_role': 'N/A',
'company': 'N/A',
'date_applied': 'N/A',
'skills_match': {
'required': 'N/A',
'candidate': 'N/A',
'common': 'N/A',
'ratio': 0,
'score': 'N/A'
},
'qa_log': []
}
current_question = None
for line in lines:
line = line.strip()
if line.startswith('Candidate Name:'):
data['candidate_name'] = line.split(':', 1)[1].strip()
elif line.startswith('Candidate Email:'):
data['candidate_email'] = line.split(':', 1)[1].strip()
elif line.startswith('Job Applied:'):
data['job_role'] = line.split(':', 1)[1].strip()
elif line.startswith('Company:'):
data['company'] = line.split(':', 1)[1].strip()
elif line.startswith('Date Applied:'):
data['date_applied'] = line.split(':', 1)[1].strip()
elif line.startswith('Required Skills:'):
data['skills_match']['required'] = line.split(':', 1)[1].strip()
elif line.startswith('Candidate Skills:'):
data['skills_match']['candidate'] = line.split(':', 1)[1].strip()
elif line.startswith('Skills in Common:'):
data['skills_match']['common'] = line.split(':', 1)[1].strip()
elif line.startswith('Match Ratio:'):
try:
data['skills_match']['ratio'] = float(line.split(':')[1].strip().rstrip('%'))
except:
data['skills_match']['ratio'] = 0
elif line.startswith('Score:'):
# Distinguish between the overall skills match score and per‑question scores.
# If no question has been started yet (i.e. current_question is None),
# interpret this Score line as the skills match score. Otherwise it
# belongs to the most recent question.
score_value = line.split(':', 1)[1].strip()
if current_question is None:
data['skills_match']['score'] = score_value
else:
current_question['score'] = score_value
continue
elif line.startswith('Question'):
if current_question:
data['qa_log'].append(current_question)
current_question = {
'question': line.split(':', 1)[1].strip() if ':' in line else line,
'answer': '',
'score': '',
'feedback': ''
}
elif line.startswith('Answer:') and current_question:
current_question['answer'] = line.split(':', 1)[1].strip()
elif line.startswith('Feedback:') and current_question:
current_question['feedback'] = line.split(':', 1)[1].strip()
if current_question:
data['qa_log'].append(current_question)
return data
def _calculate_overall_score(report_data: Dict[str, Any]) -> Dict[str, Any]:
"""Calculate overall score from skills match and QA scores."""
# Skills match contributes 40%
skills_ratio = report_data['skills_match']['ratio'] / 100
# QA scores contribute 60%
qa_scores = []
for qa in report_data['qa_log']:
score_text = qa['score'].lower()
if 'excellent' in score_text or '5' in score_text or '10' in score_text:
qa_scores.append(1.0)
elif 'good' in score_text or '4' in score_text or '8' in score_text or '9' in score_text:
qa_scores.append(0.8)
elif 'satisfactory' in score_text or 'medium' in score_text or '3' in score_text or '6' in score_text or '7' in score_text:
qa_scores.append(0.6)
elif 'needs improvement' in score_text or 'poor' in score_text or '2' in score_text or '4' in score_text or '5' in score_text:
qa_scores.append(0.4)
else:
qa_scores.append(0.2)
qa_average = sum(qa_scores) / len(qa_scores) if qa_scores else 0.5
# Calculate weighted average
overall = (skills_ratio * 0.4) + (qa_average * 0.6)
percentage = overall * 100
if overall >= 0.8:
label = 'Excellent'
elif overall >= 0.65:
label = 'Good'
elif overall >= 0.45:
label = 'Satisfactory'
else:
label = 'Needs Improvement'
return {'percentage': percentage, 'label': label}
def _get_score_color(score_label: str) -> str:
"""Get color based on score label."""
score_label = score_label.lower()
if 'excellent' in score_label:
return '#059669' # Green
elif 'good' in score_label:
return '#2563eb' # Blue
elif 'medium' in score_label or 'satisfactory' in score_label:
return '#d97706' # Orange
else:
return '#dc2626' # Red
def _create_transcript_page(pdf, qa_log: List[Dict], page_width: float, page_height: float,
                            left_margin: float, right_margin: float, top_margin: float, bottom_margin: float,
                            accent_color: str, text_color: str, start_index: int = 1):
    """Render the remaining interview transcript, adding pages as needed.

    Each entry shows the question, up to three wrapped answer lines, the
    evaluation score and, where space permits, up to two lines of feedback.
    When the current page runs out of room a new page is started, so no entry
    in ``qa_log`` is dropped.

    Parameters
    ----------
    pdf
        Open ``PdfPages`` object that the finished pages are saved to.
    qa_log : list of dict
        Entries with ``question``, ``answer``, ``score`` and ``feedback`` keys.
    page_width, page_height : float
        Page size in inches.
    left_margin, right_margin, top_margin, bottom_margin : float
        Page margins in inches.
    accent_color, text_color : str
        Colours for headings/questions and for answer text.
    start_index : int
        Number shown for the first entry of ``qa_log``.
    """
    content_width = page_width - left_margin - right_margin
    # Minimum vertical room (inches above the bottom margin) to start an entry.
    entry_room = 1.5

    def start_page():
        """Create a blank page with the section header; return (fig, ax, y)."""
        fig = plt.figure(figsize=(page_width, page_height))
        fig.patch.set_facecolor('white')
        # Full-figure axes with limits equal to the page size, so that
        # coordinates are in inches.
        ax = fig.add_axes([0, 0, 1, 1])
        ax.set_xlim(0, page_width)
        ax.set_ylim(0, page_height)
        ax.axis('off')

        y = page_height - top_margin
        ax.text(left_margin, y, 'INTERVIEW TRANSCRIPT (CONTINUED)',
                fontsize=14, fontweight='bold', color=accent_color, fontfamily='sans-serif')
        ax.plot([left_margin, left_margin + 4], [y - 0.1, y - 0.1],
                color=accent_color, linewidth=2)
        return fig, ax, y - 0.8

    fig, ax, y_pos = start_page()
    entries_on_page = 0
    try:
        for i, qa in enumerate(qa_log):
            # Out of room: flush this page and continue on a fresh one.  The
            # ``entries_on_page`` guard ensures progress even on a tiny page.
            if y_pos < bottom_margin + entry_room and entries_on_page:
                pdf.savefig(fig)
                plt.close(fig)
                fig, ax, y_pos = start_page()
                entries_on_page = 0

            # Question
            for line in textwrap.wrap(f"Q{start_index + i}: {qa['question']}", width=85):
                ax.text(left_margin, y_pos, line,
                        fontsize=11, fontweight='bold', color=accent_color, fontfamily='sans-serif')
                y_pos -= 0.3
            y_pos -= 0.1

            # Answer (a zero salary answer is shown as a refusal to disclose)
            answer_text = qa['answer']
            if "salary" in qa['question'].lower() and answer_text in ("0$", "0", "$0"):
                answer_text = "Prefer not to disclose"
            for line in textwrap.wrap(answer_text, width=80)[:3]:  # max 3 lines
                ax.text(left_margin + 0.3, y_pos, line,
                        fontsize=10, color=text_color, fontfamily='sans-serif')
                y_pos -= 0.25

            # Evaluation
            ax.text(left_margin + 0.3, y_pos, f"Evaluation: {qa['score']}",
                    fontsize=10, fontweight='bold',
                    color=_get_score_color(qa['score']), fontfamily='sans-serif')
            y_pos -= 0.2

            # Feedback, if present and there is still room above the margin
            if qa['feedback'] and qa['feedback'] != 'N/A' and y_pos > bottom_margin + 0.8:
                for line in textwrap.wrap(f"Feedback: {qa['feedback']}", width=75)[:2]:  # max 2 lines
                    ax.text(left_margin + 0.3, y_pos, line,
                            fontsize=9, color='#6b7280', fontfamily='sans-serif', style='italic')
                    y_pos -= 0.2
            y_pos -= 0.4
            entries_on_page += 1

            # Separator, only when the next entry will follow on this same page.
            is_last = i == len(qa_log) - 1
            if not is_last and y_pos - 0.3 >= bottom_margin + entry_room:
                ax.plot([left_margin + 0.5, left_margin + content_width - 0.5],
                        [y_pos + 0.1, y_pos + 0.1],
                        color='#e5e7eb', linewidth=0.5, linestyle='--')
                y_pos -= 0.3

        # No ``bbox_inches='tight'``: it would crop the page below its nominal size.
        pdf.savefig(fig)
    finally:
        # Release the pyplot-managed figure even if drawing failed.
        plt.close(fig)
# Public API of this module; the underscore-prefixed functions are internal helpers.
__all__ = ['generate_llm_interview_report', 'create_pdf_report']