Spaces:

husseinelsaadi
/

Codingo

Paused

File size: 24,485 Bytes


from __future__ import annotations
import json
from io import BytesIO
import textwrap
from typing import List, Dict, Any, Tuple
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.patches import Rectangle, FancyBboxPatch
from datetime import datetime


def generate_llm_interview_report(application) -> str:
    """Generate a human-readable interview report for a candidate.

    The report contains the candidate's name and email, the job role and
    company, the application date, a skills-match summary and, when an
    interview log is stored on the application, one block per recorded
    question with its answer, score and feedback.

    Malformed stored data is tolerated rather than raised: undecodable
    JSON, a features payload that is not a JSON object, skill lists that
    are not lists (or contain non-string items) and interview-log entries
    that are not JSON objects are all ignored.

    Parameters
    ----------
    application : backend.models.database.Application
        Object exposing ``name``, ``email``, ``job`` (with ``role``,
        ``company`` and a JSON-encoded ``skills`` list), ``date_applied``,
        a JSON-encoded ``extracted_features`` object and a JSON-encoded
        ``interview_log`` list.

    Returns
    -------
    str
        A multi-line string containing the report contents.
    """

    def _string_items(value: Any) -> List[str]:
        """Return the string members of *value*, or [] if it is not a list."""
        if not isinstance(value, list):
            return []
        return [item for item in value if isinstance(item, str)]

    # Candidate skills live under the 'skills' key of a JSON object.  The
    # broad except is deliberate best-effort: attribute access and decoding
    # both happen here and neither should prevent a report being produced.
    try:
        raw_features = application.extracted_features
        candidate_features = json.loads(raw_features) if raw_features else {}
    except Exception:
        candidate_features = {}
    if not isinstance(candidate_features, dict):
        # e.g. the column holds a JSON list or a bare string
        candidate_features = {}
    candidate_skills: List[str] = _string_items(candidate_features.get('skills'))

    job_skills: List[str] = []
    try:
        job = application.job
        if job and job.skills:
            job_skills = _string_items(json.loads(job.skills))
    except Exception:
        job_skills = []

    # Compute skills match ratio and label.  Normalise to lower case for
    # comparison and avoid dividing by zero when ``job_skills`` is empty.
    candidate_set = {s.strip().lower() for s in candidate_skills}
    job_set = {s.strip().lower() for s in job_skills}
    common = candidate_set & job_set
    ratio = len(common) / len(job_set) if job_set else 0.0

    if ratio >= 0.75:
        score_label = 'Excellent'
    elif ratio >= 0.5:
        score_label = 'Good'
    elif ratio >= 0.25:
        score_label = 'Medium'
    else:
        score_label = 'Poor'

    # Assemble report lines
    lines: List[str] = []
    lines.append('Interview Report')
    lines.append('=================')
    lines.append('')
    lines.append(f'Candidate Name: {application.name}')
    lines.append(f'Candidate Email: {application.email}')
    if application.job:
        lines.append(f'Job Applied: {application.job.role}')
        lines.append(f'Company: {application.job.company}')
    else:
        lines.append('Job Applied: N/A')
        lines.append('Company: N/A')

    # Format date_applied if available
    try:
        date_str = application.date_applied.strftime('%Y-%m-%d') if application.date_applied else 'N/A'
    except Exception:
        date_str = 'N/A'
    lines.append(f'Date Applied: {date_str}')
    lines.append('')

    lines.append('Skills Match Summary:')
    # Required and candidate skills are shown as stored; the common skills
    # are shown in their normalised (stripped, lower-case) form, sorted.
    formatted_job_skills = ', '.join(job_skills) if job_skills else 'N/A'
    formatted_candidate_skills = ', '.join(candidate_skills) if candidate_skills else 'N/A'
    formatted_common = ', '.join(sorted(common)) if common else 'None'

    lines.append(f'  Required Skills: {formatted_job_skills}')
    lines.append(f'  Candidate Skills: {formatted_candidate_skills}')
    lines.append(f'  Skills in Common: {formatted_common}')
    lines.append(f'  Match Ratio: {ratio * 100:.0f}%')
    lines.append(f'  Score: {score_label}')
    lines.append('')

    lines.append('Interview Transcript & Evaluation:')
    try:
        raw_log = application.interview_log
        try:
            qa_log = json.loads(raw_log) if raw_log else []
        except (TypeError, ValueError):
            qa_log = []

        # Keep only well-formed entries so that one bad record does not
        # discard the rest of the transcript.
        entries = [e for e in qa_log if isinstance(e, dict)] if isinstance(qa_log, list) else []

        if not entries:
            lines.append("No interview log data recorded.")

        for idx, entry in enumerate(entries, 1):
            q = entry.get("question", "N/A")
            a = entry.get("answer", "N/A")

            # A zero salary answer is shown as a declined disclosure.
            # str() guards against a non-string question value.
            if "salary" in str(q).lower() and a in ("0$", "0", "$0"):
                a = "Prefer not to disclose"

            # "evaluation" may be absent, null or not an object.
            evaluation = entry.get("evaluation")
            if not isinstance(evaluation, dict):
                evaluation = {}
            eval_score = evaluation.get("score", "N/A")
            eval_feedback = evaluation.get("feedback", "N/A")

            lines.append(f"\nQuestion {idx}: {q}")
            lines.append(f"Answer: {a}")
            lines.append(f"Score: {eval_score}")
            lines.append(f"Feedback: {eval_feedback}")
    except Exception as e:
        lines.append(f"Error loading interview log: {e}")

    return '\n'.join(lines)


def create_pdf_report(report_text: str) -> BytesIO:
    """Render a text report as a PDF and return it as an in-memory buffer.

    The text is parsed with ``_parse_report_text``.  The first page shows
    the header, candidate details, the overall score box, the skills-match
    summary and up to three transcript entries (answers truncated to two
    lines).  Entries that were not drawn on the first page are handed to
    ``_create_transcript_page``.

    Parameters
    ----------
    report_text : str
        Report text in the layout ``_parse_report_text`` understands.

    Returns
    -------
    BytesIO
        Buffer containing the PDF, positioned at offset 0.
    """
    buffer = BytesIO()

    # A4 dimensions in inches (210mm x 297mm); also used as the axes limits,
    # so all coordinates below are expressed in these units.
    A4_WIDTH = 8.27
    A4_HEIGHT = 11.69

    # Margins, in the same units
    LEFT_MARGIN = 0.75
    RIGHT_MARGIN = 0.75
    TOP_MARGIN = 0.75
    BOTTOM_MARGIN = 0.75

    CONTENT_WIDTH = A4_WIDTH - LEFT_MARGIN - RIGHT_MARGIN

    # Professional color scheme - single accent color
    ACCENT_COLOR = '#1e40af'  # Dark blue
    TEXT_COLOR = '#111827'    # Dark gray/black
    LIGHT_GRAY = '#f8fafc'    # Light background
    BORDER_COLOR = '#e2e8f0'  # Light border

    def plain(text: Any) -> str:
        """Escape '$' so matplotlib draws it literally.

        Matplotlib switches to mathtext when a string contains an even
        number of unescaped dollar signs (e.g. "$50k to $60k"), which
        garbles or rejects ordinary user text.
        """
        return str(text).replace('$', r'\$')

    report_data = _parse_report_text(report_text)
    qa_log = report_data['qa_log']
    # Number of transcript entries actually drawn on page 1.
    qa_count_on_page1 = 0

    with PdfPages(buffer) as pdf:
        # Page 1: Header, Candidate Info, and Skills Summary
        fig = plt.figure(figsize=(A4_WIDTH, A4_HEIGHT))
        try:
            fig.patch.set_facecolor('white')

            ax = fig.add_subplot(111)
            ax.set_xlim(0, A4_WIDTH)
            ax.set_ylim(0, A4_HEIGHT)
            ax.axis('off')

            # Current Y position (start from top)
            y_pos = A4_HEIGHT - TOP_MARGIN

            # === HEADER SECTION ===
            ax.text(LEFT_MARGIN, y_pos, 'INTERVIEW ASSESSMENT REPORT',
                    fontsize=20, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

            # Generation date, right-aligned
            current_date = datetime.now().strftime('%B %d, %Y')
            ax.text(A4_WIDTH - RIGHT_MARGIN, y_pos, current_date,
                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif',
                    horizontalalignment='right')

            y_pos -= 0.8

            # === CANDIDATE INFO AND OVERALL SCORE ===
            overall_score = _calculate_overall_score(report_data)
            score_color = _get_score_color(overall_score['label'])

            # Score box on the right
            score_box_width = 2.5
            score_box_height = 1.8
            score_x = A4_WIDTH - RIGHT_MARGIN - score_box_width

            score_rect = FancyBboxPatch(
                (score_x, y_pos - score_box_height), score_box_width, score_box_height,
                boxstyle="round,pad=0.1",
                facecolor=LIGHT_GRAY,
                edgecolor=ACCENT_COLOR,
                linewidth=2
            )
            ax.add_patch(score_rect)

            # Large score percentage
            ax.text(score_x + score_box_width/2, y_pos - 0.6, f"{overall_score['percentage']:.0f}%",
                    fontsize=32, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif',
                    horizontalalignment='center', verticalalignment='center')

            # Score caption and label
            ax.text(score_x + score_box_width/2, y_pos - 1.2, 'OVERALL SCORE',
                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif',
                    horizontalalignment='center', fontweight='bold')

            ax.text(score_x + score_box_width/2, y_pos - 1.5, overall_score['label'].upper(),
                    fontsize=14, fontweight='bold', color=score_color, fontfamily='sans-serif',
                    horizontalalignment='center')

            # Candidate name (large)
            ax.text(LEFT_MARGIN, y_pos - 0.3, plain(report_data['candidate_name']),
                    fontsize=18, fontweight='bold', color=TEXT_COLOR, fontfamily='sans-serif')

            # Position and company
            ax.text(LEFT_MARGIN, y_pos - 0.7,
                    plain(f"{report_data['job_role']} • {report_data['company']}"),
                    fontsize=12, color=TEXT_COLOR, fontfamily='sans-serif')

            # Email and date
            ax.text(LEFT_MARGIN, y_pos - 1.0, plain(f"Email: {report_data['candidate_email']}"),
                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')

            ax.text(LEFT_MARGIN, y_pos - 1.3, plain(f"Application Date: {report_data['date_applied']}"),
                    fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')

            y_pos -= 2.5

            # === SKILLS MATCH SUMMARY ===
            ax.text(LEFT_MARGIN, y_pos, 'SKILLS MATCH SUMMARY',
                    fontsize=14, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

            # Underline
            ax.plot([LEFT_MARGIN, LEFT_MARGIN + 3], [y_pos - 0.1, y_pos - 0.1],
                    color=ACCENT_COLOR, linewidth=2)

            y_pos -= 0.5

            skills_data = report_data['skills_match']

            # Skills match percentage bar
            bar_width = CONTENT_WIDTH - 1
            bar_height = 0.3

            # Background bar
            bg_rect = Rectangle((LEFT_MARGIN + 0.5, y_pos - bar_height), bar_width, bar_height,
                                facecolor=LIGHT_GRAY, edgecolor=BORDER_COLOR)
            ax.add_patch(bg_rect)

            # Filled part of the bar; clamp so a bad ratio cannot draw
            # outside the background bar.
            progress_fraction = min(max(skills_data['ratio'] / 100, 0.0), 1.0)
            progress_rect = Rectangle((LEFT_MARGIN + 0.5, y_pos - bar_height),
                                      bar_width * progress_fraction, bar_height,
                                      facecolor=ACCENT_COLOR, edgecolor='none')
            ax.add_patch(progress_rect)

            # The label is centred on the bar.  White is only readable once
            # the dark fill extends past the label; otherwise the label sits
            # on the light background and needs the dark text colour.
            bar_label_color = 'white' if progress_fraction >= 0.65 else TEXT_COLOR
            ax.text(LEFT_MARGIN + 0.5 + bar_width/2, y_pos - bar_height/2,
                    f"{skills_data['ratio']:.0f}% Skills Match",
                    fontsize=11, fontweight='bold', color=bar_label_color, fontfamily='sans-serif',
                    horizontalalignment='center', verticalalignment='center')

            y_pos -= 0.8

            # Skills details (simplified)
            required_text = f"Required Skills: {skills_data['required']}"
            for line in textwrap.wrap(required_text, width=85):
                ax.text(LEFT_MARGIN, y_pos, plain(line),
                        fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')
                y_pos -= 0.3

            y_pos -= 0.3

            candidate_text = f"Candidate Skills: {skills_data['candidate']}"
            for line in textwrap.wrap(candidate_text, width=85):
                ax.text(LEFT_MARGIN, y_pos, plain(line),
                        fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')
                y_pos -= 0.3

            y_pos -= 0.3

            matching_text = f"Matching Skills: {skills_data['common']}"
            for line in textwrap.wrap(matching_text, width=85):
                ax.text(LEFT_MARGIN, y_pos, plain(line),
                        fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')
                y_pos -= 0.3

            y_pos -= 0.8

            # === INTERVIEW TRANSCRIPT PREVIEW ===
            if qa_log:
                ax.text(LEFT_MARGIN, y_pos, 'INTERVIEW TRANSCRIPT',
                        fontsize=14, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')

                # Underline
                ax.plot([LEFT_MARGIN, LEFT_MARGIN + 3], [y_pos - 0.1, y_pos - 0.1],
                        color=ACCENT_COLOR, linewidth=2)

                y_pos -= 0.5

                # Show up to 3 Q&As on the first page; fewer if space runs
                # out.  qa_count_on_page1 records how many were drawn so the
                # continuation starts at the right entry.
                for qa in qa_log[:3]:
                    # 2.2 approximates the height of a question, answer,
                    # evaluation and spacing.
                    if y_pos < BOTTOM_MARGIN + 2.2:
                        break

                    question_text = f"Q{qa_count_on_page1 + 1}: {qa['question']}"
                    for line in textwrap.wrap(question_text, width=85):
                        ax.text(LEFT_MARGIN, y_pos, plain(line),
                                fontsize=11, fontweight='bold', color=ACCENT_COLOR, fontfamily='sans-serif')
                        y_pos -= 0.25
                    y_pos -= 0.15  # extra spacing after question block

                    # Answer.  Mask salary disclosure if applicable (compare
                    # before escaping so "0$" still matches).
                    answer_text = qa['answer']
                    if "salary" in qa['question'].lower() and answer_text in ("0$", "0", "$0"):
                        answer_text = "Prefer not to disclose"

                    wrapped_answer = textwrap.fill(answer_text, width=85)
                    for line in wrapped_answer.split('\n')[:2]:  # Max 2 lines
                        ax.text(LEFT_MARGIN + 0.3, y_pos, plain(line),
                                fontsize=10, color=TEXT_COLOR, fontfamily='sans-serif')
                        y_pos -= 0.25

                    # Evaluation
                    eval_color = _get_score_color(qa['score'])
                    ax.text(LEFT_MARGIN + 0.3, y_pos, plain(f"Evaluation: {qa['score']}"),
                            fontsize=10, fontweight='bold', color=eval_color, fontfamily='sans-serif')
                    y_pos -= 0.6

                    qa_count_on_page1 += 1

            # NOTE(review): bbox_inches='tight' crops the saved page to the
            # drawn artists, so the page may not be exactly A4 - confirm
            # whether that is intended.
            pdf.savefig(fig, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if drawing failed, so it does
            # not stay registered with pyplot.
            plt.close(fig)

        # === REMAINING TRANSCRIPT ===
        # Continue from the first entry that was not drawn on page 1, keeping
        # the question numbering consecutive.
        if len(qa_log) > qa_count_on_page1:
            _create_transcript_page(
                pdf,
                qa_log[qa_count_on_page1:],
                A4_WIDTH, A4_HEIGHT,
                LEFT_MARGIN, RIGHT_MARGIN, TOP_MARGIN, BOTTOM_MARGIN,
                ACCENT_COLOR, TEXT_COLOR,
                start_index=qa_count_on_page1 + 1
            )

    buffer.seek(0)
    return buffer


def _parse_report_text(report_text: str) -> Dict[str, Any]:
    """Parse the text report into structured data."""
    lines = report_text.split('\n')
    data = {
        'candidate_name': 'N/A',
        'candidate_email': 'N/A',
        'job_role': 'N/A',
        'company': 'N/A',
        'date_applied': 'N/A',
        'skills_match': {
            'required': 'N/A',
            'candidate': 'N/A',
            'common': 'N/A',
            'ratio': 0,
            'score': 'N/A'
        },
        'qa_log': []
    }
    
    current_question = None
    
    for line in lines:
        line = line.strip()
        if line.startswith('Candidate Name:'):
            data['candidate_name'] = line.split(':', 1)[1].strip()
        elif line.startswith('Candidate Email:'):
            data['candidate_email'] = line.split(':', 1)[1].strip()
        elif line.startswith('Job Applied:'):
            data['job_role'] = line.split(':', 1)[1].strip()
        elif line.startswith('Company:'):
            data['company'] = line.split(':', 1)[1].strip()
        elif line.startswith('Date Applied:'):
            data['date_applied'] = line.split(':', 1)[1].strip()
        elif line.startswith('Required Skills:'):
            data['skills_match']['required'] = line.split(':', 1)[1].strip()
        elif line.startswith('Candidate Skills:'):
            data['skills_match']['candidate'] = line.split(':', 1)[1].strip()
        elif line.startswith('Skills in Common:'):
            data['skills_match']['common'] = line.split(':', 1)[1].strip()
        elif line.startswith('Match Ratio:'):
            try:
                data['skills_match']['ratio'] = float(line.split(':')[1].strip().rstrip('%'))
            except:
                data['skills_match']['ratio'] = 0
        elif line.startswith('Score:'):
            # Distinguish between the overall skills match score and per‑question scores.
            # If no question has been started yet (i.e. current_question is None),
            # interpret this Score line as the skills match score.  Otherwise it
            # belongs to the most recent question.
            score_value = line.split(':', 1)[1].strip()
            if current_question is None:
                data['skills_match']['score'] = score_value
            else:
                current_question['score'] = score_value
            continue
        elif line.startswith('Question'):
            if current_question:
                data['qa_log'].append(current_question)
            current_question = {
                'question': line.split(':', 1)[1].strip() if ':' in line else line,
                'answer': '',
                'score': '',
                'feedback': ''
            }
        elif line.startswith('Answer:') and current_question:
            current_question['answer'] = line.split(':', 1)[1].strip()
        elif line.startswith('Feedback:') and current_question:
            current_question['feedback'] = line.split(':', 1)[1].strip()
    
    if current_question:
        data['qa_log'].append(current_question)
    
    return data


def _calculate_overall_score(report_data: Dict[str, Any]) -> Dict[str, Any]:
    """Calculate overall score from skills match and QA scores."""
    # Skills match contributes 40%
    skills_ratio = report_data['skills_match']['ratio'] / 100
    
    # QA scores contribute 60%
    qa_scores = []
    for qa in report_data['qa_log']:
        score_text = qa['score'].lower()
        if 'excellent' in score_text or '5' in score_text or '10' in score_text:
            qa_scores.append(1.0)
        elif 'good' in score_text or '4' in score_text or '8' in score_text or '9' in score_text:
            qa_scores.append(0.8)
        elif 'satisfactory' in score_text or 'medium' in score_text or '3' in score_text or '6' in score_text or '7' in score_text:
            qa_scores.append(0.6)
        elif 'needs improvement' in score_text or 'poor' in score_text or '2' in score_text or '4' in score_text or '5' in score_text:
            qa_scores.append(0.4)
        else:
            qa_scores.append(0.2)
    
    qa_average = sum(qa_scores) / len(qa_scores) if qa_scores else 0.5
    
    # Calculate weighted average
    overall = (skills_ratio * 0.4) + (qa_average * 0.6)
    percentage = overall * 100
    
    if overall >= 0.8:
        label = 'Excellent'
    elif overall >= 0.65:
        label = 'Good'
    elif overall >= 0.45:
        label = 'Satisfactory'
    else:
        label = 'Needs Improvement'
    
    return {'percentage': percentage, 'label': label}


def _get_score_color(score_label: str) -> str:
    """Get color based on score label."""
    score_label = score_label.lower()
    if 'excellent' in score_label:
        return '#059669'  # Green
    elif 'good' in score_label:
        return '#2563eb'  # Blue
    elif 'medium' in score_label or 'satisfactory' in score_label:
        return '#d97706'  # Orange
    else:
        return '#dc2626'  # Red


def _create_transcript_page(pdf, qa_log: List[Dict], page_width: float, page_height: float,
                          left_margin: float, right_margin: float, top_margin: float, bottom_margin: float,
                          accent_color: str, text_color: str, start_index: int = 1):
    """Render the remaining interview transcript onto continuation pages.

    Entries are drawn top to bottom.  When the next entry no longer fits,
    the page is saved and a new one is started, so every entry in
    ``qa_log`` is rendered.  At least one page is always written, even for
    an empty ``qa_log``.

    Parameters
    ----------
    pdf
        Object whose ``savefig`` method receives each finished figure.
    qa_log : list of dict
        Entries with ``question``, ``answer``, ``score`` and ``feedback``.
    page_width, page_height : float
        Figure size; also used as the axes limits for all coordinates.
    left_margin, right_margin, top_margin, bottom_margin : float
        Page margins in the same units.
    accent_color, text_color : str
        Colours for headings/questions and for answer text.
    start_index : int
        Number shown for the first entry of ``qa_log``.
    """
    content_width = page_width - left_margin - right_margin
    # Space an entry is assumed to need before it is started.
    min_entry_space = 1.5
    # Vertical gap taken by the dashed separator between two entries.
    separator_gap = 0.3

    def plain(text: Any) -> str:
        """Escape '$' so matplotlib draws it literally.

        Matplotlib switches to mathtext when a string contains an even
        number of unescaped dollar signs, which garbles ordinary text.
        """
        return str(text).replace('$', r'\$')

    remaining = list(qa_log)
    next_number = start_index

    while True:
        fig = plt.figure(figsize=(page_width, page_height))
        try:
            fig.patch.set_facecolor('white')
            ax = fig.add_subplot(111)
            ax.set_xlim(0, page_width)
            ax.set_ylim(0, page_height)
            ax.axis('off')

            # Start from top
            y_pos = page_height - top_margin

            # Page header
            ax.text(left_margin, y_pos, 'INTERVIEW TRANSCRIPT (CONTINUED)',
                    fontsize=14, fontweight='bold', color=accent_color, fontfamily='sans-serif')

            # Underline
            ax.plot([left_margin, left_margin + 4], [y_pos - 0.1, y_pos - 0.1],
                    color=accent_color, linewidth=2)

            y_pos -= 0.8

            drawn = 0
            for qa in remaining:
                if drawn:
                    # Not the first entry on this page: make sure the
                    # separator plus the entry still fit, else defer the
                    # entry to the next page.  The first entry is always
                    # drawn so that every page makes progress.
                    if y_pos - separator_gap < bottom_margin + min_entry_space:
                        break
                    ax.plot([left_margin + 0.5, left_margin + content_width - 0.5],
                            [y_pos + 0.1, y_pos + 0.1],
                            color='#e5e7eb', linewidth=0.5, linestyle='--')
                    y_pos -= separator_gap

                # Question
                question_text = f"Q{next_number + drawn}: {qa['question']}"
                for line in textwrap.fill(question_text, width=85).split('\n'):
                    ax.text(left_margin, y_pos, plain(line),
                            fontsize=11, fontweight='bold', color=accent_color, fontfamily='sans-serif')
                    y_pos -= 0.3

                y_pos -= 0.1

                # Answer.  Mask salary disclosure if applicable (compare
                # before escaping so "0$" still matches).
                answer_text = qa['answer']
                if "salary" in qa['question'].lower() and answer_text in ("0$", "0", "$0"):
                    answer_text = "Prefer not to disclose"

                answer_lines = textwrap.fill(answer_text, width=80).split('\n')
                for line in answer_lines[:3]:  # Max 3 lines per answer
                    ax.text(left_margin + 0.3, y_pos, plain(line),
                            fontsize=10, color=text_color, fontfamily='sans-serif')
                    y_pos -= 0.25

                # Evaluation
                eval_color = _get_score_color(qa['score'])
                ax.text(left_margin + 0.3, y_pos, plain(f"Evaluation: {qa['score']}"),
                        fontsize=10, fontweight='bold', color=eval_color, fontfamily='sans-serif')

                y_pos -= 0.2

                # Feedback (if available and space permits)
                if qa['feedback'] and qa['feedback'] != 'N/A' and y_pos > bottom_margin + 0.8:
                    feedback_text = f"Feedback: {qa['feedback']}"
                    feedback_lines = textwrap.fill(feedback_text, width=75).split('\n')[:2]  # Max 2 lines

                    for line in feedback_lines:
                        ax.text(left_margin + 0.3, y_pos, plain(line),
                                fontsize=9, color='#6b7280', fontfamily='sans-serif', style='italic')
                        y_pos -= 0.2

                y_pos -= 0.4
                drawn += 1

            # Save page
            pdf.savefig(fig, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if drawing failed, so it does
            # not stay registered with pyplot.
            plt.close(fig)

        remaining = remaining[drawn:]
        next_number += drawn
        if not remaining:
            break


# Public API of this module; the underscore-prefixed functions above are
# internal helpers and are not exported by ``from <module> import *``.
__all__ = ['generate_llm_interview_report', 'create_pdf_report']