# Source: Hugging Face Space "Dhahlan2000/AppyJob" (Space status at capture: Sleeping)
# File size: 7,446 bytes

import streamlit as st
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
from PyPDF2 import PdfReader
import docx
import re
from typing import Dict

# Heuristic cap on heading length: section headings ("Work Experience",
# "Education and Qualifications") are short, whereas body sentences that merely
# mention a keyword ("worked on ...") are longer. Tune if headings are missed.
_MAX_HEADER_WORDS = 5

# Heading keywords per section. Sections are tried in this order and the first
# match wins, so the order is part of the behaviour.
_SECTION_PATTERNS = {
    'contact': re.compile(r'contact|personal\s+information|profile', re.IGNORECASE),
    'education': re.compile(r'education|academic|qualification', re.IGNORECASE),
    'experience': re.compile(r'experience|work|employment|professional', re.IGNORECASE),
    'skills': re.compile(r'skills|technical skills|competencies', re.IGNORECASE),
    'projects': re.compile(r'projects|personal projects', re.IGNORECASE),
}


def _detect_section_header(line: str):
    """Return the section name a heading-like line introduces, or None."""
    # For "Skills: Python, SQL" only the label before the colon is the heading.
    label = line.split(':', 1)[0].strip(' \t-=*#|_\u2022\u2013\u2014')
    if not label or len(label.split()) > _MAX_HEADER_WORDS:
        return None
    for section, pattern in _SECTION_PATTERNS.items():
        if pattern.search(label):
            return section
    return None


def parse_cv_sections(text: str) -> Dict[str, str]:
    """Split raw CV text into named sections.

    The text is scanned line by line. A line is treated as a section heading
    only when it looks like one: its label (the whole line, or the part before
    the first colon) has at most ``_MAX_HEADER_WORDS`` words and contains one
    of the section keywords. Longer body lines that merely mention a keyword
    such as "work" or "education" stay in the current section instead of
    starting a new one.

    Args:
        text: Plain text extracted from a CV.

    Returns:
        A dict with the keys 'contact', 'education', 'experience', 'skills'
        and 'projects'. Each value holds that section's non-blank lines
        (heading line included), stripped and terminated by a newline, or ''
        when the section was not found. Lines that appear before the first
        recognised heading are discarded.
    """
    collected = {name: [] for name in _SECTION_PATTERNS}
    if not text:
        return {name: '' for name in collected}

    current_section = None
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        header = _detect_section_header(line)
        if header is not None:
            current_section = header

        if current_section is not None:
            collected[current_section].append(line)

    return {
        name: ''.join(f'{entry}\n' for entry in entries)
        for name, entries in collected.items()
    }

def extract_cv_text(file):
    """Extract the text of an uploaded PDF or DOCX CV and parse it into sections.

    Args:
        file: An uploaded file object exposing a ``name`` attribute and
            readable by ``PdfReader`` / ``docx.Document``, or None.

    Returns:
        On success, the dict produced by ``parse_cv_sections``. Otherwise a
        human-readable message string: "No CV uploaded" for None, an
        unsupported-format notice for other extensions, or
        "Error processing file: ..." when reading or parsing fails. Callers
        tell the two apart with ``isinstance(result, dict)``.
    """
    if file is None:
        return "No CV uploaded"

    file_ext = os.path.splitext(file.name)[1].lower()

    try:
        if file_ext == '.pdf':
            reader = PdfReader(file)
            # Join pages with a newline so the last line of one page does not
            # fuse with the first line of the next (which could hide a section
            # heading). `or ''` guards against a page that yields no text.
            text = '\n'.join(page.extract_text() or '' for page in reader.pages)
        elif file_ext == '.docx':
            document = docx.Document(file)
            text = '\n'.join(paragraph.text for paragraph in document.paragraphs)
        else:
            return "Unsupported file format. Please upload PDF or DOCX files."

        # Parse the CV into sections
        return parse_cv_sections(text)

    except Exception as e:
        # UI boundary: report any reader/parser failure as a message rather
        # than crashing the app.
        return f"Error processing file: {str(e)}"

# The Hugging Face access token is read from the API_KEY environment variable
# (None when the variable is unset), so no secret is hard-coded in this file.
access_token = os.environ.get('API_KEY')

# Shared inference client used by the generation functions defined below.
client = InferenceClient(token=access_token)

def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
    """Build the text prompt that asks the model for a job application email.

    Args:
        job_description: The job posting text supplied by the user.
        cv_sections: Parsed CV sections; the 'experience', 'skills' and
            'education' entries are embedded in the prompt.

    Returns:
        The prompt: job description, CV details, numbered writing guidelines,
        and a trailing "Email:" cue for the model to continue from.

    Raises:
        KeyError: If one of the three required section keys is missing.
    """
    experience = cv_sections['experience']
    skills = cv_sections['skills']
    education = cv_sections['education']

    guidelines = (
        "1. Start with a proper greeting",
        "2. First paragraph: Express interest in the position and mention how you found it",
        "3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements",
        "4. Third paragraph: Mention specific skills that align with the role",
        "5. Closing paragraph: Express enthusiasm for an interview and provide contact information",
        "6. End with a professional closing",
    )

    # Each entry is one paragraph of the prompt; paragraphs are separated by a
    # single blank line.
    paragraphs = [
        f"Job Description:\n{job_description}",
        f"Your CV Details:\nExperience:\n{experience}",
        f"Skills:\n{skills}",
        f"Education:\n{education}",
        "Instructions: Write a professional job application email following these guidelines:\n"
        + "\n".join(guidelines),
        "Keep the tone professional, confident, and enthusiastic. Be concise but impactful.",
        "Email:",
    ]
    return "\n\n".join(paragraphs)

def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
    """Stream a generated application email piece by piece.

    Args:
        input_text: The job description entered by the user.
        cv_sections: Parsed CV sections passed on to ``create_email_prompt``.

    Yields:
        Each item produced by the streaming text-generation call. If the call
        fails, the error is shown via ``st.error`` and a single empty string
        is yielded instead.
    """
    generation_options = {
        "model": "google/gemma-2b-it",
        "prompt": create_email_prompt(input_text, cv_sections),
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.95,
        "stream": True,
    }

    try:
        token_stream = client.text_generation(**generation_options)
        # Relay every streamed piece to the caller as soon as it arrives.
        for piece in token_stream:
            yield piece
    except Exception as e:
        st.error(f"Error generating response: {str(e)}")
        yield ""

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    cv_file,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Generate a streamed reply for a multi-turn chat conversation.

    Args:
        message: The latest user message.
        history: Earlier (user, assistant) turns; empty entries are skipped.
        system_message: Extra instructions appended to the system prompt.
        cv_file: Uploaded CV file, or a falsy value when none was provided.
        max_tokens: Generation length limit passed to the chat API.
        temperature: Sampling temperature passed to the chat API.
        top_p: Nucleus-sampling value passed to the chat API.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    cv_text = extract_cv_text(cv_file) if cv_file else "No CV provided"

    # extract_cv_text returns a dict of sections on success; render it as
    # headed plain text so the prompt does not contain a raw dict repr.
    if isinstance(cv_text, dict):
        cv_text = "\n\n".join(
            f"{name.title()}:\n{body.strip()}"
            for name, body in cv_text.items()
            if body.strip()
        ) or "No recognizable CV sections found"

    updated_system_message = f"""Task: Write a professional job application email.

CV Summary:
{cv_text}

{system_message}"""

    messages = [{"role": "system", "content": updated_system_message}]

    for user_input, assistant_reply in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_reply:
            messages.append({"role": "assistant", "content": assistant_reply})

    messages.append({"role": "user", "content": message})

    response = ""

    for message_chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = message_chunk["choices"]
        if not choices:
            # NOTE(review): some backends may send chunks without choices
            # (e.g. a trailing usage chunk) - skip rather than fail.
            continue
        # "content" can be present but None, which would break the
        # concatenation below; normalise it to an empty string.
        token = choices[0]["delta"].get("content") or ""
        response += token
        yield response

# Streamlit UI section
# Page heading; it is emitted before the tabs that are created further down.
st.title("AI Job Application Email Generator")

def update_ui(message, cv_file, cv_sections):
    """Render the "Generate Email" button and stream the generated email.

    Args:
        message: Job description text entered by the user.
        cv_file: The uploaded CV file (falsy when nothing was uploaded).
        cv_sections: Parsed CV sections as a dict, or None when parsing failed.

    When the button is pressed with valid inputs, the email is shown as it is
    generated and finally placed in a "Generated Email" text area. With missing
    or invalid inputs a warning is displayed instead.
    """
    # Reserve the output slot before the button so the email appears above it.
    email_placeholder = st.empty()

    if not st.button("Generate Email"):
        return

    if not (message and cv_file and isinstance(cv_sections, dict)):
        st.warning("Please upload a CV and enter a job description.")
        return

    email_text = ""
    try:
        for chunk in conversation_predict(message, cv_sections):
            if chunk:
                email_text += chunk
                # Live preview uses a plain text element: creating the keyed
                # text_area once per chunk would reuse the same widget key
                # several times in one script run, which Streamlit rejects.
                email_placeholder.text(email_text)
    except Exception as e:
        st.error(f"Error during email generation: {str(e)}")

    if email_text:
        # Create the keyed widget exactly once, with the final text.
        email_placeholder.text_area(
            "Generated Email",
            value=email_text,
            height=400,
            key="email_output"
        )

# Two tabs: one for generating the email, one for inspecting the parsed CV.
tab1, tab2 = st.tabs(["Generate Email", "View CV Details"])

with tab1:
    # CV upload; cv_sections stays None unless parsing yields a dict.
    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
    cv_sections = None

    if cv_file:
        _parse_result = extract_cv_text(cv_file)
        if isinstance(_parse_result, dict):
            cv_sections = _parse_result
            st.success("CV uploaded and parsed successfully!")
        else:
            # A non-dict result is the error message to show the user.
            st.error(_parse_result)

    # Job description input
    st.markdown("### Job Description")
    message = st.text_area("Paste the job description here:", height=200)

    # Button and streamed output are handled by update_ui.
    update_ui(message, cv_file, cv_sections)

with tab2:
    if not (cv_file and isinstance(cv_sections, dict)):
        st.info("Upload a CV to view parsed details")
    else:
        st.markdown("### Parsed CV Details")
        # One collapsible panel per parsed section.
        for section, content in cv_sections.items():
            with st.expander(section.title()):
                st.text(content)