# Source: Hugging Face Space "Dhahlan2000/AppyJob" (status: Sleeping; file size: 6,927 Bytes)

import streamlit as st
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
from PyPDF2 import PdfReader
import docx
import re
from typing import Dict

def parse_cv_sections(text: str) -> Dict[str, str]:
    """Parse CV text into structured sections.

    The text is scanned line by line. A line containing one of the section
    keywords switches the current section; every non-blank line seen while a
    section is active (including the line that triggered the switch) is
    appended to that section followed by a newline. Lines that appear before
    the first recognised keyword are discarded.

    Keywords must begin at a word boundary, so words that merely contain a
    keyword ("Frameworks", "Teamwork", "Coursework") no longer switch the
    section to ``experience``. Detection is still keyword based: any line
    that contains a keyword as the start of a word is treated as a header.

    Args:
        text: Raw CV text with newline-separated lines.

    Returns:
        A dict with the keys ``contact``, ``education``, ``experience``,
        ``skills`` and ``projects``; each value is the collected text for
        that section, or an empty string when nothing was found.
    """
    # Order matters: the first pattern that matches a line wins.
    section_patterns = {
        'contact': re.compile(
            r'\b(?:contact|personal\s+information|profile)', re.IGNORECASE),
        'education': re.compile(
            r'\b(?:education|academic|qualification)', re.IGNORECASE),
        'experience': re.compile(
            r'\b(?:experience|work|employment|professional)', re.IGNORECASE),
        'skills': re.compile(
            r'\b(?:skills|technical skills|competencies)', re.IGNORECASE),
        'projects': re.compile(
            r'\b(?:projects|personal projects)', re.IGNORECASE),
    }

    collected = {name: [] for name in section_patterns}
    current_section = None

    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # A keyword hit moves all following lines into that section.
        for name, pattern in section_patterns.items():
            if pattern.search(line):
                current_section = name
                break

        if current_section is not None:
            collected[current_section].append(line)

    # Each stored line is terminated by a newline, matching the prior output.
    return {
        name: ''.join(entry + '\n' for entry in entries)
        for name, entries in collected.items()
    }

def extract_cv_text(file):
    """Extract text from a PDF or DOCX CV file and parse it into sections.

    Args:
        file: An uploaded file object exposing a ``name`` attribute and
            readable by ``PdfReader`` / ``docx.Document``, or ``None``.

    Returns:
        On success, the dict produced by ``parse_cv_sections``. Otherwise a
        human-readable message string: no file given, unsupported extension,
        or an error raised while reading the file. Callers distinguish the
        two cases with ``isinstance(result, dict)``.
    """
    if file is None:
        return "No CV uploaded"

    file_ext = os.path.splitext(file.name)[1].lower()

    try:
        if file_ext == '.pdf':
            reader = PdfReader(file)
            # Join pages with a newline so the last line of one page is not
            # fused with the first line of the next before line-based parsing.
            # `or ''` is a defensive guard in case a page yields no text.
            text = '\n'.join(
                (page.extract_text() or '') for page in reader.pages
            )
        elif file_ext == '.docx':
            document = docx.Document(file)
            text = '\n'.join(paragraph.text for paragraph in document.paragraphs)
        else:
            return "Unsupported file format. Please upload PDF or DOCX files."

        # Parse the CV into sections
        return parse_cv_sections(text)

    except Exception as e:
        # Deliberately broad: this is the UI boundary and the caller shows
        # the returned message instead of crashing the app.
        return f"Error processing file: {str(e)}"

# Hugging Face access token, read from the API_KEY environment variable
# (None when the variable is not set). Do not hard-code the token here.
access_token = os.getenv('API_KEY')


@st.cache_resource
def _load_tokenizer_and_model(token):
    """Load the Gemma tokenizer and model once and reuse them across reruns.

    Streamlit re-executes this script on every user interaction; caching the
    loaded objects avoids reloading the model weights each time.

    Args:
        token: Hugging Face access token passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple with the model in evaluation mode.
    """
    # `token` replaces the deprecated `use_auth_token` keyword.
    loaded_tokenizer = AutoTokenizer.from_pretrained(
        "google/gemma-2b-it", token=token
    )
    loaded_model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2b-it",
        torch_dtype=torch.bfloat16,
        token=token,
    )
    loaded_model.eval()  # Set the model to evaluation mode
    return loaded_tokenizer, loaded_model


# Initialize the tokenizer and model with the Hugging Face access token
tokenizer, model = _load_tokenizer_and_model(access_token)

# Initialize the inference client (if needed for other API-based tasks)
client = InferenceClient(token=access_token)

def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
    """Build the text prompt used to request a job application email.

    Args:
        job_description: The job posting text, inserted verbatim.
        cv_sections: Parsed CV sections; the ``experience``, ``skills`` and
            ``education`` entries are read (a missing key raises ``KeyError``).

    Returns:
        The prompt: job description, the three CV sections, the writing
        guidelines, and a trailing ``Email:`` cue for the model to continue.
    """
    # One entry per output line; placeholders are filled in by str.format,
    # which does not re-interpret braces inside the substituted values.
    template_lines = (
        "Job Description:",
        "{job_description}",
        "",
        "Your CV Details:",
        "Experience:",
        "{experience}",
        "",
        "Skills:",
        "{skills}",
        "",
        "Education:",
        "{education}",
        "",
        "Instructions: Write a professional job application email following these guidelines:",
        "1. Start with a proper greeting",
        "2. First paragraph: Express interest in the position and mention how you found it",
        "3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements",
        "4. Third paragraph: Mention specific skills that align with the role",
        "5. Closing paragraph: Express enthusiasm for an interview and provide contact information",
        "6. End with a professional closing",
        "",
        "Keep the tone professional, confident, and enthusiastic. Be concise but impactful.",
        "",
        "Email:",
    )
    template = "\n".join(template_lines)
    return template.format(
        job_description=job_description,
        experience=cv_sections['experience'],
        skills=cv_sections['skills'],
        education=cv_sections['education'],
    )

def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
    """Generate a job application email with the locally loaded model.

    Args:
        input_text: The job description entered by the user.
        cv_sections: Parsed CV sections passed on to ``create_email_prompt``.

    Returns:
        The generated email text only. The prompt (job description, CV
        details and instructions) is not echoed back in the result.
    """
    prompt = create_email_prompt(input_text, cv_sections)

    # Tokenize the prompt; keep the attention mask so generate() does not
    # have to guess which positions are padding.
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"]

    # Sample a continuation of the prompt.
    outputs = model.generate(
        input_ids,
        attention_mask=encoded["attention_mask"],
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
    )

    # For a decoder-only model the output sequence starts with the prompt
    # tokens; drop them so only the newly generated email is decoded.
    prompt_length = input_ids.shape[-1]
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    cv_file,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a chat response for a multi-turn conversation.

    Args:
        message: The latest user message.
        history: Earlier ``(user_input, assistant_reply)`` pairs; empty
            entries are skipped.
        system_message: Extra system instructions appended after the CV
            summary.
        cv_file: Uploaded CV file, or a falsy value when none was provided.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The response accumulated so far, once per streamed chunk.
    """
    # Extract CV text and update system message
    cv_text = extract_cv_text(cv_file) if cv_file else "No CV provided"

    # extract_cv_text returns a dict of sections on success; render it as
    # readable text instead of embedding the raw dict repr in the prompt.
    if isinstance(cv_text, dict):
        cv_text = "\n\n".join(
            f"{name.title()}:\n{content.strip()}"
            for name, content in cv_text.items()
            if content.strip()
        )

    updated_system_message = f"""Task: Write a professional job application email.

CV Summary:
{cv_text}

{system_message}"""

    messages = [{"role": "system", "content": updated_system_message}]

    for user_input, assistant_reply in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_reply:
            messages.append({"role": "assistant", "content": assistant_reply})

    messages.append({"role": "user", "content": message})

    response = ""

    for message_chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A chunk may carry content=None (key present, no text); treat it as
        # an empty string so the concatenation below cannot raise TypeError.
        token = message_chunk["choices"][0]["delta"].get("content") or ""
        response += token
        yield response

# Streamlit UI section
st.title("AI Job Application Email Generator")

# Add tabs for different sections
tab1, tab2 = st.tabs(["Generate Email", "View CV Details"])

# Bound up front so the checks below never depend on short-circuit order to
# avoid touching an undefined name when no file has been uploaded.
cv_sections = None

with tab1:
    # CV file upload
    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])

    if cv_file:
        cv_sections = extract_cv_text(cv_file)
        if isinstance(cv_sections, dict):
            st.success("CV uploaded and parsed successfully!")
        else:
            st.error(cv_sections)  # Show error message if parsing failed

    # extract_cv_text returns a dict on success and a message string otherwise.
    cv_ready = isinstance(cv_sections, dict)

    # Job description input
    st.markdown("### Job Description")
    message = st.text_area("Paste the job description here:", height=200)

    # Generate button
    if st.button("Generate Email"):
        if message and cv_ready:
            # Local generation can take a long time; show progress feedback.
            with st.spinner("Generating email..."):
                response = conversation_predict(message, cv_sections)
            st.markdown("### Generated Email:")
            st.markdown(response)
        else:
            st.warning("Please upload a CV and enter a job description.")

with tab2:
    if cv_ready:
        st.markdown("### Parsed CV Details")
        for section, content in cv_sections.items():
            with st.expander(f"{section.title()}"):
                st.text(content)
    else:
        st.info("Upload a CV to view parsed details")