"""Streamlit app that drafts a job application email from a CV and a job description.

The uploaded CV (PDF or DOCX) is split into coarse sections with keyword
heuristics, combined with the pasted job description into a prompt, and sent
to a locally loaded ``google/gemma-2b-it`` model.
"""

import os
import re
from typing import Dict, Iterator, List, Optional, Union

import docx
import streamlit as st
import torch
from huggingface_hub import InferenceClient
from PyPDF2 import PdfReader
from transformers import AutoModelForCausalLM, AutoTokenizer

# Keywords that mark the start of a CV section. Each pattern is anchored with a
# leading word boundary so that words merely *containing* a keyword (for
# example "Frameworks" or "network", which contain "work") do not switch
# sections. Compiled once here instead of once per CV line.
_SECTION_PATTERNS: Dict[str, "re.Pattern"] = {
    'contact': re.compile(r'\b(?:contact|personal\s+information|profile)', re.IGNORECASE),
    'education': re.compile(r'\b(?:education|academic|qualification)', re.IGNORECASE),
    'experience': re.compile(r'\b(?:experience|work|employment|professional)', re.IGNORECASE),
    'skills': re.compile(r'\b(?:skills|technical skills|competencies)', re.IGNORECASE),
    'projects': re.compile(r'\b(?:projects|personal projects)', re.IGNORECASE),
}

MODEL_ID = "google/gemma-2b-it"


def parse_cv_sections(text: str) -> Dict[str, str]:
    """Parse CV text into structured sections.

    Every non-blank line is checked against the section keyword patterns; the
    first pattern that matches makes that section current. The line (including
    the header line itself) is then appended to the current section. Lines
    that appear before any header is recognised are dropped.

    Args:
        text: Raw text extracted from the CV.

    Returns:
        A dict with the keys ``contact``, ``education``, ``experience``,
        ``skills`` and ``projects``. Each value is the newline-terminated
        lines of that section, or an empty string if the section was not found.
    """
    collected: Dict[str, List[str]] = {name: [] for name in _SECTION_PATTERNS}
    current_section: Optional[str] = None

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # The first matching pattern wins; dict order sets the priority.
        for name, pattern in _SECTION_PATTERNS.items():
            if pattern.search(line):
                current_section = name
                break

        if current_section is not None:
            collected[current_section].append(line)

    return {
        name: ''.join(f'{line}\n' for line in lines)
        for name, lines in collected.items()
    }


def extract_cv_text(file) -> Union[Dict[str, str], str]:
    """Extract text from a PDF or DOCX CV file and parse it into sections.

    Args:
        file: An uploaded file object exposing ``.name`` and a binary stream
            interface, or ``None``.

    Returns:
        The dict produced by :func:`parse_cv_sections` on success. On failure
        a human-readable message string is returned instead, so callers must
        check ``isinstance(result, dict)``.
    """
    if file is None:
        return "No CV uploaded"

    file_ext = os.path.splitext(file.name)[1].lower()

    try:
        # The same upload may be read more than once; start from the beginning.
        if hasattr(file, "seek"):
            file.seek(0)

        if file_ext == '.pdf':
            reader = PdfReader(file)
            # extract_text() may yield None for pages without a text layer.
            # Joining with a newline keeps the last line of one page from
            # fusing with the first line of the next.
            text = '\n'.join(page.extract_text() or '' for page in reader.pages)
        elif file_ext == '.docx':
            document = docx.Document(file)
            text = '\n'.join(paragraph.text for paragraph in document.paragraphs)
        else:
            return "Unsupported file format. Please upload PDF or DOCX files."

        # Parse the CV into sections
        return parse_cv_sections(text)
    except Exception as e:
        # Deliberate catch-all at the UI boundary: any parser failure is
        # reported to the user as text rather than crashing the app.
        return f"Error processing file: {str(e)}"


def _format_cv_sections(cv_sections: Dict[str, str]) -> str:
    """Render parsed CV sections as plain text, skipping empty sections."""
    blocks = [
        f"{name.title()}:\n{content.strip()}"
        for name, content in cv_sections.items()
        if content.strip()
    ]
    return "\n\n".join(blocks) if blocks else "No recognizable CV sections found"


# The Hugging Face access token is read from the API_KEY environment variable.
access_token = os.getenv('API_KEY')


@st.cache_resource
def _load_model_and_tokenizer(token: Optional[str]):
    """Load the tokenizer and model once per server process.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects avoids reloading the model weights on each rerun.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_auth_token=token)
    loaded_model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        use_auth_token=token,
    )
    loaded_model.eval()  # Set the model to evaluation mode
    return loaded_tokenizer, loaded_model


# Initialize the tokenizer and model with the Hugging Face access token
tokenizer, model = _load_model_and_tokenizer(access_token)

# Initialize the inference client (used by `respond` for API-based chat)
client = InferenceClient(token=access_token)


def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
    """Create a detailed prompt for email generation.

    Args:
        job_description: The job posting text supplied by the user.
        cv_sections: Parsed CV sections; the ``experience``, ``skills`` and
            ``education`` entries are used. Missing entries are treated as empty.

    Returns:
        The full prompt, ending with ``Email:`` so the model continues with
        the email body.
    """
    experience = cv_sections.get('experience', '')
    skills = cv_sections.get('skills', '')
    education = cv_sections.get('education', '')

    return f"""Job Description:
{job_description}

Your CV Details:
Experience:
{experience}

Skills:
{skills}

Education:
{education}

Instructions: Write a professional job application email following these guidelines:
1. Start with a proper greeting
2. First paragraph: Express interest in the position and mention how you found it
3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements
4. Third paragraph: Mention specific skills that align with the role
5. Closing paragraph: Express enthusiasm for an interview and provide contact information
6. End with a professional closing

Keep the tone professional, confident, and enthusiastic. Be concise but impactful.

Email:"""


def conversation_predict(
    input_text: str,
    cv_sections: Dict[str, str],
    max_new_tokens: int = 2048,
    temperature: float = 0.7,
    top_p: float = 0.95,
) -> str:
    """Generate an application email with the local model.

    Args:
        input_text: The job description.
        cv_sections: Parsed CV sections as returned by :func:`parse_cv_sections`.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        Only the newly generated text; the prompt is not echoed back.
    """
    prompt = create_email_prompt(input_text, cv_sections)

    # Tokenize the prompt; keep the attention mask alongside the input ids.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = encoded["input_ids"].shape[-1]

    # Generate a response with the model
    outputs = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    # The output sequence starts with the prompt tokens; decode only what
    # follows so the prompt is not shown to the user as part of the email.
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    cv_file,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Generate a response for a multi-turn chat conversation.

    Streams the reply from the hosted inference client.

    Args:
        message: The latest user message.
        history: Earlier ``(user, assistant)`` turns.
        system_message: Extra system instructions appended after the CV summary.
        cv_file: Uploaded CV file, or a falsy value when none was provided.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    # Extract CV text and update system message
    if cv_file:
        parsed_cv = extract_cv_text(cv_file)
        # extract_cv_text returns a dict on success and a message string on
        # failure; render the dict as text instead of embedding its repr.
        cv_text = _format_cv_sections(parsed_cv) if isinstance(parsed_cv, dict) else parsed_cv
    else:
        cv_text = "No CV provided"

    updated_system_message = f"""Task: Write a professional job application email.

CV Summary:
{cv_text}

{system_message}"""

    messages = [{"role": "system", "content": updated_system_message}]
    for user_input, assistant_reply in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_reply:
            messages.append({"role": "assistant", "content": assistant_reply})
    messages.append({"role": "user", "content": message})

    response = ""
    for message_chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = message_chunk["choices"]
        if not choices:
            continue
        # A delta may carry no content (or None); treat both as empty text.
        token = choices[0]["delta"].get("content") or ""
        response += token
        yield response


# Streamlit UI section
st.title("AI Job Application Email Generator")

# Add tabs for different sections
tab1, tab2 = st.tabs(["Generate Email", "View CV Details"])

# Defined up front so both tabs can test it safely before anything is uploaded.
cv_sections = None

with tab1:
    # CV file upload
    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])

    if cv_file:
        cv_sections = extract_cv_text(cv_file)
        if isinstance(cv_sections, dict):
            st.success("CV uploaded and parsed successfully!")
        else:
            st.error(cv_sections)  # Show error message if parsing failed

    # Job description input
    st.markdown("### Job Description")
    message = st.text_area("Paste the job description here:", height=200)

    # Generate button
    if st.button("Generate Email"):
        if message.strip() and isinstance(cv_sections, dict):
            with st.spinner("Generating email..."):
                response = conversation_predict(message, cv_sections)
            st.markdown("### Generated Email:")
            st.markdown(response)
        else:
            st.warning("Please upload a CV and enter a job description.")

with tab2:
    if isinstance(cv_sections, dict):
        st.markdown("### Parsed CV Details")
        for section, content in cv_sections.items():
            with st.expander(f"{section.title()}"):
                st.text(content)
    else:
        st.info("Upload a CV to view parsed details")