import streamlit as st
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
from PyPDF2 import PdfReader
import docx
import re
from typing import Dict
def parse_cv_sections(text: str) -> Dict[str, str]:
    """Parse CV text into structured sections."""
    sections = {
        'contact': '',
        'education': '',
        'experience': '',
        'skills': '',
        'projects': '',
    }

    # Common section headers in CVs
    section_patterns = {
        'contact': r'(?i)(contact|personal\s+information|profile)',
        'education': r'(?i)(education|academic|qualification)',
        'experience': r'(?i)(experience|work|employment|professional)',
        'skills': r'(?i)(skills|technical skills|competencies)',
        'projects': r'(?i)(projects|personal projects)',
    }

    # Split text into lines
    lines = text.split('\n')
    current_section = None

    for line in lines:
        line = line.strip()
        if not line:
            continue

        # Check if line is a section header
        for section, pattern in section_patterns.items():
            if re.search(pattern, line, re.IGNORECASE):
                current_section = section
                break

        if current_section and line:
            sections[current_section] += line + '\n'

    return sections
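
# Illustrative note: a header line such as "Work Experience" matches the
# 'experience' pattern above, so that line and the lines following it
# accumulate under sections['experience'] until another recognized header
# appears. Lines seen before the first recognized header are skipped.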
def extract_cv_text(file):
    """Extract text from PDF or DOCX CV files."""
    if file is None:
        return "No CV uploaded"

    file_ext = os.path.splitext(file.name)[1].lower()
    text = ""

    try:
        if file_ext == '.pdf':
            reader = PdfReader(file)
            for page in reader.pages:
                # extract_text() can return None for image-only pages
                text += page.extract_text() or ""
        elif file_ext == '.docx':
            doc = docx.Document(file)
            for paragraph in doc.paragraphs:
                text += paragraph.text + '\n'
        else:
            return "Unsupported file format. Please upload PDF or DOCX files."

        # Parse the CV into sections
        sections = parse_cv_sections(text)
        return sections
    except Exception as e:
        return f"Error processing file: {str(e)}"
# Read the Hugging Face access token from the API_KEY environment variable
access_token = os.getenv('API_KEY')

# Initialize the tokenizer and model with the Hugging Face access token
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2b-it",
    torch_dtype=torch.bfloat16,
    token=access_token
)
model.eval()  # Set the model to evaluation mode
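
# Note: no device placement is done here, so generation runs on CPU by default.
# bfloat16 can be slow or unsupported on some CPUs; torch.float32 (or float16
# combined with moving the model to a GPU via .to("cuda")) are common alternatives.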
# Initialize the inference client (if needed for other API-based tasks)
client = InferenceClient(token=access_token)
def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
    """Create a detailed prompt for email generation."""
    return f"""Job Description:
{job_description}

Your CV Details:

Experience:
{cv_sections['experience']}

Skills:
{cv_sections['skills']}

Education:
{cv_sections['education']}

Instructions: Write a professional job application email following these guidelines:
1. Start with a proper greeting
2. First paragraph: Express interest in the position and mention how you found it
3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements
4. Third paragraph: Mention specific skills that align with the role
5. Closing paragraph: Express enthusiasm for an interview and provide contact information
6. End with a professional closing

Keep the tone professional, confident, and enthusiastic. Be concise but impactful.

Email:"""
def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
    """Generate a response using the model with improved prompting."""
    prompt = create_email_prompt(input_text, cv_sections)

    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate a response with the model
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
    )

    # Decode only the newly generated tokens so the echoed prompt is not
    # returned as part of the email
    return tokenizer.decode(
        outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
    )
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    cv_file,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Generate a response for a multi-turn chat conversation."""
    # Extract CV text and update system message
    cv_text = extract_cv_text(cv_file) if cv_file else "No CV provided"

    updated_system_message = f"""Task: Write a professional job application email.

CV Summary:
{cv_text}

{system_message}"""

    messages = [{"role": "system", "content": updated_system_message}]

    for user_input, assistant_reply in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_reply:
            messages.append({"role": "assistant", "content": assistant_reply})

    messages.append({"role": "user", "content": message})

    response = ""
    for message_chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = message_chunk["choices"][0]["delta"].get("content", "")
        response += token
        yield response
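
# Note: respond() streams replies through the InferenceClient chat API and is
# kept as an alternative, conversational path; the Streamlit UI below calls
# conversation_predict() with the locally loaded model instead, so respond()
# is not currently wired into the interface.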
# Streamlit UI section
st.title("AI Job Application Email Generator")

# Add tabs for different sections
tab1, tab2 = st.tabs(["Generate Email", "View CV Details"])

with tab1:
    # CV file upload
    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])

    if cv_file:
        cv_sections = extract_cv_text(cv_file)
        if isinstance(cv_sections, dict):
            st.success("CV uploaded and parsed successfully!")
        else:
            st.error(cv_sections)  # Show error message if parsing failed

    # Job description input
    st.markdown("### Job Description")
    message = st.text_area("Paste the job description here:", height=200)

    # Generate button
    if st.button("Generate Email"):
        if message and cv_file and isinstance(cv_sections, dict):
            response = conversation_predict(message, cv_sections)
            st.markdown("### Generated Email:")
            st.markdown(response)
        else:
            st.warning("Please upload a CV and enter a job description.")

with tab2:
    if cv_file and isinstance(cv_sections, dict):
        st.markdown("### Parsed CV Details")
        for section, content in cv_sections.items():
            with st.expander(f"{section.title()}"):
                st.text(content)
    else:
        st.info("Upload a CV to view parsed details")