Spaces:
Sleeping
Sleeping
Refactor app.py to implement streaming response generation for email creation. Removed tokenizer and model initialization in favor of using the InferenceClient for improved performance and reduced complexity. Updated conversation_predict function to yield streaming output, enhancing user experience with real-time email generation feedback. Added a new update_ui function to manage email display in the Streamlit interface, ensuring a smoother interaction flow.
204d33b
import streamlit as st | |
from huggingface_hub import InferenceClient | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import torch | |
import os | |
from PyPDF2 import PdfReader | |
import docx | |
import re | |
from typing import Dict | |
def parse_cv_sections(text: str) -> Dict[str, str]:
    """Parse CV text into structured sections.

    The text is scanned line by line. A line matching one of the section
    header patterns switches the current section; every non-blank line seen
    while a section is active (the header line included) is appended to that
    section followed by a newline. Lines that occur before the first
    recognised header are discarded.

    Args:
        text: Raw text extracted from a CV.

    Returns:
        A dict with the keys ``contact``, ``education``, ``experience``,
        ``skills`` and ``projects``. Sections that were never seen map to
        an empty string.
    """
    # Keywords are anchored at a word start so they cannot fire in the middle
    # of an unrelated word (e.g. "work" inside "Frameworks" or "Teamwork",
    # which previously moved skills lines into the experience section).
    # Compiled once per call instead of being re-evaluated for every line.
    section_patterns = {
        'contact': re.compile(
            r'\b(?:contact|personal\s+information|profile)', re.IGNORECASE),
        'education': re.compile(
            r'\b(?:education|academic|qualification)', re.IGNORECASE),
        'experience': re.compile(
            r'\b(?:experience|work|employment|professional)', re.IGNORECASE),
        'skills': re.compile(
            r'\b(?:skills|technical skills|competencies)', re.IGNORECASE),
        'projects': re.compile(
            r'\b(?:projects|personal projects)', re.IGNORECASE),
    }

    # Collect lines per section and join once at the end.
    collected = {name: [] for name in section_patterns}
    current_section = None

    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # First matching pattern wins, in the declaration order above.
        for section, pattern in section_patterns.items():
            if pattern.search(line):
                current_section = section
                break

        if current_section is not None:
            collected[current_section].append(line + '\n')

    return {name: ''.join(lines) for name, lines in collected.items()}
def extract_cv_text(file):
    """Extract text from a PDF or DOCX CV file and parse it into sections.

    Args:
        file: Uploaded file object exposing a ``name`` attribute and readable
            by ``PdfReader`` / ``docx.Document``, or ``None``.

    Returns:
        The dict produced by ``parse_cv_sections`` on success. Otherwise a
        human-readable ``str``: a "no CV" notice when ``file`` is ``None``,
        an "unsupported format" notice for other extensions, or an error
        message if reading the file raised.
    """
    if file is None:
        return "No CV uploaded"

    file_ext = os.path.splitext(file.name)[1].lower()

    try:
        if file_ext == '.pdf':
            reader = PdfReader(file)
            # Join pages with a newline so the last line of one page is not
            # fused with the first line of the next (section detection is
            # line based). `or ''` guards against a page yielding no text.
            text = '\n'.join(
                (page.extract_text() or '') for page in reader.pages
            )
        elif file_ext == '.docx':
            doc = docx.Document(file)
            text = ''.join(
                paragraph.text + '\n' for paragraph in doc.paragraphs
            )
        else:
            return "Unsupported file format. Please upload PDF or DOCX files."

        # Parse the CV into sections
        return parse_cv_sections(text)
    except Exception as e:
        # Deliberately broad: callers display the returned string to the user
        # instead of handling exceptions.
        return f"Error processing file: {str(e)}"
# Hugging Face access token, read from the API_KEY environment variable
# (None when the variable is not set).
access_token = os.environ.get('API_KEY')
# Shared inference client used by the generation functions in this file.
client = InferenceClient(token=access_token)
def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
    """Create a detailed prompt for email generation.

    Args:
        job_description: Text of the job posting.
        cv_sections: Parsed CV sections; the ``experience``, ``skills`` and
            ``education`` entries are embedded in the prompt. A missing entry
            is treated as empty text instead of raising ``KeyError``.

    Returns:
        The prompt string, ending with ``Email:`` so the model continues with
        the email body.
    """
    experience = cv_sections.get('experience', '')
    skills = cv_sections.get('skills', '')
    education = cv_sections.get('education', '')

    return f"""Job Description:
{job_description}
Your CV Details:
Experience:
{experience}
Skills:
{skills}
Education:
{education}
Instructions: Write a professional job application email following these guidelines:
1. Start with a proper greeting
2. First paragraph: Express interest in the position and mention how you found it
3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements
4. Third paragraph: Mention specific skills that align with the role
5. Closing paragraph: Express enthusiasm for an interview and provide contact information
6. End with a professional closing
Keep the tone professional, confident, and enthusiastic. Be concise but impactful.
Email:"""
def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
    """Generate an application email with streaming output.

    Builds the prompt with ``create_email_prompt`` and streams the completion
    from the module-level ``client``.

    Args:
        input_text: The job description entered by the user.
        cv_sections: Parsed CV sections passed through to the prompt builder.

    Yields:
        Text fragments of the generated email, in generation order.
    """
    prompt = create_email_prompt(input_text, cv_sections)

    # Use the streaming API
    stream = client.text_generation(
        model="google/gemma-2b-it",
        prompt=prompt,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        stream=True,
    )

    for chunk in stream:
        if isinstance(chunk, str):
            # Without details=True the stream yields plain token strings;
            # these have neither `.token` nor `.generated_text`, so they
            # must be handled before any attribute access.
            yield chunk
        elif hasattr(chunk, 'token'):
            # Detailed stream objects carry the token text here.
            yield chunk.token.text
        else:
            # Fallback for responses exposing only the generated text.
            text = getattr(chunk, 'generated_text', None)
            if text:
                yield text
def _format_cv_summary(cv_text) -> str:
    """Render the result of ``extract_cv_text`` as plain text for a prompt."""
    if not isinstance(cv_text, dict):
        # Already a message string ("No CV provided", an error notice, ...).
        return str(cv_text)
    return "\n".join(
        f"{name.title()}:\n{content.strip()}"
        for name, content in cv_text.items()
        if content.strip()
    )


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    cv_file,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Generate a response for a multi-turn chat conversation.

    Args:
        message: The latest user message.
        history: Previous ``(user, assistant)`` turns; empty entries are
            skipped.
        system_message: Extra instructions appended to the system prompt.
        cv_file: Uploaded CV file, or a falsy value when none was provided.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus sampling threshold.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    # Extract CV text and update system message. extract_cv_text returns a
    # dict of sections on success, so format it instead of embedding the raw
    # dict repr in the prompt.
    cv_text = extract_cv_text(cv_file) if cv_file else "No CV provided"
    cv_summary = _format_cv_summary(cv_text)

    updated_system_message = f"""Task: Write a professional job application email.
CV Summary:
{cv_summary}
{system_message}"""

    messages = [{"role": "system", "content": updated_system_message}]
    for user_input, assistant_reply in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_reply:
            messages.append({"role": "assistant", "content": assistant_reply})
    messages.append({"role": "user", "content": message})

    response = ""
    for message_chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # The "content" key can be present with a None value, in which case
        # .get's default is not used; coerce to "" so concatenation is safe.
        token = message_chunk["choices"][0]["delta"].get("content", "") or ""
        response += token
        yield response
def update_ui():
    """Render the "Generate Email" button and stream the generated email.

    Reads the module-level ``message``, ``cv_file`` and ``cv_sections`` set by
    the upload and job-description widgets. Must be defined before the UI
    code below calls it, because the script runs top to bottom.
    """
    # Create placeholder for the generated email
    email_placeholder = st.empty()

    # Generate button
    if st.button("Generate Email"):
        if message and cv_file and isinstance(cv_sections, dict):
            email_text = ""
            # Stream the response into a non-widget element: re-creating a
            # keyed text_area on every chunk would register the same widget
            # key several times within one script run.
            for chunk in conversation_predict(message, cv_sections):
                if chunk:
                    email_text += chunk
                    email_placeholder.text(email_text)
            # Show the finished email once in the editable text area.
            email_placeholder.text_area(
                "Generated Email",
                value=email_text,
                height=400,
                key="email_output",
            )
        else:
            st.warning("Please upload a CV and enter a job description.")


# Streamlit UI section
st.title("AI Job Application Email Generator")

# Add tabs for different sections
tab1, tab2 = st.tabs(["Generate Email", "View CV Details"])

# Bound up front so later checks never hit an undefined name when no file
# has been uploaded.
cv_sections = None

with tab1:
    # CV file upload
    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
    if cv_file:
        cv_sections = extract_cv_text(cv_file)
        if isinstance(cv_sections, dict):
            st.success("CV uploaded and parsed successfully!")
        else:
            # extract_cv_text returned an error/notice string.
            st.error(cv_sections)

    # Job description input
    st.markdown("### Job Description")
    message = st.text_area("Paste the job description here:", height=200)

    # Call the updated UI function
    update_ui()
# "View CV Details" tab: one expander per parsed CV section.
with tab2:
    # Short-circuit on cv_file first, exactly as before, so cv_sections is
    # only inspected when a file was actually uploaded.
    has_parsed_cv = bool(cv_file) and isinstance(cv_sections, dict)
    if not has_parsed_cv:
        st.info("Upload a CV to view parsed details")
    else:
        st.markdown("### Parsed CV Details")
        for section_name in cv_sections:
            with st.expander(section_name.title()):
                st.text(cv_sections[section_name])