Spaces:

Dhahlan2000
/

AppyJob

Sleeping

AppyJob / app.py

Dhahlan2000

Refactor app.py to implement streaming response generation for email creation. Removed tokenizer and model initialization in favor of using the InferenceClient for improved performance and reduced complexity. Updated conversation_predict function to yield streaming output, enhancing user experience with real-time email generation feedback. Added a new update_ui function to manage email display in the Streamlit interface, ensuring a smoother interaction flow.

204d33b 8 months ago

raw

history blame

7.03 kB

	import streamlit as st
	from huggingface_hub import InferenceClient
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	import os
	from PyPDF2 import PdfReader
	import docx
	import re
	from typing import Dict

	def parse_cv_sections(text: str) -> Dict[str, str]:
	    """Parse CV text into structured sections.

	    The text is scanned line by line. A line that contains one of the known
	    header keywords switches the current section; that line and every
	    following non-blank line are appended (stripped, newline-terminated) to
	    that section until a line matching another keyword is seen. Lines that
	    appear before the first recognised header are discarded.

	    Args:
	        text: Plain text extracted from a CV.

	    Returns:
	        A dict with the keys 'contact', 'education', 'experience', 'skills'
	        and 'projects'. Sections that were not found map to ''.
	    """
	    # Common section headers in CVs. Alternatives are separated by a plain
	    # `|`; an escaped `\|` would only match a literal pipe character, so no
	    # header would ever be recognised.
	    # NOTE: keywords are searched anywhere in the line, so an ordinary
	    # sentence containing e.g. "work" also switches section (heuristic).
	    section_patterns = {
	        'contact': re.compile(r'contact|personal\s+information|profile', re.IGNORECASE),
	        'education': re.compile(r'education|academic|qualification', re.IGNORECASE),
	        'experience': re.compile(r'experience|work|employment|professional', re.IGNORECASE),
	        'skills': re.compile(r'skills|technical skills|competencies', re.IGNORECASE),
	        'projects': re.compile(r'projects|personal projects', re.IGNORECASE),
	    }

	    # Collect lines per section and join once at the end.
	    collected = {name: [] for name in section_patterns}
	    current_section = None

	    for raw_line in text.split('\n'):
	        line = raw_line.strip()
	        if not line:
	            continue

	        # Check if line is a section header; the first matching pattern wins.
	        for section, pattern in section_patterns.items():
	            if pattern.search(line):
	                current_section = section
	                break

	        if current_section:
	            collected[current_section].append(line)

	    return {
	        name: ''.join(f'{entry}\n' for entry in entries)
	        for name, entries in collected.items()
	    }

	def extract_cv_text(file):
	    """Extract text from a PDF or DOCX CV file and parse it into sections.

	    Args:
	        file: A file-like object exposing a ``name`` attribute (its extension
	            selects the parser), or None when nothing was uploaded.

	    Returns:
	        The dict returned by ``parse_cv_sections`` on success. Otherwise a
	        human-readable message string: no upload, unsupported extension, or
	        an error raised while reading the file. Callers distinguish the two
	        cases with ``isinstance(result, dict)``.
	    """
	    if file is None:
	        return "No CV uploaded"

	    file_ext = os.path.splitext(file.name)[1].lower()

	    try:
	        if file_ext == '.pdf':
	            reader = PdfReader(file)
	            # Join pages with a newline so the last line of one page is not
	            # fused with the first line of the next; `or ''` tolerates pages
	            # for which no text could be extracted.
	            text = '\n'.join(page.extract_text() or '' for page in reader.pages)
	        elif file_ext == '.docx':
	            doc = docx.Document(file)
	            text = ''.join(paragraph.text + '\n' for paragraph in doc.paragraphs)
	        else:
	            return "Unsupported file format. Please upload PDF or DOCX files."

	        # Parse the CV into sections
	        return parse_cv_sections(text)

	    except Exception as e:
	        # Deliberately broad: any parser failure is reported to the caller as
	        # a message instead of crashing the app.
	        return f"Error processing file: {str(e)}"

	# The Hugging Face access token is read from the API_KEY environment variable;
	# os.getenv returns None when that variable is not set.
	access_token = os.getenv('API_KEY')

	# Module-level inference client, created once with the token read above.
	client = InferenceClient(token=access_token)

	def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
	    """Build the text-generation prompt for a job application email.

	    Args:
	        job_description: The job posting text to respond to.
	        cv_sections: Parsed CV sections; the 'experience', 'skills' and
	            'education' entries are embedded in the prompt.

	    Returns:
	        The prompt: job description, CV details, writing instructions and a
	        trailing "Email:" cue for the model to continue from.

	    Raises:
	        KeyError: If one of the three required sections is missing.
	    """
	    experience = cv_sections['experience']
	    skills = cv_sections['skills']
	    education = cv_sections['education']

	    prompt = f"""Job Description:
	{job_description}

	Your CV Details:
	Experience:
	{experience}

	Skills:
	{skills}

	Education:
	{education}

	Instructions: Write a professional job application email following these guidelines:
	1. Start with a proper greeting
	2. First paragraph: Express interest in the position and mention how you found it
	3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements
	4. Third paragraph: Mention specific skills that align with the role
	5. Closing paragraph: Express enthusiasm for an interview and provide contact information
	6. End with a professional closing

	Keep the tone professional, confident, and enthusiastic. Be concise but impactful.

	Email:"""
	    return prompt

	def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
	    """Generate an application email with the model, streaming the output.

	    Args:
	        input_text: The job description entered by the user.
	        cv_sections: Parsed CV sections passed on to ``create_email_prompt``.

	    Yields:
	        Successive pieces of generated text, in the order the inference
	        client streams them.
	    """
	    prompt = create_email_prompt(input_text, cv_sections)

	    # Use the streaming API
	    for response in client.text_generation(
	        model="google/gemma-2b-it",
	        prompt=prompt,
	        max_new_tokens=512,
	        temperature=0.7,
	        top_p=0.95,
	        stream=True
	    ):
	        if isinstance(response, str):
	            # Without details=True the stream yields plain token strings;
	            # attribute access on them would raise AttributeError.
	            yield response
	        elif hasattr(response, 'token'):  # Detailed stream output objects
	            yield response.token.text
	        else:
	            yield response.generated_text

	def respond(
	    message: str,
	    history: list[tuple[str, str]],
	    system_message: str,
	    cv_file,
	    max_tokens: int,
	    temperature: float,
	    top_p: float,
	):
	    """Generate a response for a multi-turn chat conversation.

	    Args:
	        message: The latest user message.
	        history: Earlier (user, assistant) turns; empty entries are skipped.
	        system_message: Extra instructions appended to the system prompt.
	        cv_file: Uploaded CV passed to ``extract_cv_text``, or a falsy value
	            when no CV was provided.
	        max_tokens: Forwarded to ``client.chat_completion``.
	        temperature: Forwarded to ``client.chat_completion``.
	        top_p: Forwarded to ``client.chat_completion``.

	    Yields:
	        The reply accumulated so far, once per streamed chunk.
	    """
	    # Extract CV text and update system message
	    cv_text = extract_cv_text(cv_file) if cv_file else "No CV provided"
	    if isinstance(cv_text, dict):
	        # extract_cv_text returns a dict of sections on success; render it as
	        # labelled plain text instead of embedding a raw dict repr.
	        cv_text = "\n\n".join(
	            f"{name.title()}:\n{body.strip()}"
	            for name, body in cv_text.items()
	            if body.strip()
	        ) or "No CV details could be parsed"

	    updated_system_message = f"""Task: Write a professional job application email.

	CV Summary:
	{cv_text}

	{system_message}"""

	    messages = [{"role": "system", "content": updated_system_message}]

	    for user_input, assistant_reply in history:
	        if user_input:
	            messages.append({"role": "user", "content": user_input})
	        if assistant_reply:
	            messages.append({"role": "assistant", "content": assistant_reply})

	    messages.append({"role": "user", "content": message})

	    response = ""

	    for message_chunk in client.chat_completion(
	        messages=messages,
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	    ):
	        choices = message_chunk["choices"]
	        if not choices:
	            # Nothing to append for a chunk that carries no choices.
	            continue
	        # A delta may carry content=None; `or ""` keeps the concatenation
	        # below from raising TypeError.
	        token = choices[0]["delta"].get("content") or ""
	        response += token
	        yield response

	# Streamlit UI section


	def update_ui():
	    """Render the "Generate Email" button and stream the generated email.

	    Reads the module-level ``message``, ``cv_file`` and ``cv_sections`` set by
	    the script below, so it must be called after they are assigned. Elements
	    are drawn in whatever Streamlit container is active at the call site.
	    """
	    # Create placeholder for the generated email
	    email_placeholder = st.empty()

	    # Generate button
	    if not st.button("Generate Email"):
	        return

	    if not (message and cv_file and isinstance(cv_sections, dict)):
	        st.warning("Please upload a CV and enter a job description.")
	        return

	    email_text = ""
	    # Stream the response
	    for chunk in conversation_predict(message, cv_sections):
	        if chunk:
	            email_text += chunk
	            # Show progress with a plain text element: re-creating a keyed
	            # widget on every chunk would register the same key several
	            # times in one script run (DuplicateWidgetID).
	            email_placeholder.text(email_text)

	    # Create the editable, keyed text area exactly once. Its content is
	    # supplied through session state before the widget is instantiated.
	    st.session_state["email_output"] = email_text
	    email_placeholder.text_area(
	        "Generated Email",
	        height=400,
	        key="email_output"
	    )


	st.title("AI Job Application Email Generator")

	# Add tabs for different sections
	tab1, tab2 = st.tabs(["Generate Email", "View CV Details"])

	# Bound up front so later checks never hit an undefined name when no CV
	# has been uploaded.
	cv_sections = None

	with tab1:
	    # CV file upload
	    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])

	    if cv_file:
	        cv_sections = extract_cv_text(cv_file)
	        if isinstance(cv_sections, dict):
	            st.success("CV uploaded and parsed successfully!")
	        else:
	            # extract_cv_text returned an error message string
	            st.error(cv_sections)

	    # Job description input
	    st.markdown("### Job Description")
	    message = st.text_area("Paste the job description here:", height=200)

	    # update_ui is defined above, so the name exists when the script
	    # reaches this call.
	    update_ui()

	with tab2:
	    if cv_file and isinstance(cv_sections, dict):
	        st.markdown("### Parsed CV Details")
	        for section, content in cv_sections.items():
	            with st.expander(f"{section.title()}"):
	                st.text(content)
	    else:
	        st.info("Upload a CV to view parsed details")