Spaces:
Sleeping
Sleeping
Commit
·
23c19e0
1
Parent(s):
3baf333
Enhance app.py with CV parsing functionality and improved email generation. Introduced a new function to parse CV sections from uploaded PDF or DOCX files, allowing structured extraction of contact, education, experience, skills, and projects. Updated the email generation prompt to include detailed guidelines for crafting job application emails based on parsed CV data. Streamlined the Streamlit UI to support CV uploads and display parsed details, improving user interaction and experience.
Browse files
app.py
CHANGED
@@ -5,6 +5,47 @@ import torch
|
|
5 |
import os
|
6 |
from PyPDF2 import PdfReader
|
7 |
import docx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
def extract_cv_text(file):
|
10 |
"""Extract text from PDF or DOCX CV files."""
|
@@ -12,23 +53,27 @@ def extract_cv_text(file):
|
|
12 |
return "No CV uploaded"
|
13 |
|
14 |
file_ext = os.path.splitext(file.name)[1].lower()
|
|
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
return
|
|
|
|
|
|
|
32 |
|
33 |
# The Hugging Face access token is read from the API_KEY environment variable
|
34 |
access_token = os.getenv('API_KEY')
|
@@ -45,20 +90,48 @@ model.eval() # Set the model to evaluation mode
|
|
45 |
# Initialize the inference client (if needed for other API-based tasks)
|
46 |
client = InferenceClient(token=access_token)
|
47 |
|
48 |
-
def
|
49 |
-
"""
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
-
Instructions: Write a
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
# Generate a response with the model
|
61 |
-
outputs = model.generate(
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
# Decode and return the generated response
|
64 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
@@ -106,30 +179,41 @@ CV Summary:
|
|
106 |
response += token
|
107 |
yield response
|
108 |
|
109 |
-
# Streamlit UI
|
110 |
-
st.title("Job Application Email Generator")
|
111 |
-
|
112 |
-
# Instructions text area
|
113 |
-
system_message = st.text_area("System message",
|
114 |
-
"Instructions: Write a concise and professional email expressing interest in the position.",
|
115 |
-
height=150)
|
116 |
|
117 |
-
#
|
118 |
-
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
else:
|
135 |
-
st.
|
|
|
5 |
import os
|
6 |
from PyPDF2 import PdfReader
|
7 |
import docx
|
8 |
+
import re
|
9 |
+
from typing import Dict
|
10 |
+
|
11 |
+
def parse_cv_sections(text: str, *, max_header_words: int = 5) -> Dict[str, str]:
    """Parse CV text into structured sections.

    The text is scanned line by line. A line is treated as a section header
    when it is short (at most ``max_header_words`` words) and contains one of
    the known section keywords at the start of a word. Every non-blank line
    from a header onwards -- including the header line itself -- is appended
    to that section until the next header is found. Lines that appear before
    the first recognised header are discarded.

    This is a heuristic: a short content line that happens to begin a word
    with a keyword (for example "Worked at Acme") is still taken as a header.

    Args:
        text: Raw text extracted from a CV. ``None`` or an empty string
            yields a dict of empty sections.
        max_header_words: Maximum number of whitespace-separated words a line
            may contain and still be considered a section header.

    Returns:
        A dict with the keys 'contact', 'education', 'experience', 'skills'
        and 'projects'. Each value is the section's lines, each terminated by
        a newline, or '' when the section was not found.
    """
    # Keyword alternations per section. Dict order matters: the first section
    # whose pattern matches a candidate header line wins.
    section_keywords = {
        'contact': r'contact|personal\s+information|profile',
        'education': r'education|academic|qualification',
        'experience': r'experience|work|employment|professional',
        'skills': r'skills|technical skills|competencies',
        'projects': r'projects|personal projects',
    }
    # The leading \b stops keywords from matching inside other words
    # ("work" in "network"/"framework"/"teamwork"); the missing trailing \b
    # keeps plurals such as "Qualifications" matching.
    header_patterns = {
        section: re.compile(r'\b(?:' + keywords + r')', re.IGNORECASE)
        for section, keywords in section_keywords.items()
    }

    collected: Dict[str, list] = {section: [] for section in header_patterns}
    current_section = None

    for raw_line in (text or '').splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # Only short lines are header candidates; long sentences that merely
        # mention a keyword must not switch the current section.
        if len(line.split()) <= max_header_words:
            for section, pattern in header_patterns.items():
                if pattern.search(line):
                    current_section = section
                    break

        if current_section is not None:
            collected[current_section].append(line)

    return {
        section: ''.join(f'{entry}\n' for entry in entries)
        for section, entries in collected.items()
    }
|
49 |
|
50 |
def extract_cv_text(file):
|
51 |
"""Extract text from PDF or DOCX CV files."""
|
|
|
53 |
return "No CV uploaded"
|
54 |
|
55 |
file_ext = os.path.splitext(file.name)[1].lower()
|
56 |
+
text = ""
|
57 |
|
58 |
+
try:
|
59 |
+
if file_ext == '.pdf':
|
60 |
+
reader = PdfReader(file)
|
61 |
+
for page in reader.pages:
|
62 |
+
text += page.extract_text()
|
63 |
+
|
64 |
+
elif file_ext == '.docx':
|
65 |
+
doc = docx.Document(file)
|
66 |
+
for paragraph in doc.paragraphs:
|
67 |
+
text += paragraph.text + '\n'
|
68 |
+
else:
|
69 |
+
return "Unsupported file format. Please upload PDF or DOCX files."
|
70 |
+
|
71 |
+
# Parse the CV into sections
|
72 |
+
sections = parse_cv_sections(text)
|
73 |
+
return sections
|
74 |
+
|
75 |
+
except Exception as e:
|
76 |
+
return f"Error processing file: {str(e)}"
|
77 |
|
78 |
# The Hugging Face access token is read from the API_KEY environment variable
|
79 |
access_token = os.getenv('API_KEY')
|
|
|
90 |
# Initialize the inference client (if needed for other API-based tasks)
|
91 |
client = InferenceClient(token=access_token)
|
92 |
|
93 |
+
def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
    """Create a detailed prompt for email generation.

    The prompt contains the job description, the parsed CV sections and a
    numbered list of writing guidelines, and ends with "Email:" so the model
    continues with the email body.

    Args:
        job_description: Text of the job posting.
        cv_sections: Mapping of section name to section text. The keys
            'contact', 'experience', 'skills', 'education' and 'projects'
            are read; missing or blank sections are rendered as
            "Not provided" instead of raising ``KeyError``.

    Returns:
        The complete prompt string.
    """
    def section_text(key: str) -> str:
        # Blank sections get a placeholder so the model is not handed an
        # empty heading and tempted to invent content for it.
        return (cv_sections.get(key) or '').strip() or 'Not provided'

    # Contact details are included because the guidelines below ask the model
    # to provide contact information in the closing paragraph.
    return f"""Job Description:
{job_description}

Your CV Details:
Contact:
{section_text('contact')}

Experience:
{section_text('experience')}

Skills:
{section_text('skills')}

Education:
{section_text('education')}

Projects:
{section_text('projects')}

Instructions: Write a professional job application email following these guidelines:
1. Start with a proper greeting
2. First paragraph: Express interest in the position and mention how you found it
3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements
4. Third paragraph: Mention specific skills that align with the role
5. Closing paragraph: Express enthusiasm for an interview and provide contact information
6. End with a professional closing

Keep the tone professional, confident, and enthusiastic. Be concise but impactful.

Email:"""
|
119 |
+
|
120 |
+
def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
    """Generate a job application email using the model with improved prompting.

    Builds the prompt with ``create_email_prompt``, samples a continuation
    from the module-level ``model`` and returns only the newly generated
    text.

    Args:
        input_text: The job description entered by the user.
        cv_sections: Parsed CV sections passed through to
            ``create_email_prompt``.

    Returns:
        The decoded model output with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = create_email_prompt(input_text, cv_sections)

    # Tokenize the prompt and keep the full encoding (input_ids plus
    # attention_mask) on the same device as the model.
    # NOTE(review): assumes `tokenizer` and `model` are the Hugging Face
    # objects created at module level earlier in this file -- confirm.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Generate a response with the model; gradients are not needed for
    # inference.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
        )

    generated = outputs[0]
    # Decoder-only models return the prompt tokens followed by the
    # continuation; drop the prompt so the caller does not display the job
    # description, CV and instructions as part of the email.
    if not getattr(model.config, "is_encoder_decoder", False):
        generated = generated[prompt_length:]

    # Decode and return the generated response
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
|
|
|
179 |
response += token
|
180 |
yield response
|
181 |
|
182 |
+
# Streamlit UI section
st.title("AI Job Application Email Generator")

# Add tabs for different sections
tab1, tab2 = st.tabs(["Generate Email", "View CV Details"])

# Bound up front so both tabs can test it directly; without this the name
# only exists after a file has been uploaded.
cv_sections = None

with tab1:
    # CV file upload
    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])

    if cv_file:
        # extract_cv_text returns a dict of sections on success and an
        # error-message string otherwise.
        cv_sections = extract_cv_text(cv_file)
        if isinstance(cv_sections, dict):
            st.success("CV uploaded and parsed successfully!")
        else:
            st.error(cv_sections)  # Show error message if parsing failed

    # Job description input
    st.markdown("### Job Description")
    message = st.text_area("Paste the job description here:", height=200)

    # Generate button
    if st.button("Generate Email"):
        # A whitespace-only job description counts as missing.
        if message.strip() and isinstance(cv_sections, dict):
            # Generation can take a while; show progress while it runs.
            with st.spinner("Generating email..."):
                response = conversation_predict(message, cv_sections)
            st.markdown("### Generated Email:")
            st.markdown(response)
        else:
            st.warning("Please upload a CV and enter a job description.")

with tab2:
    if isinstance(cv_sections, dict):
        st.markdown("### Parsed CV Details")
        for section, content in cv_sections.items():
            with st.expander(f"{section.title()}"):
                st.text(content)
    else:
        st.info("Upload a CV to view parsed details")
|