Dhahlan2000 committed on
Commit
23c19e0
·
1 Parent(s): 3baf333

Enhance app.py with CV parsing functionality and improved email generation. Introduced a new function to parse CV sections from uploaded PDF or DOCX files, allowing structured extraction of contact, education, experience, skills, and projects. Updated the email generation prompt to include detailed guidelines for crafting job application emails based on parsed CV data. Streamlined the Streamlit UI to support CV uploads and display parsed details, improving user interaction and experience.

Browse files
Files changed (1) hide show
  1. app.py +134 -50
app.py CHANGED
@@ -5,6 +5,47 @@ import torch
5
  import os
6
  from PyPDF2 import PdfReader
7
  import docx
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def extract_cv_text(file):
10
  """Extract text from PDF or DOCX CV files."""
@@ -12,23 +53,27 @@ def extract_cv_text(file):
12
  return "No CV uploaded"
13
 
14
  file_ext = os.path.splitext(file.name)[1].lower()
 
15
 
16
- if file_ext == '.pdf':
17
- reader = PdfReader(file)
18
- text = ""
19
- for page in reader.pages:
20
- text += page.extract_text()
21
- return text
22
-
23
- elif file_ext == '.docx':
24
- doc = docx.Document(file)
25
- text = ""
26
- for paragraph in doc.paragraphs:
27
- text += paragraph.text + "\n"
28
- return text
29
-
30
- else:
31
- return "Unsupported file format. Please upload PDF or DOCX files."
 
 
 
32
 
33
  # Replace 'your_huggingface_token' with your actual Hugging Face access token
34
  access_token = os.getenv('API_KEY')
@@ -45,20 +90,48 @@ model.eval() # Set the model to evaluation mode
45
  # Initialize the inference client (if needed for other API-based tasks)
46
  client = InferenceClient(token=access_token)
47
 
48
- def conversation_predict(input_text):
49
- """Generate a response for single-turn input using the model."""
50
- # Tokenize the input text
51
- input_ids = tokenizer(f"""Job Description:
52
- {input_text}
 
 
 
 
 
 
 
 
 
53
 
54
- Instructions: Write a concise and professional email expressing interest in the position.
55
- Highlight relevant experience and skills from the CV that match the job requirements.
56
- Keep the tone professional and enthusiastic.
 
 
 
 
57
 
58
- Email:""", return_tensors="pt").input_ids
 
 
 
 
 
 
 
 
 
59
 
60
  # Generate a response with the model
61
- outputs = model.generate(input_ids, max_new_tokens=2048)
 
 
 
 
 
 
62
 
63
  # Decode and return the generated response
64
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -106,30 +179,41 @@ CV Summary:
106
  response += token
107
  yield response
108
 
109
- # Streamlit UI
110
- st.title("Job Application Email Generator")
111
-
112
- # Instructions text area
113
- system_message = st.text_area("System message",
114
- "Instructions: Write a concise and professional email expressing interest in the position.",
115
- height=150)
116
 
117
- # CV file upload
118
- cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
119
 
120
- # Sliders for max tokens, temperature, and top-p
121
- max_tokens = st.slider("Max new tokens", min_value=1, max_value=2048, value=512, step=1)
122
- temperature = st.slider("Temperature", min_value=0.1, max_value=4.0, value=0.7, step=0.1)
123
- top_p = st.slider("Top-p (nucleus sampling)", min_value=0.1, max_value=1.0, value=0.95, step=0.05)
124
-
125
- # Input message field
126
- message = st.text_input("Job Description", "")
127
-
128
- # Button to generate response
129
- if st.button("Generate Email"):
130
- if message:
131
- response = conversation_predict(message)
132
- st.write("Generated Email:")
133
- st.write(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  else:
135
- st.warning("Please enter a job description.")
 
5
  import os
6
  from PyPDF2 import PdfReader
7
  import docx
8
+ import re
9
+ from typing import Dict
10
+
11
# Keywords that introduce each CV section. The leading \b keeps a keyword from
# matching inside a longer word (e.g. "work" inside "framework" or "network").
_SECTION_HEADER_PATTERNS = {
    'contact': re.compile(r'\b(?:contact|personal\s+information|profile)', re.IGNORECASE),
    'education': re.compile(r'\b(?:education|academic|qualification)', re.IGNORECASE),
    'experience': re.compile(r'\b(?:experience|work|employment|professional)', re.IGNORECASE),
    'skills': re.compile(r'\b(?:skills|technical skills|competencies)', re.IGNORECASE),
    'projects': re.compile(r'\b(?:projects|personal projects)', re.IGNORECASE),
}

# Section headers are short; longer lines are treated as body text even when
# they happen to contain a header keyword.
_MAX_HEADER_WORDS = 4


def _detect_section_header(line: str):
    """Return the section name if *line* looks like a section header, else None."""
    # For "Skills: Python, Java" only the part before the colon is the heading.
    heading = line.split(':', 1)[0].strip(' \t-*#•')
    if not heading or len(heading.split()) > _MAX_HEADER_WORDS:
        return None
    for section, pattern in _SECTION_HEADER_PATTERNS.items():
        if pattern.search(heading):
            return section
    return None


def parse_cv_sections(text: str) -> Dict[str, str]:
    """Parse CV text into structured sections.

    The text is scanned line by line. A line that looks like a section header
    switches the current section; every non-blank line (the header line
    included) is then appended to the current section followed by a newline.
    Lines that appear before the first recognised header are discarded.

    Args:
        text: Raw text extracted from the CV.

    Returns:
        A dict with the keys 'contact', 'education', 'experience', 'skills'
        and 'projects', each mapping to the collected text of that section
        (an empty string when the section was not found).
    """
    collected = {
        'contact': [],
        'education': [],
        'experience': [],
        'skills': [],
        'projects': [],
    }

    current_section = None
    for raw_line in (text or '').split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # Only switch sections on header-like lines, not on any line that
        # merely mentions a keyword somewhere in its text.
        header = _detect_section_header(line)
        if header is not None:
            current_section = header

        if current_section is not None:
            collected[current_section].append(line)

    return {
        name: ''.join(f'{entry}\n' for entry in entries)
        for name, entries in collected.items()
    }
49
 
50
  def extract_cv_text(file):
51
  """Extract text from PDF or DOCX CV files."""
 
53
  return "No CV uploaded"
54
 
55
  file_ext = os.path.splitext(file.name)[1].lower()
56
+ text = ""
57
 
58
+ try:
59
+ if file_ext == '.pdf':
60
+ reader = PdfReader(file)
61
+ for page in reader.pages:
62
+ text += page.extract_text()
63
+
64
+ elif file_ext == '.docx':
65
+ doc = docx.Document(file)
66
+ for paragraph in doc.paragraphs:
67
+ text += paragraph.text + '\n'
68
+ else:
69
+ return "Unsupported file format. Please upload PDF or DOCX files."
70
+
71
+ # Parse the CV into sections
72
+ sections = parse_cv_sections(text)
73
+ return sections
74
+
75
+ except Exception as e:
76
+ return f"Error processing file: {str(e)}"
77
 
78
  # Replace 'your_huggingface_token' with your actual Hugging Face access token
79
  access_token = os.getenv('API_KEY')
 
90
  # Initialize the inference client (if needed for other API-based tasks)
91
  client = InferenceClient(token=access_token)
92
 
93
def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
    """Create a detailed prompt for email generation.

    Args:
        job_description: The job posting text supplied by the user.
        cv_sections: Parsed CV sections keyed by section name. The keys
            'contact', 'experience', 'skills' and 'education' are used;
            missing or empty sections are rendered as "Not provided".

    Returns:
        The full prompt text, ending with "Email:" so the model continues
        with the email body.
    """
    def section_text(name: str) -> str:
        # Tolerate missing keys and blank sections rather than raising KeyError
        # or leaving an empty slot in the prompt.
        content = (cv_sections.get(name) or '').strip()
        return content or 'Not provided'

    prompt_lines = [
        "Job Description:",
        job_description,
        "",
        "Your CV Details:",
        # Guideline 5 below asks for contact information, so it has to be
        # part of the prompt for the model to be able to quote it.
        "Contact:",
        section_text('contact'),
        "",
        "Experience:",
        section_text('experience'),
        "",
        "Skills:",
        section_text('skills'),
        "",
        "Education:",
        section_text('education'),
        "",
        "Instructions: Write a professional job application email following these guidelines:",
        "1. Start with a proper greeting",
        "2. First paragraph: Express interest in the position and mention how you found it",
        "3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements",
        "4. Third paragraph: Mention specific skills that align with the role",
        "5. Closing paragraph: Express enthusiasm for an interview and provide contact information",
        "6. End with a professional closing",
        "",
        "Keep the tone professional, confident, and enthusiastic. Be concise but impactful.",
        "",
        "Email:",
    ]
    return "\n".join(prompt_lines)
119
+
120
def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
    """Generate a job application email with the locally loaded model.

    Args:
        input_text: The job description entered by the user.
        cv_sections: Parsed CV sections, passed on to create_email_prompt.

    Returns:
        The decoded model output as a string, without the prompt text.
    """
    prompt = create_email_prompt(input_text, cv_sections)

    # Tokenize the prompt; keep the attention mask so generate() does not
    # have to infer it from the pad token.
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"]

    # Generate a response with the model
    outputs = model.generate(
        input_ids,
        attention_mask=encoded.get("attention_mask"),
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
    )

    generated = outputs[0]
    # Decoder-only models return prompt + continuation; drop the prompt so the
    # caller shows only the email. Encoder-decoder output has no prompt prefix.
    model_config = getattr(model, "config", None)
    if not getattr(model_config, "is_encoder_decoder", False):
        generated = generated[input_ids.shape[-1]:]

    # Decode and return the generated response
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
 
179
  response += token
180
  yield response
181
 
182
# Streamlit UI section
st.title("AI Job Application Email Generator")

# The page has two tabs: one to generate the email, one to inspect the parsed CV.
generate_tab, details_tab = st.tabs(["Generate Email", "View CV Details"])

with generate_tab:
    # CV file upload
    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])

    # extract_cv_text returns a dict of sections on success and a message
    # string otherwise, so the type tells us whether parsing worked.
    cv_sections = extract_cv_text(cv_file) if cv_file else None
    cv_parsed = isinstance(cv_sections, dict)
    if cv_file:
        if cv_parsed:
            st.success("CV uploaded and parsed successfully!")
        else:
            st.error(cv_sections)  # Show error message if parsing failed

    # Job description input
    st.markdown("### Job Description")
    message = st.text_area("Paste the job description here:", height=200)

    # Generate button
    if st.button("Generate Email"):
        if not (message and cv_file and cv_parsed):
            st.warning("Please upload a CV and enter a job description.")
        else:
            response = conversation_predict(message, cv_sections)
            st.markdown("### Generated Email:")
            st.markdown(response)

with details_tab:
    if not (cv_file and cv_parsed):
        st.info("Upload a CV to view parsed details")
    else:
        st.markdown("### Parsed CV Details")
        # One collapsible panel per parsed section.
        for section, content in cv_sections.items():
            with st.expander(f"{section.title()}"):
                st.text(content)