Dhahlan2000 committed on
Commit
2d88e43
·
1 Parent(s): 7b2b80a

Refactor app.py to simplify CV text extraction and enhance email generation. Removed the parse_cv_sections function, returning the full CV text instead of structured sections. Updated related functions to accommodate this change, improving the overall flow and user experience in the Streamlit interface. This refactor streamlines the process of handling CV uploads and job descriptions, ensuring a more efficient email generation workflow.

Browse files
Files changed (1) hide show
  1. app.py +21 -120
app.py CHANGED
@@ -8,84 +8,6 @@ import docx
8
  import re
9
  from typing import Dict
10
 
11
- def parse_cv_sections(text: str) -> Dict[str, str]:
12
- """Parse CV text into structured sections."""
13
- sections = {
14
- 'contact': '',
15
- 'education': '',
16
- 'experience': '',
17
- 'skills': '',
18
- 'projects': '',
19
- }
20
-
21
- # Common section headers in CVs with more variations
22
- section_patterns = {
23
- 'contact': r'(?i)(contact|personal\s+information|profile|contact\s+details|about\s+me)',
24
- 'education': r'(?i)(education|academic|qualification|academic\s+background|educational\s+background)',
25
- 'experience': r'(?i)(experience|work|employment|professional|work\s+history|career|professional\s+experience)',
26
- 'skills': r'(?i)(skills|technical\s+skills|competencies|expertise|technologies|tools|programming|languages)',
27
- 'projects': r'(?i)(projects|personal\s+projects|portfolio|work\s+samples)',
28
- }
29
-
30
- # Split text into lines and clean
31
- lines = [line.strip() for line in text.split('\n') if line.strip()]
32
- current_section = None
33
- section_content = []
34
-
35
- # First pass: identify sections
36
- for i, line in enumerate(lines):
37
- # Check if line is a section header
38
- for section, pattern in section_patterns.items():
39
- if re.search(pattern, line, re.IGNORECASE):
40
- # If we found a section header
41
- if current_section:
42
- # Save previous section content
43
- sections[current_section] = '\n'.join(section_content)
44
- current_section = section
45
- section_content = []
46
- break
47
- else:
48
- # If line is not a header and we have a current section, add to content
49
- if current_section:
50
- section_content.append(line)
51
-
52
- # Save the last section
53
- if current_section and section_content:
54
- sections[current_section] = '\n'.join(section_content)
55
-
56
- # If no sections were found, try to categorize the content
57
- if all(not content for content in sections.values()):
58
- lines_text = '\n'.join(lines)
59
-
60
- # Look for email addresses and phone numbers for contact
61
- email_pattern = r'[\w\.-]+@[\w\.-]+'
62
- phone_pattern = r'[\+\d]?(\d{2,3}[-\.\s]?){2}\d{4}'
63
-
64
- emails = re.findall(email_pattern, lines_text)
65
- phones = re.findall(phone_pattern, lines_text)
66
- if emails or phones:
67
- sections['contact'] = '\n'.join(emails + phones)
68
-
69
- # Look for education keywords
70
- edu_keywords = r'(?i)(university|college|school|degree|bachelor|master|phd|diploma)'
71
- edu_lines = [l for l in lines if re.search(edu_keywords, l)]
72
- if edu_lines:
73
- sections['education'] = '\n'.join(edu_lines)
74
-
75
- # Look for experience keywords
76
- exp_keywords = r'(?i)(worked|developer|engineer|manager|consultant|analyst)'
77
- exp_lines = [l for l in lines if re.search(exp_keywords, l)]
78
- if exp_lines:
79
- sections['experience'] = '\n'.join(exp_lines)
80
-
81
- # Look for skills
82
- skill_keywords = r'(?i)(python|java|javascript|react|node|sql|aws|docker|kubernetes|git)'
83
- skill_lines = [l for l in lines if re.search(skill_keywords, l)]
84
- if skill_lines:
85
- sections['skills'] = '\n'.join(skill_lines)
86
-
87
- return sections
88
-
89
  def extract_cv_text(file):
90
  """Extract text from PDF or DOCX CV files."""
91
  if file is None:
@@ -98,28 +20,16 @@ def extract_cv_text(file):
98
  if file_ext == '.pdf':
99
  reader = PdfReader(file)
100
  for page in reader.pages:
101
- text += page.extract_text() + "\n"
102
 
103
  elif file_ext == '.docx':
104
  doc = docx.Document(file)
105
  for paragraph in doc.paragraphs:
106
- text += paragraph.text + "\n"
107
- # Also check tables in docx
108
- for table in doc.tables:
109
- for row in table.rows:
110
- for cell in row.cells:
111
- text += cell.text + "\n"
112
  else:
113
  return "Unsupported file format. Please upload PDF or DOCX files."
114
 
115
- # Parse the CV into sections
116
- sections = parse_cv_sections(text)
117
-
118
- # Verify that we have content
119
- if all(not content.strip() for content in sections.values()):
120
- return f"Could not parse CV sections. Raw text:\n{text}"
121
-
122
- return sections
123
 
124
  except Exception as e:
125
  return f"Error processing file: {str(e)}"
@@ -130,20 +40,13 @@ access_token = os.getenv('API_KEY')
130
  # Initialize the inference client (if needed for other API-based tasks)
131
  client = InferenceClient(token=access_token)
132
 
133
- def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
134
  """Create a detailed prompt for email generation."""
135
  return f"""Job Description:
136
  {job_description}
137
 
138
  Your CV Details:
139
- Experience:
140
- {cv_sections['experience']}
141
-
142
- Skills:
143
- {cv_sections['skills']}
144
-
145
- Education:
146
- {cv_sections['education']}
147
 
148
  Instructions: Write a professional job application email following these guidelines:
149
  1. Start with a proper greeting
@@ -157,9 +60,9 @@ Keep the tone professional, confident, and enthusiastic. Be concise but impactfu
157
 
158
  Email:"""
159
 
160
- def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
161
  """Generate a response using the model with streaming output."""
162
- prompt = create_email_prompt(input_text, cv_sections)
163
 
164
  # Use the streaming API
165
  try:
@@ -223,18 +126,18 @@ CV Summary:
223
  # Streamlit UI section
224
  st.title("AI Job Application Email Generator")
225
 
226
- def update_ui(message, cv_file, cv_sections):
227
  """Handle the UI updates for email generation."""
228
  # Create placeholder for the generated email
229
  email_placeholder = st.empty()
230
 
231
  # Generate button
232
  if st.button("Generate Email", key="generate_button"):
233
- if message and cv_file and isinstance(cv_sections, dict):
234
  email_text = ""
235
  # Stream the response
236
  try:
237
- for chunk in conversation_predict(message, cv_sections):
238
  if chunk:
239
  email_text += chunk
240
  # Update the text area with each chunk, using timestamp in key
@@ -256,27 +159,25 @@ with tab1:
256
  cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
257
 
258
  if cv_file:
259
- cv_sections = extract_cv_text(cv_file)
260
- if isinstance(cv_sections, dict):
261
- st.success("CV uploaded and parsed successfully!")
262
  else:
263
- st.error(cv_sections)
264
- cv_sections = None
265
  else:
266
- cv_sections = None
267
 
268
  # Job description input
269
  st.markdown("### Job Description")
270
  message = st.text_area("Paste the job description here:", height=200)
271
 
272
  # Call the updated UI function with parameters
273
- update_ui(message, cv_file, cv_sections)
274
 
275
  with tab2:
276
- if cv_file and isinstance(cv_sections, dict):
277
- st.markdown("### Parsed CV Details")
278
- for section, content in cv_sections.items():
279
- with st.expander(f"{section.title()}"):
280
- st.text(content)
281
  else:
282
- st.info("Upload a CV to view parsed details")
 
8
  import re
9
  from typing import Dict
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def extract_cv_text(file):
12
  """Extract text from PDF or DOCX CV files."""
13
  if file is None:
 
20
  if file_ext == '.pdf':
21
  reader = PdfReader(file)
22
  for page in reader.pages:
23
+ text += page.extract_text()
24
 
25
  elif file_ext == '.docx':
26
  doc = docx.Document(file)
27
  for paragraph in doc.paragraphs:
28
+ text += paragraph.text + '\n'
 
 
 
 
 
29
  else:
30
  return "Unsupported file format. Please upload PDF or DOCX files."
31
 
32
+ return text # Return the full text instead of parsed sections
 
 
 
 
 
 
 
33
 
34
  except Exception as e:
35
  return f"Error processing file: {str(e)}"
 
40
  # Initialize the inference client (if needed for other API-based tasks)
41
  client = InferenceClient(token=access_token)
42
 
43
+ def create_email_prompt(job_description: str, cv_text: str) -> str:
44
  """Create a detailed prompt for email generation."""
45
  return f"""Job Description:
46
  {job_description}
47
 
48
  Your CV Details:
49
+ {cv_text}
 
 
 
 
 
 
 
50
 
51
  Instructions: Write a professional job application email following these guidelines:
52
  1. Start with a proper greeting
 
60
 
61
  Email:"""
62
 
63
+ def conversation_predict(input_text: str, cv_text: str):
64
  """Generate a response using the model with streaming output."""
65
+ prompt = create_email_prompt(input_text, cv_text)
66
 
67
  # Use the streaming API
68
  try:
 
126
  # Streamlit UI section
127
  st.title("AI Job Application Email Generator")
128
 
129
+ def update_ui(message, cv_file, cv_text):
130
  """Handle the UI updates for email generation."""
131
  # Create placeholder for the generated email
132
  email_placeholder = st.empty()
133
 
134
  # Generate button
135
  if st.button("Generate Email", key="generate_button"):
136
+ if message and cv_file and isinstance(cv_text, str) and not cv_text.startswith("Error"):
137
  email_text = ""
138
  # Stream the response
139
  try:
140
+ for chunk in conversation_predict(message, cv_text):
141
  if chunk:
142
  email_text += chunk
143
  # Update the text area with each chunk, using timestamp in key
 
159
  cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
160
 
161
  if cv_file:
162
+ cv_text = extract_cv_text(cv_file)
163
+ if isinstance(cv_text, str) and not cv_text.startswith("Error"):
164
+ st.success("CV uploaded successfully!")
165
  else:
166
+ st.error(cv_text)
167
+ cv_text = None
168
  else:
169
+ cv_text = None
170
 
171
  # Job description input
172
  st.markdown("### Job Description")
173
  message = st.text_area("Paste the job description here:", height=200)
174
 
175
  # Call the updated UI function with parameters
176
+ update_ui(message, cv_file, cv_text)
177
 
178
  with tab2:
179
+ if cv_file and isinstance(cv_text, str) and not cv_text.startswith("Error"):
180
+ st.markdown("### CV Content")
181
+ st.text_area("Full CV Text", value=cv_text, height=400)
 
 
182
  else:
183
+ st.info("Upload a CV to view content")