Upload 10 files

- agent_framework.py +394 -0
- app.py +322 -0
- database.py +19 -0
- email_utils.py +45 -0
- jd_embedding_utils.py +125 -0
- matcher.py +94 -0
- models.py +91 -0
- requirements.txt +14 -0
- resume_embedding_utils.py +167 -0
- schemas.py +16 -0
agent_framework.py
ADDED
@@ -0,0 +1,394 @@
import logging
from datetime import datetime, timedelta
import random
from typing import List, Dict, Any

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')


class Agent:
    """Base class for all agents in the system"""
    def __init__(self, name: str):
        self.name = name
        self.logger = logging.getLogger(f"Agent:{name}")
        self.logger.info(f"Agent {name} initialized")

    def log_action(self, action: str, details: Any = None):
        """Log an action taken by this agent"""
        self.logger.info(f"Action: {action} - Details: {details}")

    def __str__(self):
        return f"Agent({self.name})"


class JDAnalyzerAgent(Agent):
    """Agent responsible for analyzing job descriptions"""
    def __init__(self):
        super().__init__("JDAnalyzer")

    def analyze_jd(self, jd_text: str) -> Dict:
        """Analyze a job description to extract key information"""
        from jd_embedding_utils import generate_jd_embedding, extract_sections

        self.log_action("Analyzing job description", {"length": len(jd_text)})

        # Extract sections and generate embeddings
        title, embedding = generate_jd_embedding(jd_text)
        sections = extract_sections(jd_text)

        # Generate summary
        summary = self.generate_summary(sections)

        result = {
            "title": title,
            "embedding": embedding,
            "sections": sections,
            "summary": summary
        }

        self.log_action("Analysis complete", {"title": title})
        return result

    def generate_summary(self, sections: Dict) -> str:
        """Generate a human-readable summary of the job description"""
        title = sections.get("job_title", "Unknown Position")

        responsibilities = sections.get("responsibilities", [])
        qualifications = sections.get("qualifications", [])

        summary = f"Position: {title}\n\n"

        if responsibilities:
            summary += "Key Responsibilities:\n"
            # Limit to the top 5 responsibilities for brevity
            for resp in responsibilities[:5]:
                summary += f"- {resp}\n"
            if len(responsibilities) > 5:
                summary += f"- Plus {len(responsibilities) - 5} more responsibilities\n"
            summary += "\n"

        if qualifications:
            summary += "Required Qualifications:\n"
            # Limit to the top 5 qualifications for brevity
            for qual in qualifications[:5]:
                summary += f"- {qual}\n"
            if len(qualifications) > 5:
                summary += f"- Plus {len(qualifications) - 5} more qualifications\n"

        return summary


class CVAnalyzerAgent(Agent):
    """Agent responsible for analyzing candidate CVs"""
    def __init__(self):
        super().__init__("CVAnalyzer")

    def process_cv(self, file_path: str, filename: str) -> Dict:
        """Process a CV to extract key information"""
        from resume_embedding_utils import pdf_to_text, extract_resume_sections
        from sentence_transformers import SentenceTransformer

        self.log_action("Processing CV", {"filename": filename})

        # Extract text from PDF
        text = pdf_to_text(file_path)

        # Parse CV sections
        parsed_sections = extract_resume_sections(text)

        # Generate section-specific embeddings (load the model once, not once per section)
        model = SentenceTransformer("all-MiniLM-L6-v2")
        section_embeddings = {}
        for section in ["experience", "education", "skills", "projects", "certifications", "tech_stack"]:
            if parsed_sections.get(section):
                section_text = " ".join(parsed_sections[section])
                if section_text.strip():
                    section_embeddings[section] = model.encode(section_text, convert_to_numpy=True)

        # Generate summary
        summary = self.generate_summary(parsed_sections)

        result = {
            "parsed": parsed_sections,
            "embedding": section_embeddings,
            "text": text,
            "summary": summary
        }

        self.log_action("CV processing complete", {
            "sections_found": list(parsed_sections.keys())
        })

        return result

    def generate_summary(self, sections: Dict) -> str:
        """Generate a human-readable summary of the CV"""
        name = sections["name"][0] if sections.get("name") else "Unknown Candidate"

        skills = sections.get("skills", [])
        experience = sections.get("experience", [])
        education = sections.get("education", [])

        summary = f"Candidate: {name}\n\n"

        if skills:
            summary += "Key Skills:\n"
            # Limit to the top 5 skills for brevity
            for skill in skills[:5]:
                summary += f"- {skill}\n"
            if len(skills) > 5:
                summary += f"- Plus {len(skills) - 5} more skills\n"
            summary += "\n"

        if experience:
            summary += "Experience:\n"
            # Limit to the top 3 experiences for brevity
            for exp in experience[:3]:
                summary += f"- {exp}\n"
            if len(experience) > 3:
                summary += f"- Plus {len(experience) - 3} more experiences\n"
            summary += "\n"

        if education:
            summary += "Education:\n"
            # Limit to the top 2 education entries for brevity
            for edu in education[:2]:
                summary += f"- {edu}\n"

        return summary


class MatchingAgent(Agent):
    """Agent responsible for matching CVs against job descriptions"""
    def __init__(self, threshold: float = 0.7):
        super().__init__("Matcher")
        self.threshold = threshold

    def match_cvs_to_jd(self, jd_data: Dict, cv_data: Dict[str, Dict]) -> Dict:
        """Match multiple CVs against a job description"""
        from matcher import calculate_match_score

        self.log_action("Starting matching process", {
            "jd_title": jd_data.get("title", "Unknown"),
            "cv_count": len(cv_data)
        })

        jd_embeddings = jd_data.get("embedding", {})

        # Match each CV against the JD
        matches = []
        for filename, resume_data in cv_data.items():
            parsed = resume_data["parsed"]
            embedding = resume_data.get("embedding", {})

            # Extract the name from the parsed CV or fall back to the filename
            name = self._extract_name(parsed, filename)

            score, reasoning = calculate_match_score(jd_embeddings, embedding)

            match_data = {
                "name": name,
                "filename": filename,
                "score": score,
                "reasoning": reasoning,
                "isMatch": score >= self.threshold  # Use threshold for matching
            }

            self.log_action("CV matched", {
                "name": name,
                "score": score,
                "is_match": match_data["isMatch"]
            })

            matches.append(match_data)

        # Sort matches by score in descending order
        matches.sort(key=lambda x: x["score"], reverse=True)

        result = {"matches": matches}
        self.log_action("Matching complete", {
            "total_matches": len(matches),
            "qualified_matches": sum(1 for m in matches if m["isMatch"])
        })

        return result

    def _extract_name(self, parsed, fallback):
        """Extract name from parsed CV or use fallback"""
        from pathlib import Path
        if parsed.get("name"):
            return parsed["name"][0]
        return Path(fallback).stem


class SchedulerAgent(Agent):
    """Agent responsible for scheduling interviews with matched candidates"""
    def __init__(self):
        super().__init__("Scheduler")

    def generate_interview_slots(self, days_ahead: int = 10, slots_per_day: int = 3) -> List[Dict]:
        """Generate available interview time slots for the next N days"""
        slots = []
        start_date = datetime.now() + timedelta(days=1)  # Start from tomorrow

        for day in range(days_ahead):
            current_date = start_date + timedelta(days=day)

            # Skip weekends (5 = Saturday, 6 = Sunday)
            if current_date.weekday() >= 5:
                continue

            # Generate time slots for this day: 9 AM to 5 PM with a lunch break
            possible_hours = [9, 10, 11, 13, 14, 15, 16]
            selected_hours = random.sample(possible_hours, min(slots_per_day, len(possible_hours)))
            selected_hours.sort()

            for hour in selected_hours:
                slot_time = current_date.replace(hour=hour, minute=0, second=0, microsecond=0)
                slots.append({
                    "date": slot_time.strftime("%Y-%m-%d"),
                    "time": slot_time.strftime("%H:%M"),
                    "datetime": slot_time,
                    "formatted": slot_time.strftime("%A, %B %d at %I:%M %p")
                })

        return slots

    def prepare_email_for_candidate(self, candidate: Dict, job_title: str) -> Dict:
        """Prepare an email for a shortlisted candidate with interview slots"""
        self.log_action("Preparing email", {"candidate": candidate["name"]})

        # Generate interview slots
        interview_slots = self.generate_interview_slots(days_ahead=7, slots_per_day=2)

        candidate_name = candidate["name"]

        # Create the email content with interview slots
        subject = f"Interview Invitation: {job_title} Position"

        body = f"""Dear {candidate_name},

We are pleased to inform you that your profile has been shortlisted for the {job_title} position. Your qualifications and experience align well with what we're looking for.

We would like to invite you for an interview. Please select one of the following time slots that works best for you:

"""

        # Add the first 3 available slots
        for i, slot in enumerate(interview_slots[:3]):
            body += f"Option {i+1}: {slot['formatted']}\n"

        body += """
Please reply to this email with your preferred time slot, or suggest an alternative if none of these work for you.

The interview will be conducted via video call, and the details will be sent once you confirm your availability.

We look forward to speaking with you!

Best regards,
Recruitment Team"""

        return {
            "to": candidate_name,
            "email": self._generate_email_address(candidate_name),
            "subject": subject,
            "body": body,
            "slots": interview_slots[:3]
        }

    def _generate_email_address(self, name: str) -> str:
        """Generate a placeholder email address from a name"""
        # Convert to lowercase, replace spaces with dots, add domain
        email = name.lower().replace(" ", ".")
        return f"{email}@example.com"

    def send_interview_email(self, email_data: Dict) -> Dict:
        """Send an interview invitation email to a candidate"""
        from email_utils import send_email

        self.log_action("Sending interview email", {
            "to": email_data["to"],
            "email": email_data["email"]
        })

        # Call the email utility to send the email
        result = send_email(
            to_email=email_data["email"],
            subject=email_data["subject"],
            body=email_data["body"].replace("\n", "<br>")
        )

        self.log_action("Email sent", {"success": result["success"]})
        return result


class AgentCoordinator:
    """Coordinates the activities of all agents in the system"""
    def __init__(self):
        self.jd_agent = JDAnalyzerAgent()
        self.cv_agent = CVAnalyzerAgent()
        self.matching_agent = MatchingAgent()
        self.scheduler_agent = SchedulerAgent()
        self.logger = logging.getLogger("AgentCoordinator")

    def process_job_description(self, jd_text: str) -> Dict:
        """Process a job description using the JD agent"""
        self.logger.info("Starting job description processing")
        return self.jd_agent.analyze_jd(jd_text)

    def process_resumes(self, file_paths: List[tuple]) -> Dict[str, Dict]:
        """Process multiple resumes using the CV agent"""
        self.logger.info(f"Starting resume processing for {len(file_paths)} files")

        results = {}
        for filename, file_path in file_paths:
            results[filename] = self.cv_agent.process_cv(file_path, filename)

        return results

    def match_candidates(self, jd_data: Dict, cv_data: Dict[str, Dict]) -> Dict:
        """Match candidates with the job description"""
        self.logger.info("Starting candidate matching")
        return self.matching_agent.match_cvs_to_jd(jd_data, cv_data)

    def schedule_interviews(self, matches: List[Dict], job_title: str) -> List[Dict]:
        """Schedule interviews for matched candidates"""
        self.logger.info(f"Scheduling interviews for {len(matches)} candidates")

        email_data = []
        for candidate in matches:
            if candidate["isMatch"]:
                email_info = self.scheduler_agent.prepare_email_for_candidate(candidate, job_title)
                email_data.append(email_info)

        return email_data

    def execute_full_workflow(self, jd_text: str, resume_files: List[tuple]) -> Dict:
        """Execute the complete workflow from JD analysis to interview scheduling"""
        self.logger.info("Starting full recruitment workflow")

        # Step 1: Process the job description
        jd_result = self.process_job_description(jd_text)

        # Step 2: Process all resumes
        resume_results = self.process_resumes(resume_files)

        # Step 3: Match candidates with the job
        match_results = self.match_candidates(jd_result, resume_results)

        # Step 4: Schedule interviews for matched candidates
        email_data = self.schedule_interviews(match_results["matches"], jd_result["title"])

        return {
            "jd": jd_result,
            "resumes": resume_results,
            "matches": match_results,
            "emails": email_data
        }
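A minimal driver for the pipeline above, assuming a JD text file and resume PDFs saved locally (the file names and paths here are hypothetical):

# Hypothetical end-to-end run of the agent pipeline.
from agent_framework import AgentCoordinator

coordinator = AgentCoordinator()
with open("jd.txt") as f:                          # assumed local JD file
    jd_text = f.read()
resume_files = [("alice.pdf", "/tmp/alice.pdf")]   # (filename, path) tuples, as process_resumes expects

result = coordinator.execute_full_workflow(jd_text, resume_files)
print(result["jd"]["summary"])
for match in result["matches"]["matches"]:
    print(match["name"], match["score"], match["isMatch"])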
app.py
ADDED
@@ -0,0 +1,322 @@
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import tempfile
import os
import shutil
from typing import List, Dict, Any
import json
import numpy as np
from pathlib import Path
import asyncio
from sentence_transformers import SentenceTransformer
import sqlite3

from jd_embedding_utils import generate_jd_embedding, extract_sections
from matcher import calculate_match_score, match_all_resumes
from email_utils import send_email
from agent_framework import AgentCoordinator

app = FastAPI()

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize SQLite database
def init_db():
    conn = sqlite3.connect("recruitly.db")
    cursor = conn.cursor()
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS job_descriptions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            title TEXT,
            embedding TEXT,
            sections TEXT,
            summary TEXT
        )
    """)
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS resumes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            filename TEXT,
            embedding TEXT,
            parsed TEXT,
            summary TEXT
        )
    """)
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS matches (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            jd_id INTEGER,
            score REAL,
            reasoning TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes (id),
            FOREIGN KEY (jd_id) REFERENCES job_descriptions (id)
        )
    """)
    conn.commit()
    conn.close()

# Call init_db on startup
init_db()

# Request/response models
class JDRequest(BaseModel):
    text: str

class MatchRequest(BaseModel):
    jd_sections: Dict[str, List[str]]
    resume_data: Dict[str, Dict[str, Any]]

class EmailRequest(BaseModel):
    email: str
    name: str
    subject: str
    body: str

class ScheduleRequest(BaseModel):
    candidate_id: str
    name: str
    email: str

class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

# Store the processed JD and resumes in memory for matching
current_session = {
    "jd": None,
    "resumes": {},
    "agent_coordinator": AgentCoordinator()
}

# Create a single model instance for reuse
model = SentenceTransformer("all-MiniLM-L6-v2")

@app.post("/embed")
def get_embedding(request: JDRequest):
    """Process a job description and generate its embedding"""
    coordinator = current_session["agent_coordinator"]
    result = coordinator.process_job_description(request.text)

    # Store in the current session
    current_session["jd"] = result

    # Convert the embedding dictionary for the JSON response
    serializable_embedding = json.loads(
        json.dumps(result["embedding"], cls=NumpyEncoder)
    )

    return {
        "title": result["title"],
        "embedding": serializable_embedding,
        "sections": result["sections"],
        "summary": result.get("summary", "")
    }

@app.post("/upload-resumes")
async def upload_resumes(files: List[UploadFile] = File(...)):
    """Process multiple resume PDFs and generate embeddings for each"""
    if not files:
        raise HTTPException(status_code=400, detail="No files provided")

    # Create a temp directory for saving uploaded files
    with tempfile.TemporaryDirectory() as temp_dir:
        resume_results = {}

        # First save all files to disk to avoid keeping file handles open too long
        file_paths = []
        for file in files:
            file_path = os.path.join(temp_dir, file.filename)
            with open(file_path, "wb") as buffer:
                shutil.copyfileobj(file.file, buffer)
            file_paths.append((file.filename, file_path))

        # Process files in batches to avoid memory issues
        batch_size = 3
        for i in range(0, len(file_paths), batch_size):
            batch = file_paths[i:i+batch_size]
            batch_tasks = [process_resume(filename, file_path) for filename, file_path in batch]

            # Process each batch concurrently
            batch_results = await asyncio.gather(*batch_tasks)

            # Combine results
            for filename, result in batch_results:
                resume_results[filename] = result
                # Add to the current session
                if "error" not in result:
                    current_session["resumes"][filename] = result

    # Convert NumPy arrays to lists for the JSON response
    serializable_results = json.loads(
        json.dumps(resume_results, cls=NumpyEncoder)
    )

    return JSONResponse(content=serializable_results)

async def process_resume(filename, file_path):
    """Process a single resume PDF file"""
    try:
        coordinator = current_session["agent_coordinator"]
        result = coordinator.cv_agent.process_cv(file_path, filename)
        return filename, result
    except Exception as e:
        print(f"Error processing {filename}: {str(e)}")
        return filename, {"error": str(e)}

@app.post("/match")
def match_resumes():
    """Match the current JD with all processed resumes"""
    jd = current_session["jd"]
    resumes = current_session["resumes"]

    if not jd or not resumes:
        raise HTTPException(status_code=400, detail="Job description or resumes missing")

    jd_title = jd["title"]
    jd_embeddings = jd["embedding"]

    # Match all resumes
    all_candidates = match_all_resumes(jd_title, jd_embeddings, resumes, threshold=0.8)

    # Keep the matches in the session so /prepare-interview-email can look candidates up
    current_session["matches"] = {"matches": all_candidates}

    # Save all candidates to the database
    conn = sqlite3.connect("recruitly.db")
    cursor = conn.cursor()
    for candidate in all_candidates:
        cursor.execute("""
            INSERT INTO matches (resume_id, jd_id, score, reasoning)
            VALUES (?, ?, ?, ?)
        """, (candidate.get("resume_id"), jd.get("id"), candidate["score"], json.dumps(candidate["reasoning"])))
    conn.commit()
    conn.close()

    # Include all candidates in the response
    return {"candidates": all_candidates}

@app.post("/generate-interview-slots")
def generate_interview_slots():
    """Generate potential interview time slots"""
    if not current_session["agent_coordinator"]:
        raise HTTPException(status_code=400, detail="Agent coordinator not initialized")

    slots = current_session["agent_coordinator"].scheduler_agent.generate_interview_slots()

    return {"slots": slots}

@app.post("/prepare-interview-email/{candidate_id}")
def prepare_interview_email(candidate_id: str):
    """Prepare an interview email for a specific candidate"""
    if not current_session["jd"]:
        raise HTTPException(status_code=400, detail="No job description processed")

    # Find the candidate among the stored matches
    matched_candidates = []
    if "matches" in current_session:
        matched_candidates = current_session["matches"]["matches"]

    candidate = None
    for match in matched_candidates:
        if match["name"] == candidate_id or str(match.get("id", "")) == candidate_id:
            candidate = match
            break

    if not candidate:
        raise HTTPException(status_code=404, detail=f"Candidate {candidate_id} not found")

    # Generate email content
    email_data = current_session["agent_coordinator"].scheduler_agent.prepare_email_for_candidate(
        candidate,
        current_session["jd"]["title"]
    )

    return email_data

@app.post("/send-email")
def send_candidate_email(request: EmailRequest):
    """Send an email to a candidate"""
    try:
        result = send_email(
            to_email=request.email,
            subject=request.subject,
            body=request.body
        )

        if result["success"]:
            return {"success": True, "message": f"Email sent to {request.name}"}
        else:
            raise HTTPException(status_code=500, detail=result["message"])
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/suggest-interview-times/{candidate_id}")
def suggest_interview_times(candidate_id: str):
    """Suggest available interview time slots for a candidate"""
    coordinator = current_session["agent_coordinator"]
    slots = coordinator.scheduler_agent.generate_interview_slots(days_ahead=7, slots_per_day=3)

    return {"candidate_id": candidate_id, "slots": slots}

# Helper to extract a name from a parsed resume
def _extract_name(parsed, fallback):
    if parsed.get("name"):
        return parsed["name"][0]
    return Path(fallback).stem

@app.get("/clear-session")
def clear_session():
    """Clear the current session data"""
    current_session["jd"] = None
    current_session["resumes"] = {}
    current_session.pop("matches", None)
    return {"message": "Session cleared"}

@app.get("/test-match")
def test_match():
    """Test endpoint to diagnose matching issues"""
    test_jd = """We are seeking an innovative and strategic Product Manager to lead the development and execution of new products. The ideal candidate will collaborate with cross-functional teams to define product roadmaps, analyze market trends, and ensure successful product launches. Responsibilities: Define product vision and strategy based on market research and customer needs. Work closely with engineering, design, and marketing teams to develop and launch products. Prioritize features, create roadmaps, and manage product lifecycle. Analyze user feedback and data to optimize product performance. Ensure alignment between business goals and product development. Qualifications: Bachelor's degree in Business, Computer Science, or a related field. Experience in product management, agile methodologies, and market research. Strong analytical, leadership, and communication skills. Familiarity with project management tools and data-driven decision-making."""

    # Process the test JD
    title, jd_embeddings = generate_jd_embedding(test_jd)
    sections = extract_sections(test_jd)

    # Create a simple test resume with matching sections
    test_resume = {
        "skills": ["Product management", "Agile methodologies", "Leadership"],
        "experience": ["5 years experience in product management", "Led cross-functional teams"],
        "education": ["Bachelor's degree in Computer Science"],
        "qualifications": ["Strong analytical skills", "Communication skills"]
    }

    # Encode the test resume sections so the matcher compares embeddings, not raw text
    resume_embeddings = {
        section: model.encode(" ".join(lines), convert_to_numpy=True) if lines else None
        for section, lines in test_resume.items()
    }

    # Run the matcher
    score, reasoning = calculate_match_score(jd_embeddings, resume_embeddings)

    return {
        "jd_sections": sections,
        "resume_sections": test_resume,
        "score": score,
        "reasoning": reasoning
    }

# Run the app locally
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000)
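A sketch of a client session against a locally running instance; it assumes the third-party `requests` package, which is not in requirements.txt, and a hypothetical local PDF:

# Hypothetical client calls mirroring the intended flow: embed the JD, upload resumes, match.
import requests

BASE = "http://127.0.0.1:8000"

r = requests.post(f"{BASE}/embed", json={"text": "We are hiring a Backend Developer to build APIs."})
print(r.json()["title"])

with open("alice.pdf", "rb") as f:                 # assumed local resume PDF
    requests.post(f"{BASE}/upload-resumes",
                  files=[("files", ("alice.pdf", f, "application/pdf"))])

print(requests.post(f"{BASE}/match").json()["candidates"])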
database.py
ADDED
@@ -0,0 +1,19 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

SQLALCHEMY_DATABASE_URL = "sqlite:///./application_db.sqlite"

engine = create_engine(
    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Base = declarative_base()

# FastAPI dependency: yields a session and always closes it afterwards
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
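app.py talks to SQLite directly through `sqlite3`, so this module appears to be groundwork for an ORM path. A sketch of the standard FastAPI pattern `get_db` is written for, with a hypothetical route (it assumes the tables already exist in application_db.sqlite):

# Hypothetical route using get_db as a dependency; the session is closed automatically.
from fastapi import Depends, FastAPI
from sqlalchemy import text
from sqlalchemy.orm import Session
from database import get_db

app = FastAPI()

@app.get("/resume-count")
def resume_count(db: Session = Depends(get_db)):
    return {"count": db.execute(text("SELECT COUNT(*) FROM resumes")).scalar()}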
email_utils.py
ADDED
@@ -0,0 +1,45 @@
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def send_email(to_email, subject, body):
    """Send an email using the configured SMTP server"""
    try:
        # Get email configuration from environment variables
        email_service = os.getenv("EMAIL_SERVICE")
        email_user = os.getenv("EMAIL_USER")
        email_password = os.getenv("EMAIL_PASS")
        email_from = os.getenv("EMAIL_FROM")

        # Create message
        msg = MIMEMultipart()
        msg['From'] = email_from
        msg['To'] = to_email
        msg['Subject'] = subject

        # Add body to email
        msg.attach(MIMEText(body, 'html'))

        # Set up the SMTP server
        if email_service == 'gmail':
            server = smtplib.SMTP('smtp.gmail.com', 587)
        else:
            raise ValueError(f"Unsupported email service: {email_service}")

        server.starttls()
        server.login(email_user, email_password)

        # Send email
        text = msg.as_string()
        server.sendmail(email_from, to_email, text)
        server.quit()

        return {"success": True, "message": "Email sent successfully"}
    except Exception as e:
        print(f"Error sending email: {str(e)}")
        return {"success": False, "message": str(e)}
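send_email reads four variables from the environment; a sample `.env` with placeholder values (only `gmail` is handled by the service check above):

EMAIL_SERVICE=gmail
EMAIL_USER=recruiter@example.com
EMAIL_PASS=app-specific-password
EMAIL_FROM=recruiter@example.com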
jd_embedding_utils.py
ADDED
@@ -0,0 +1,125 @@
import re
import nltk
from collections import defaultdict
from sentence_transformers import SentenceTransformer, util

# Ensure nltk data is available
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt")

# Load the embedding model
sbert = SentenceTransformer("all-MiniLM-L6-v2")

# Reference templates for classifying unlabelled JD lines
TEMPLATES = {
    "job_title": ["We're hiring a Backend Developer", "Job Title: Cloud Engineer", "Looking for a Product Manager"],
    "responsibilities": ["You will collaborate with teams", "Expected to deliver high performance"],
    "qualifications": ["Bachelor's or Master's in CS", "Degree in engineering or related field"]
}

TEMPLATE_EMBEDDINGS = {k: sbert.encode(v, convert_to_tensor=True) for k, v in TEMPLATES.items()}

def classify_line(line):
    line_embedding = sbert.encode(line, convert_to_tensor=True)
    scores = {k: float(util.cos_sim(line_embedding, TEMPLATE_EMBEDDINGS[k]).max()) for k in TEMPLATE_EMBEDDINGS}
    best_match = max(scores, key=scores.get)
    return best_match if scores[best_match] > 0.4 else None

def extract_job_title(text):
    # Regex-based extraction
    patterns = [
        r"We are (seeking|looking for|hiring)( an?| a)? (?P<title>[A-Z][a-zA-Z\s\-]+)",
        r"Job Title[:\-]?\s*(?P<title>[A-Z][\w\s\-]+)"
    ]
    for pat in patterns:
        match = re.search(pat, text, re.IGNORECASE)
        if match:
            title = match.group("title").strip()

            # Trim any filler trailing words
            for stop_word in [" to ", " who ", " that ", " and ", " for ", " with "]:
                if stop_word in title:
                    title = title.split(stop_word)[0].strip()
                    break

            if title.lower() not in ["responsibilities", "description", "qualifications"]:
                return title

    # Manual fallback: check for a job title in individual lines
    for line in text.splitlines():
        if "job title" in line.lower():
            return line.split(":")[-1].strip()

    # Final fallback: first short line that isn't a section header
    for line in text.splitlines():
        line = line.strip()
        if not line or line.lower().startswith(("description", "responsibilities", "qualifications")):
            continue
        if len(line.split()) <= 7 and line[0].isupper():
            return line

    return "Unknown"

def extract_sections(text):
    lines = text.splitlines()
    results = defaultdict(list)
    results["job_title"] = extract_job_title(text)

    current_section = None
    normalized_headers = {
        'responsibilities': 'responsibilities',
        'qualifications': 'qualifications'
    }

    for line in lines:
        raw_line = line.strip()
        if not raw_line:
            continue

        lower_line = raw_line.lower().strip(":").strip()
        if lower_line in normalized_headers:
            current_section = normalized_headers[lower_line]
            continue

        if current_section:
            results[current_section].append(raw_line)
        else:
            category = classify_line(raw_line)
            if category and category != "job_title":
                results[category].append(raw_line)

    print("🔍 JD Section Classification Results (final):")
    for section, content in results.items():
        if section != "job_title":
            print(f"  {section}: {len(content)} lines")

    return dict(results)

def generate_jd_embedding(jd_text):
    parsed = extract_sections(jd_text)
    title = parsed.get("job_title", "Unknown")

    embeddings_by_section = {}
    for section in ["responsibilities", "qualifications"]:
        lines = parsed.get(section, [])
        if lines:
            combined = " ".join(lines)
            emb = sbert.encode(combined, convert_to_numpy=True)
            embeddings_by_section[section] = emb
            print(f"✅ Embedded section '{section}': shape = {emb.shape}")
        else:
            print(f"❌ No content found for section '{section}'")
            embeddings_by_section[section] = None

    return title, embeddings_by_section
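A quick smoke test of the JD pipeline on an inline description; the output comments reflect what the regex and section parsing above should yield for this input:

# Minimal check: title extraction, section splitting, and per-section embedding.
from jd_embedding_utils import generate_jd_embedding

jd = """We are hiring a Backend Developer to design and ship REST APIs.
Responsibilities:
Design and maintain REST services
Qualifications:
Bachelor's degree in Computer Science"""

title, embeddings = generate_jd_embedding(jd)
print(title)                                  # "Backend Developer" (trimmed at " to ")
print(embeddings["responsibilities"].shape)   # (384,) -- all-MiniLM-L6-v2 vectors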
matcher.py
ADDED
@@ -0,0 +1,94 @@
from sentence_transformers import util
import numpy as np

# Weights for each aligned JD section. Note that they sum to 1.4, so the
# combined score ranges from 0.0 to 1.4 and thresholds apply on that scale.
weights = {
    "responsibilities": 0.7,
    "qualifications": 0.7
}

# Cosine similarity with a fallback for missing embeddings
def safe_cos_sim(vec1, vec2):
    if vec1 is None or vec2 is None:
        return 0.0
    return float(util.cos_sim(vec1, vec2).item())

# Enhanced explanation with match levels
def interpret_match(label, score):
    if score >= 0.75:
        return f"✅ Strong alignment in {label}: {round(score * 100, 1)}%"
    elif score >= 0.5:
        return f"⚠️ Partial alignment in {label}: {round(score * 100, 1)}%"
    else:
        return f"❌ Weak alignment in {label}: {round(score * 100, 1)}%"

# Matching logic
def calculate_match_score(jd_embeddings, resume_embeddings):
    explanation = []
    total_score = 0.0

    # Responsibilities: compared against experience + projects
    jd_resp = jd_embeddings.get("responsibilities")
    resume_resp = _combine_embeddings([
        resume_embeddings.get("experience"),
        resume_embeddings.get("projects")
    ])
    sim_resp = safe_cos_sim(jd_resp, resume_resp)
    total_score += sim_resp * weights["responsibilities"]
    explanation.append(interpret_match("Responsibilities", sim_resp))

    # Qualifications: compared against education + certifications + skills
    jd_qual = jd_embeddings.get("qualifications")
    resume_qual = _combine_embeddings([
        resume_embeddings.get("education"),
        resume_embeddings.get("certifications"),
        resume_embeddings.get("skills")
    ])
    sim_qual = safe_cos_sim(jd_qual, resume_qual)
    total_score += sim_qual * weights["qualifications"]
    explanation.append(interpret_match("Qualifications", sim_qual))

    return round(total_score, 3), explanation

# Combine multiple numpy vectors into one by averaging
def _combine_embeddings(embeddings_list):
    valid = [vec for vec in embeddings_list if vec is not None]
    if not valid:
        return None
    return np.mean(valid, axis=0)

# Main matcher
def match_all_resumes(jd_title, jd_embeddings, resume_data, threshold=0.8):
    all_candidates = []

    print(f"\n📌 Matching resumes against JD: **{jd_title}**\n")

    for filename, data in resume_data.items():
        parsed = data.get("parsed", {})
        embeddings = data.get("embedding", {})

        name = _extract_name(parsed, fallback=filename)
        score, explanation = calculate_match_score(jd_embeddings, embeddings)

        print(f"🔍 {name} — Score: {round(score*100, 1)}%")
        for line in explanation:
            print("  •", line)
        print("✅ Shortlisted\n" if score >= threshold else "❌ Not shortlisted\n")

        all_candidates.append({
            "name": name,
            "score": score,
            "reasoning": explanation,
            "resume_id": data.get("id"),  # Assumes a resume ID is stored in the data, if available
            "is_match": score >= threshold  # Flag for passing the threshold
        })

    return all_candidates

# Name extractor with fallback
def _extract_name(parsed, fallback="Unknown"):
    name_lines = parsed.get("name", [])
    for line in name_lines:
        if line and any(c.isalpha() for c in line):
            return line.strip()
    return fallback
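A worked example of the scoring arithmetic: with a responsibilities similarity of 1.0 and a qualifications similarity of about 0.707, the total is 0.7 × 1.0 + 0.7 × 0.707 ≈ 1.195 on the 0–1.4 scale. The hand-built vectors below keep the cosines easy to verify without loading a model:

# Tiny vectors chosen so the cosine similarities can be checked by hand.
import numpy as np
from matcher import calculate_match_score

jd = {
    "responsibilities": np.array([1.0, 0.0]),
    "qualifications": np.array([0.0, 1.0]),
}
resume = {
    "experience": np.array([1.0, 0.0]),   # cos = 1.0 against responsibilities
    "projects": None,
    "education": np.array([1.0, 1.0]),    # cos ≈ 0.707 against qualifications
    "certifications": None,
    "skills": None,
}

score, reasoning = calculate_match_score(jd, resume)
print(score)          # 1.195
for line in reasoning:
    print(line)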
models.py
ADDED
@@ -0,0 +1,91 @@
from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, Float, Text, LargeBinary, DateTime, JSON
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy.sql import func

Base = declarative_base()

class User(Base):
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    email = Column(String, unique=True, index=True)
    password = Column(String)
    name = Column(String)
    created_at = Column(DateTime, server_default=func.now())

    job_descriptions = relationship("JobDescription", back_populates="user")
    resumes = relationship("Resume", back_populates="user")
    memories = relationship("ApplicationMemory", back_populates="user")

class JobDescription(Base):
    __tablename__ = "job_descriptions"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"))
    title = Column(String)
    description = Column(Text)
    embedding_data = Column(JSON)
    sections = Column(JSON)
    created_at = Column(DateTime, server_default=func.now())

    user = relationship("User", back_populates="job_descriptions")
    matches = relationship("Match", back_populates="job_description")

class Resume(Base):
    __tablename__ = "resumes"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"))
    filename = Column(String)
    candidate_name = Column(String, nullable=True)
    file_content = Column(LargeBinary)
    parsed_data = Column(JSON)
    embedding_data = Column(JSON)
    summary = Column(Text)
    created_at = Column(DateTime, server_default=func.now())

    user = relationship("User", back_populates="resumes")
    matches = relationship("Match", back_populates="resume")

class Match(Base):
    __tablename__ = "matches"

    id = Column(Integer, primary_key=True, index=True)
    job_description_id = Column(Integer, ForeignKey("job_descriptions.id"))
    resume_id = Column(Integer, ForeignKey("resumes.id"))
    score = Column(Float)
    is_match = Column(Boolean)
    reasoning = Column(JSON)
    created_at = Column(DateTime, server_default=func.now())

    job_description = relationship("JobDescription", back_populates="matches")
    resume = relationship("Resume", back_populates="matches")
    interviews = relationship("Interview", back_populates="match")

class Interview(Base):
    __tablename__ = "interviews"

    id = Column(Integer, primary_key=True, index=True)
    match_id = Column(Integer, ForeignKey("matches.id"))
    scheduled_time = Column(DateTime)
    email_sent = Column(Boolean, default=False)
    status = Column(String, default="scheduled")  # scheduled, completed, cancelled
    notes = Column(Text, nullable=True)
    created_at = Column(DateTime, server_default=func.now())

    match = relationship("Match", back_populates="interviews")

class ApplicationMemory(Base):
    """Long-term memory storage for the application"""
    __tablename__ = "application_memories"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"))
    type = Column(String, index=True)  # job_description, resume, match, interview, etc.
    reference_id = Column(Integer)  # ID of the referenced entity
    data = Column(JSON)  # Main data to store
    # "metadata" is a reserved attribute name on SQLAlchemy declarative models,
    # so the column is mapped explicitly under a different Python attribute.
    meta = Column("metadata", JSON, nullable=True)  # Additional metadata
    created_at = Column(DateTime, server_default=func.now())

    user = relationship("User", back_populates="memories")
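These models declare their own `Base` rather than importing the one from database.py, and app.py does not use them yet (it runs raw sqlite3). Creating the tables against the shared engine would be the standard call:

# One-time table creation for the ORM models, bound to the engine from database.py.
from database import engine
from models import Base

Base.metadata.create_all(bind=engine)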
requirements.txt
ADDED
@@ -0,0 +1,14 @@
nltk
sentence-transformers
spacy
numpy<2
pybind11
torch
torchvision
tf-keras
fastapi
uvicorn
python-multipart
pdfplumber
python-dotenv
email-validator
sqlalchemy
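`sqlalchemy` is added above because database.py and models.py import it. Note also that `spacy` installs only the library; the `en_core_web_sm` model loaded in resume_embedding_utils.py has to be fetched once, e.g. with `python -m spacy download en_core_web_sm` or from Python:

# One-time spaCy model download (equivalent to the CLI command above).
import spacy.cli

spacy.cli.download("en_core_web_sm")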
resume_embedding_utils.py
ADDED
@@ -0,0 +1,167 @@
# --- resume_embedding_utils.py ---
import re
import nltk
import spacy
import pdfplumber
from collections import defaultdict
from sentence_transformers import SentenceTransformer, util
from pathlib import Path

# --- Setup ---
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt")
nlp = spacy.load("en_core_web_sm")
sbert = SentenceTransformer("all-MiniLM-L6-v2")

# --- Templates for fallback classification ---
RESUME_TEMPLATES = {
    "name": ["My name is", "Resume of", "Name:"],
    "skills": ["Skills: Python, Java", "Proficient in C++ and ML"],
    "experience": ["Worked at Google", "Software Engineer at Amazon"],
    "education": ["Bachelor of Technology from IIT", "Master's in Data Science"],
    "certifications": ["AWS Certified", "Completed PMP Certification"],
    "projects": ["Built an AI chatbot", "Project: Deep Learning"],
    "tech_stack": ["Tech Stack: Python, TensorFlow", "Languages: Java, C++"]
}

TEMPLATE_EMBEDDINGS = {
    k: sbert.encode(v, convert_to_tensor=True)
    for k, v in RESUME_TEMPLATES.items()
}

COMMON_HEADERS = {
    "skills": ["skills", "technical skills"],
    "experience": ["experience", "work experience", "employment"],
    "education": ["education", "academics"],
    "certifications": ["certifications"],
    "projects": ["projects", "achievements"],
    "tech_stack": ["tech stack", "languages", "tools"],
    "name": ["name", "profile"]
}

def normalize_header(text):
    lower = text.lower().strip().strip(":")
    for section, aliases in COMMON_HEADERS.items():
        if any(lower.startswith(alias) for alias in aliases):
            return section
    return None

def classify_line(line):
    emb = sbert.encode(line, convert_to_tensor=True)
    scores = {
        k: float(util.cos_sim(emb, TEMPLATE_EMBEDDINGS[k]).max())
        for k in TEMPLATE_EMBEDDINGS
    }
    best = max(scores, key=scores.get)
    return best if scores[best] > 0.4 else None

def extract_name(text):
    for line in text.splitlines():
        doc = nlp(line.strip())
        for ent in doc.ents:
            if ent.label_ == "PERSON":
                return ent.text.strip()
    return None

def pdf_to_text(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        return "\n".join([page.extract_text() or "" for page in pdf.pages])

def extract_resume_sections(text):
    lines = text.splitlines()
    merged_lines = []
    prev_line = ""

    # Merge continuation lines (lowercase starts, conjunctions) into the previous line
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if prev_line and (line[0].islower() or line.startswith(("and", "which", "-", "or", ",", "of", "to"))):
            merged_lines[-1] += " " + line
        else:
            merged_lines.append(line)
        prev_line = line

    sections = defaultdict(list)
    current_section = None
    name_found = extract_name(text)

    for line in merged_lines:
        normalized = normalize_header(line)
        if normalized:
            current_section = normalized
            continue

        lower = line.lower()
        if any(w in lower for w in ["bachelor", "ph.d", "master", "diploma", "msc", "b.tech", "mba"]):
            current_section = "education"
        elif "tech stack" in lower or "languages" in lower or "tools" in lower:
            current_section = "tech_stack"
        elif "achievements" in lower or line.startswith(("Built", "Developed")) or "project" in lower:
            current_section = "projects"
        elif "work experience" in lower or re.search(r"(intern|engineer|manager|scientist|developer)", lower):
            current_section = "experience"

        if not current_section:
            current_section = classify_line(line)

        if current_section:
            if current_section in ["education", "experience", "certifications"] and sections[current_section]:
                if line[0].islower() or re.match(r"^(Concentrated|Focused|Research|Worked|Led|Responsible|Published|with|and|using|or|to)\b", line):
                    sections[current_section][-1] += " " + line
                    continue
            sections[current_section].append(line)

    if name_found and name_found not in sections.get("name", []):
        sections["name"].insert(0, name_found)

    return dict(sections)

def generate_resume_embedding(parsed_resume):
    combined = " ".join(
        parsed_resume.get("skills", []) +
        parsed_resume.get("experience", []) +
        parsed_resume.get("education", []) +
        parsed_resume.get("certifications", []) +
        parsed_resume.get("projects", []) +
        parsed_resume.get("tech_stack", [])
    )
    if not combined.strip():
        return sbert.encode("generic resume", convert_to_numpy=True)
    return sbert.encode(combined, convert_to_numpy=True)

def generate_embeddings_for_all_resumes(pdf_paths):
    results = {}

    print("\n🔍 DEBUGGING RESUME PARSING:\n")

    for pdf_path in pdf_paths:
        file_name = Path(pdf_path).name
        text = pdf_to_text(pdf_path)
        parsed = extract_resume_sections(text)

        print(f"\n📄 Resume: {file_name}")
        for section in ["name", "skills", "experience", "education", "certifications", "projects", "tech_stack"]:
            lines = parsed.get(section)
            if lines:
                print(f"  ✅ {section.title()}: {len(lines)} line(s)")
            else:
                print(f"  ❌ {section.title()}: Not found")

        embedding = generate_resume_embedding(parsed)
        print(f"  🔢 Embedding shape: {embedding.shape}")

        results[file_name] = {
            "embedding": {
                section: sbert.encode(" ".join(parsed[section]), convert_to_numpy=True) if parsed.get(section) else None
                for section in ["skills", "experience", "education", "certifications", "projects", "tech_stack"]
            },
            "parsed": parsed
        }

    return results
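A single-resume sketch of the pipeline above, assuming a local PDF (file name is hypothetical):

# Hypothetical run: PDF -> text -> sections -> one pooled embedding.
from resume_embedding_utils import pdf_to_text, extract_resume_sections, generate_resume_embedding

text = pdf_to_text("alice.pdf")     # assumed local resume PDF
parsed = extract_resume_sections(text)
print(sorted(parsed.keys()))        # e.g. ['education', 'experience', 'name', 'skills']
vec = generate_resume_embedding(parsed)
print(vec.shape)                    # (384,) for all-MiniLM-L6-v2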
schemas.py
ADDED
@@ -0,0 +1,16 @@
from pydantic import BaseModel, EmailStr
from typing import Optional, Dict, Any, List
from datetime import datetime

# ...existing schemas...

class MemoryData(BaseModel):
    id: int
    type: str
    reference_id: int
    data: Dict[str, Any]
    metadata: Optional[Dict[str, Any]] = None
    created_at: datetime

    class Config:
        orm_mode = True  # Pydantic v1 syntax; on Pydantic v2 this is `from_attributes = True`
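A quick construction check for the schema (values are illustrative):

# Validates field types on construction; no database involved.
from datetime import datetime
from schemas import MemoryData

m = MemoryData(
    id=1,
    type="match",
    reference_id=42,
    data={"score": 0.91},
    created_at=datetime.utcnow(),
)
print(m.type, m.data["score"])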