ak0601 committed (verified)
Commit d1bb0a2 · 1 Parent(s): a761602

Upload 2 files

Files changed (2):
  1. app_job_copy_1.py +473 -0
  2. requirements.txt +12 -3
app_job_copy_1.py ADDED
@@ -0,0 +1,473 @@
+ import streamlit as st
+ import pandas as pd
+ import json
+ import os
+ from pydantic import BaseModel, Field
+ from typing import List, Set, Dict, Any, Optional
+ import time
+ from langchain_openai import ChatOpenAI
+ from langchain_core.messages import HumanMessage
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.prompts import PromptTemplate
+ import gspread
+ from google.oauth2 import service_account
+
+ st.set_page_config(
+     page_title="Candidate Matching App",
+     page_icon="👨‍💻🎯",
+     layout="wide"
+ )
+
+ # Define pydantic model for structured output
+ class Shortlist(BaseModel):
+     fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
+     candidate_name: str = Field(description="The name of the candidate.")
+     candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
+     candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with their educational background.")
+     candidate_location: str = Field(description="The location of the candidate.")
+     justification: str = Field(description="Justification for the shortlisted candidate with the fit score.")
+
+ # Function to parse and normalize tech stacks
+ def parse_tech_stack(stack):
+     if pd.isna(stack) or stack == "" or stack is None:
+         return set()
+     if isinstance(stack, set):
+         return stack
+     try:
+         # Handle potential string representation of sets
+         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
+             # This could be a string representation of a set; lowercase for consistent matching
+             items = stack.strip("{}").split(",")
+             return set(item.strip().strip("'\"").lower() for item in items if item.strip())
+         return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
+     except Exception as e:
+         st.error(f"Error parsing tech stack: {e}")
+         return set()
+
+ def display_tech_stack(stack_set):
+     if isinstance(stack_set, set):
+         return ", ".join(sorted(stack_set))
+     return str(stack_set)
+
+ def get_matching_candidates(job_stack, candidates_df):
+     """Find candidates with a matching tech stack for a specific job."""
+     matched = []
+     job_stack_set = parse_tech_stack(job_stack)
+
+     for _, candidate in candidates_df.iterrows():
+         candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
+         common = job_stack_set & candidate_stack
+         if len(common) >= 2:
+             matched.append({
+                 "Name": candidate["Full Name"],
+                 "URL": candidate["LinkedIn URL"],
+                 "Degree & Education": candidate["Degree & University"],
+                 "Years of Experience": candidate["Years of Experience"],
+                 "Current Title & Company": candidate['Current Title & Company'],
+                 "Key Highlights": candidate["Key Highlights"],
+                 "Location": candidate["Location (from most recent experience)"],
+                 "Experience": str(candidate["Experience"]),
+                 "Tech Stack": candidate_stack
+             })
+     return matched
+
+ def setup_llm():
+     """Set up the LangChain LLM with structured output."""
+     # Create LLM instance
+     llm = ChatOpenAI(
+         model="gpt-4o-mini",
+         temperature=0,
+         max_tokens=None,
+         timeout=None,
+         max_retries=2,
+     )
+
+     # Create structured output
+     sum_llm = llm.with_structured_output(Shortlist)
+
+     # Create system prompt
+     system = """You are an expert recruiter. Your task is to analyse the candidate profile, determine whether it matches the job details, and provide a score (out of 10) indicating how compatible the profile is with the job.
+     Keep the following points in mind while estimating the candidate's fit score:
+     For education:
+     Tier 1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
+     Tier 2 - UC Davis, Georgia Tech, Purdue, UMass Amherst, etc. - Moderate points
+     Tier 3 - Unknown or unranked institutions - Lower points or reject
+
+     Startup experience requirement:
+     Candidates must have worked as a direct employee at a VC-backed startup (Seed to Series C/D).
+     Preferred backers - Y Combinator, Sequoia, a16z, Accel, Founders Fund, LightSpeed, Greylock, Benchmark, Index Ventures, etc.
+
+     The fit score maps to the following bands:
+     1–5 - Poor Fit - Auto-reject
+     6–7 - Weak Fit - Auto-reject
+     8.0–8.7 - Moderate Fit - Auto-reject
+     8.8–10 - STRONG Fit - Include in results
+     """
+
+     # Create query prompt
+     query_prompt = ChatPromptTemplate.from_messages([
+         ("system", system),
+         ("human", """
+     You are an expert recruiter. Your task is to determine whether the candidate is a correct match for the given job.
+     For this you will be provided with the following inputs for the job and the candidate:
+     Job Details
+     Company: {Company}
+     Role: {Role}
+     About Company: {desc}
+     Locations: {Locations}
+     Tech Stack: {Tech_Stack}
+     Industry: {Industry}
+
+     Candidate Details:
+     Full Name: {Full_Name}
+     LinkedIn URL: {LinkedIn_URL}
+     Current Title & Company: {Current_Title_Company}
+     Years of Experience: {Years_of_Experience}
+     Degree & University: {Degree_University}
+     Key Tech Stack: {Key_Tech_Stack}
+     Key Highlights: {Key_Highlights}
+     Location (from most recent experience): {cand_Location}
+     Past Experience: {Experience}
+
+     Answer in the structured manner as per the schema.
+     If any parameter is Unknown, do not include it in the summary; only include the parameters that are known.
+     """),
+     ])
+
+     # Chain the prompt and LLM
+     cat_class = query_prompt | sum_llm
+
+     return cat_class
+
+ def call_llm(candidate_data, job_data, llm_chain):
+     """Call the actual LLM to evaluate the candidate."""
+     try:
+         # Convert tech stacks to strings for the LLM payload
+         job_tech_stack = job_data.get("Tech_Stack", set())
+         candidate_tech_stack = candidate_data.get("Tech Stack", set())
+
+         if isinstance(job_tech_stack, set):
+             job_tech_stack = ", ".join(sorted(job_tech_stack))
+
+         if isinstance(candidate_tech_stack, set):
+             candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
+
+         # Prepare payload for LLM
+         payload = {
+             "Company": job_data.get("Company", ""),
+             "Role": job_data.get("Role", ""),
+             "desc": job_data.get("desc", ""),
+             "Locations": job_data.get("Locations", ""),
+             "Tech_Stack": job_tech_stack,
+             "Industry": job_data.get("Industry", ""),
+
+             "Full_Name": candidate_data.get("Name", ""),
+             "LinkedIn_URL": candidate_data.get("URL", ""),
+             "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
+             "Years_of_Experience": candidate_data.get("Years of Experience", ""),
+             "Degree_University": candidate_data.get("Degree & Education", ""),
+             "Key_Tech_Stack": candidate_tech_stack,
+             "Key_Highlights": candidate_data.get("Key Highlights", ""),
+             "cand_Location": candidate_data.get("Location", ""),
+             "Experience": candidate_data.get("Experience", "")
+         }
+
+         # Call LLM
+         response = llm_chain.invoke(payload)
+         print(candidate_data.get("Experience", ""))
+
+         # Return response in expected format
+         return {
+             "candidate_name": response.candidate_name,
+             "candidate_url": response.candidate_url,
+             "candidate_summary": response.candidate_summary,
+             "candidate_location": response.candidate_location,
+             "fit_score": response.fit_score,
+             "justification": response.justification
+         }
+     except Exception as e:
+         st.error(f"Error calling LLM: {e}")
+         # Fallback to a default response
+         return {
+             "candidate_name": candidate_data.get("Name", "Unknown"),
+             "candidate_url": candidate_data.get("URL", ""),
+             "candidate_summary": "Error processing candidate profile",
+             "candidate_location": candidate_data.get("Location", "Unknown"),
+             "fit_score": 0.0,
+             "justification": f"Error in LLM processing: {str(e)}"
+         }
+
+ def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
+     """Process candidates for a specific job using the LLM."""
+     if llm_chain is None:
+         with st.spinner("Setting up LLM..."):
+             llm_chain = setup_llm()
+
+     selected_candidates = []
+
+     try:
+         # Get job-specific data
+         job_data = {
+             "Company": job_row["Company"],
+             "Role": job_row["Role"],
+             "desc": job_row.get("One liner", ""),
+             "Locations": job_row.get("Locations", ""),
+             "Tech_Stack": job_row["Tech Stack"],
+             "Industry": job_row.get("Industry", "")
+         }
+
+         # Find matching candidates for this job
+         with st.spinner("Finding matching candidates based on tech stack..."):
+             matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
+
+         if not matching_candidates:
+             st.warning("No candidates with matching tech stack found for this job.")
+             return []
+
+         st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
+
+         # Create progress elements
+         candidates_progress = st.progress(0)
+         candidate_status = st.empty()
+
+         # Process each candidate
+         for i, candidate_data in enumerate(matching_candidates):
+             # Update progress
+             candidates_progress.progress((i + 1) / len(matching_candidates))
+             candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
+
+             # Process the candidate with the LLM
+             response = call_llm(candidate_data, job_data, llm_chain)
+
+             response_dict = {
+                 "Name": response["candidate_name"],
+                 "LinkedIn": response["candidate_url"],
+                 "summary": response["candidate_summary"],
+                 "Location": response["candidate_location"],
+                 "Fit Score": response["fit_score"],
+                 "justification": response["justification"],
+                 # Add back original candidate data for context
+                 "Educational Background": candidate_data.get("Degree & Education", ""),
+                 "Years of Experience": candidate_data.get("Years of Experience", ""),
+                 "Current Title & Company": candidate_data.get("Current Title & Company", "")
+             }
+
+             # Add to selected candidates if the score is high enough
+             if response["fit_score"] >= 8.8:
+                 selected_candidates.append(response_dict)
+                 st.write(response_dict)
+             else:
+                 st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
+
+         # Clear progress indicators
+         candidates_progress.empty()
+         candidate_status.empty()
+
+         # Show results
+         if selected_candidates:
+             st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
+         else:
+             st.info("No candidates met the minimum fit score threshold for this job.")
+
+         return selected_candidates
+
+     except Exception as e:
+         st.error(f"Error processing job: {e}")
+         return []
+
+ def main():
+     st.title("👨‍💻 Candidate Matching App")
+
+     # Initialize session state
+     if 'processed_jobs' not in st.session_state:
+         st.session_state.processed_jobs = {}
+
+     st.write("""
+     This app matches job listings with candidate profiles based on tech stack and other criteria.
+     Select a job to find matching candidates.
+     """)
+
+     # API Key input
+     with st.sidebar:
+         st.header("API Configuration")
+         api_key = st.text_input("Enter OpenAI API Key", type="password")
+         if api_key:
+             os.environ["OPENAI_API_KEY"] = api_key
+             st.success("API Key set!")
+         else:
+             st.warning("Please enter OpenAI API Key to use LLM features")
+
+     # Authenticate with Google Sheets via the service account
+     SERVICE_ACCOUNT_FILE = 'synapse-recruitment-e94255ca76fd.json'
+     SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
+     creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
+     gc = gspread.authorize(creds)
+     job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
+     candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
+
+     # Show API key warning if not set
+     if not api_key:
+         st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
+
+     if api_key:
+         try:
+             # Load data from Google Sheets
+             job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
+             job_data = job_worksheet.get_all_values()
+             candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
+             candidate_data = candidate_worksheet.get_all_values()
+
+             # Convert to DataFrames
+             jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
+             candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
+             candidates_df = candidates_df.fillna("Unknown")
+
+             # Display data preview
+             with st.expander("Preview uploaded data"):
+                 st.subheader("Jobs Data Preview")
+                 st.dataframe(jobs_df.head(3))
+
+                 st.subheader("Candidates Data Preview")
+                 st.dataframe(candidates_df.head(3))
+
+             # Map column names if needed
+             column_mapping = {
+                 "Full Name": "Full Name",
+                 "LinkedIn URL": "LinkedIn URL",
+                 "Current Title & Company": "Current Title & Company",
+                 "Years of Experience": "Years of Experience",
+                 "Degree & University": "Degree & University",
+                 "Key Tech Stack": "Key Tech Stack",
+                 "Key Highlights": "Key Highlights",
+                 "Location (from most recent experience)": "Location (from most recent experience)"
+             }
+
+             # Rename columns if they don't match the expected names
+             candidates_df = candidates_df.rename(columns={
+                 col: mapping for col, mapping in column_mapping.items()
+                 if col in candidates_df.columns and col != mapping
+             })
+
+             # Instead of processing all jobs upfront, display job selection
+             # and only process the selected job when the user chooses it
+             display_job_selection(jobs_df, candidates_df)
+
+         except Exception as e:
+             st.error(f"Error processing files: {e}")
+
+     st.divider()
+
+
+ def display_job_selection(jobs_df, candidates_df):
+     # Store the LLM chain in session state to avoid recreating it
+     if 'llm_chain' not in st.session_state:
+         st.session_state.llm_chain = None
+
+     st.subheader("Select a job to view potential matches")
+
+     # Create job options - but don't compute matches yet
+     job_options = []
+     for i, row in jobs_df.iterrows():
+         job_options.append(f"{row['Role']} at {row['Company']}")
+
+     if job_options:
+         selected_job_index = st.selectbox("Jobs:",
+                                           range(len(job_options)),
+                                           format_func=lambda x: job_options[x])
+
+         # Display job details
+         job_row = jobs_df.iloc[selected_job_index]
+
+         # Parse tech stack for display
+         job_row_stack = parse_tech_stack(job_row["Tech Stack"])
+
+         col1, col2 = st.columns([2, 1])
+
+         with col1:
+             st.subheader(f"Job Details: {job_row['Role']}")
+
+             job_details = {
+                 "Company": job_row["Company"],
+                 "Role": job_row["Role"],
+                 "Description": job_row.get("One liner", "N/A"),
+                 "Locations": job_row.get("Locations", "N/A"),
+                 "Industry": job_row.get("Industry", "N/A"),
+                 "Tech Stack": display_tech_stack(job_row_stack)
+             }
+
+             for key, value in job_details.items():
+                 st.markdown(f"**{key}:** {value}")
+
+         # Create a key for this job in session state
+         job_key = f"job_{selected_job_index}_processed"
+
+         if job_key not in st.session_state:
+             st.session_state[job_key] = False
+
+         # Add a process button for this job
+         if not st.session_state[job_key]:
+             if st.button("Find Matching Candidates for this Job"):
+                 if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
+                     st.error("Please enter your OpenAI API key in the sidebar before processing")
+                 else:
+                     # Process candidates for this job (only when requested)
+                     selected_candidates = process_candidates_for_job(
+                         job_row,
+                         candidates_df,
+                         st.session_state.llm_chain
+                     )
+
+                     # Store the results and mark this job as processed
+                     if 'Selected_Candidates' not in st.session_state:
+                         st.session_state.Selected_Candidates = {}
+                     st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
+                     st.session_state[job_key] = True
+
+                     # Store the LLM chain for reuse
+                     if st.session_state.llm_chain is None:
+                         st.session_state.llm_chain = setup_llm()
+
+                     # Force refresh
+                     st.rerun()
+
+         # Display selected candidates if already processed
+         if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
+             selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
+
+             # Display selected candidates
+             st.subheader("Selected Candidates")
+
+             if len(selected_candidates) > 0:
+                 for i, candidate in enumerate(selected_candidates):
+                     with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
+                         col1, col2 = st.columns([3, 1])
+
+                         with col1:
+                             st.markdown(f"**Summary:** {candidate['summary']}")
+                             st.markdown(f"**Current:** {candidate['Current Title & Company']}")
+                             st.markdown(f"**Education:** {candidate['Educational Background']}")
+                             st.markdown(f"**Experience:** {candidate['Years of Experience']}")
+                             st.markdown(f"**Location:** {candidate['Location']}")
+                             st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
+
+                         with col2:
+                             st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
+
+                         st.markdown("**Justification:**")
+                         st.info(candidate['justification'])
+             else:
+                 st.info("No candidates met the minimum score threshold (8.8) for this job.")
+
+             # We don't show tech-matched candidates here since they are generated
+             # during the LLM matching process now
+
+             # Add a reset button to start over
+             if st.button("Reset and Process Again"):
+                 st.session_state[job_key] = False
+                 st.rerun()
+
+ if __name__ == "__main__":
+     main()
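
A quick way to sanity-check the pure-Python matching helpers added above, without the Streamlit UI or an OpenAI key, is to call them on toy data. This is a minimal sketch, not part of the commit; it assumes the file is importable as app_job_copy_1 and is run with plain Python (where Streamlit calls degrade to no-op warnings), and the candidate row below is purely illustrative:

import pandas as pd
from app_job_copy_1 import parse_tech_stack, get_matching_candidates

# Hypothetical candidate row using the exact column names the app expects
candidates_df = pd.DataFrame([{
    "Full Name": "Jane Doe",
    "LinkedIn URL": "https://www.linkedin.com/in/janedoe",
    "Degree & University": "BS Computer Science, UIUC",
    "Years of Experience": "5",
    "Current Title & Company": "Backend Engineer at ExampleCo",
    "Key Highlights": "Scaled a Python/PostgreSQL service",
    "Location (from most recent experience)": "Austin, TX",
    "Experience": "ExampleCo (2020-2025)",
    "Key Tech Stack": "Python, PostgreSQL, AWS",
}])

job_stack = "Python, AWS, Kubernetes"
print(parse_tech_stack(job_stack))          # {'python', 'aws', 'kubernetes'}
# The candidate is returned because she shares >= 2 normalized stack items (python, aws)
print(get_matching_candidates(job_stack, candidates_df))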
requirements.txt CHANGED
@@ -1,3 +1,12 @@
- altair
- pandas
- streamlit
+ gspread==6.2.0
+ langchain==0.3.25
+ langchain_community==0.3.23
+ langchain_core==0.3.59
+ langchain_google_genai==2.1.4
+ langchain_huggingface==0.2.0
+ langchain_openai==0.3.16
+ pandas==2.2.3
+ protobuf==6.30.2
+ pydantic==2.11.4
+ python-dotenv==1.1.0
+ streamlit==1.44.1
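
With the pinned requirements above, the app would presumably be run by installing them (pip install -r requirements.txt) and launching Streamlit (streamlit run app_job_copy_1.py). As wired in app_job_copy_1.py, the OpenAI key is entered in the sidebar at runtime, while Google Sheets access expects the service-account file synapse-recruitment-e94255ca76fd.json to be readable next to the script along with access to the two hard-coded spreadsheet keys.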