ak0601 committed on
Commit
a21823a
·
verified ·
1 Parent(s): 48e9688

Update app_job_copy_1.py

Browse files
Files changed (1) hide show
  1. app_job_copy_1.py +474 -472
app_job_copy_1.py CHANGED
@@ -1,473 +1,475 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import json
4
- import os
5
- from pydantic import BaseModel, Field
6
- from typing import List, Set, Dict, Any, Optional
7
- import time
8
- from langchain_openai import ChatOpenAI
9
- from langchain_core.messages import HumanMessage
10
- from langchain_core.prompts import ChatPromptTemplate
11
- from langchain_core.output_parsers import StrOutputParser
12
- from langchain_core.prompts import PromptTemplate
13
- import gspread
14
- from google.oauth2 import service_account
15
-
16
- st.set_page_config(
17
- page_title="Candidate Matching App",
18
- page_icon="👨‍💻🎯",
19
- layout="wide"
20
- )
21
-
22
- # Define pydantic model for structured output
23
- class Shortlist(BaseModel):
24
- fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
25
- candidate_name: str = Field(description="The name of the candidate.")
26
- candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
27
- candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
28
- candidate_location: str = Field(description="The location of the candidate.")
29
- justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
30
-
31
- # Function to parse and normalize tech stacks
32
- def parse_tech_stack(stack):
33
- if pd.isna(stack) or stack == "" or stack is None:
34
- return set()
35
- if isinstance(stack, set):
36
- return stack
37
- try:
38
- # Handle potential string representation of sets
39
- if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
40
- # This could be a string representation of a set
41
- items = stack.strip("{}").split(",")
42
- return set(item.strip().strip("'\"") for item in items if item.strip())
43
- return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
44
- except Exception as e:
45
- st.error(f"Error parsing tech stack: {e}")
46
- return set()
47
-
48
- def display_tech_stack(stack_set):
49
- if isinstance(stack_set, set):
50
- return ", ".join(sorted(stack_set))
51
- return str(stack_set)
52
-
53
- def get_matching_candidates(job_stack, candidates_df):
54
- """Find candidates with matching tech stack for a specific job"""
55
- matched = []
56
- job_stack_set = parse_tech_stack(job_stack)
57
-
58
- for _, candidate in candidates_df.iterrows():
59
- candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
60
- common = job_stack_set & candidate_stack
61
- if len(common) >= 2:
62
- matched.append({
63
- "Name": candidate["Full Name"],
64
- "URL": candidate["LinkedIn URL"],
65
- "Degree & Education": candidate["Degree & University"],
66
- "Years of Experience": candidate["Years of Experience"],
67
- "Current Title & Company": candidate['Current Title & Company'],
68
- "Key Highlights": candidate["Key Highlights"],
69
- "Location": candidate["Location (from most recent experience)"],
70
- "Experience": str(candidate["Experience"]),
71
- "Tech Stack": candidate_stack
72
- })
73
- return matched
74
-
75
- def setup_llm():
76
- """Set up the LangChain LLM with structured output"""
77
- # Create LLM instance
78
- llm = ChatOpenAI(
79
- model="gpt-4o-mini",
80
- temperature=0,
81
- max_tokens=None,
82
- timeout=None,
83
- max_retries=2,
84
- )
85
-
86
- # Create structured output
87
- sum_llm = llm.with_structured_output(Shortlist)
88
-
89
- # Create system prompt
90
- system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
91
- the profile is according to job.
92
- Try to ensure following points while estimating the candidate's fit score:
93
- For education:
94
- Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
95
- Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
96
- Tier3 - Unknown or unranked institutions - Lower points or reject
97
-
98
- Startup Experience Requirement:
99
- Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
100
- preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
101
-
102
- The fit score signifies based on following metrics:
103
- 1–5 - Poor Fit - Auto-reject
104
- 6–7 - Weak Fit - Auto-reject
105
- 8.0–8.7 - Moderate Fit - Auto-reject
106
- 8.8–10 - STRONG Fit - Include in results
107
- """
108
-
109
- # Create query prompt
110
- query_prompt = ChatPromptTemplate.from_messages([
111
- ("system", system),
112
- ("human", """
113
- You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
114
- For this you will be provided with the follwing inputs of job and candidates:
115
- Job Details
116
- Company: {Company}
117
- Role: {Role}
118
- About Company: {desc}
119
- Locations: {Locations}
120
- Tech Stack: {Tech_Stack}
121
- Industry: {Industry}
122
-
123
-
124
- Candidate Details:
125
- Full Name: {Full_Name}
126
- LinkedIn URL: {LinkedIn_URL}
127
- Current Title & Company: {Current_Title_Company}
128
- Years of Experience: {Years_of_Experience}
129
- Degree & University: {Degree_University}
130
- Key Tech Stack: {Key_Tech_Stack}
131
- Key Highlights: {Key_Highlights}
132
- Location (from most recent experience): {cand_Location}
133
- Past_Experience: {Experience}
134
-
135
-
136
- Answer in the structured manner as per the schema.
137
- If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
138
- """),
139
- ])
140
-
141
- # Chain the prompt and LLM
142
- cat_class = query_prompt | sum_llm
143
-
144
- return cat_class
145
-
146
- def call_llm(candidate_data, job_data, llm_chain):
147
- """Call the actual LLM to evaluate the candidate"""
148
- try:
149
- # Convert tech stacks to strings for the LLM payload
150
- job_tech_stack = job_data.get("Tech_Stack", set())
151
- candidate_tech_stack = candidate_data.get("Tech Stack", set())
152
-
153
- if isinstance(job_tech_stack, set):
154
- job_tech_stack = ", ".join(sorted(job_tech_stack))
155
-
156
- if isinstance(candidate_tech_stack, set):
157
- candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
158
-
159
- # Prepare payload for LLM
160
- payload = {
161
- "Company": job_data.get("Company", ""),
162
- "Role": job_data.get("Role", ""),
163
- "desc": job_data.get("desc", ""),
164
- "Locations": job_data.get("Locations", ""),
165
- "Tech_Stack": job_tech_stack,
166
- "Industry": job_data.get("Industry", ""),
167
-
168
- "Full_Name": candidate_data.get("Name", ""),
169
- "LinkedIn_URL": candidate_data.get("URL", ""),
170
- "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
171
- "Years_of_Experience": candidate_data.get("Years of Experience", ""),
172
- "Degree_University": candidate_data.get("Degree & Education", ""),
173
- "Key_Tech_Stack": candidate_tech_stack,
174
- "Key_Highlights": candidate_data.get("Key Highlights", ""),
175
- "cand_Location": candidate_data.get("Location", ""),
176
- "Experience": candidate_data.get("Experience", "")
177
- }
178
-
179
- # Call LLM
180
- response = llm_chain.invoke(payload)
181
- print(candidate_data.get("Experience", ""))
182
-
183
- # Return response in expected format
184
- return {
185
- "candidate_name": response.candidate_name,
186
- "candidate_url": response.candidate_url,
187
- "candidate_summary": response.candidate_summary,
188
- "candidate_location": response.candidate_location,
189
- "fit_score": response.fit_score,
190
- "justification": response.justification
191
- }
192
- except Exception as e:
193
- st.error(f"Error calling LLM: {e}")
194
- # Fallback to a default response
195
- return {
196
- "candidate_name": candidate_data.get("Name", "Unknown"),
197
- "candidate_url": candidate_data.get("URL", ""),
198
- "candidate_summary": "Error processing candidate profile",
199
- "candidate_location": candidate_data.get("Location", "Unknown"),
200
- "fit_score": 0.0,
201
- "justification": f"Error in LLM processing: {str(e)}"
202
- }
203
-
204
- def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
205
- """Process candidates for a specific job using the LLM"""
206
- if llm_chain is None:
207
- with st.spinner("Setting up LLM..."):
208
- llm_chain = setup_llm()
209
-
210
- selected_candidates = []
211
-
212
- try:
213
- # Get job-specific data
214
- job_data = {
215
- "Company": job_row["Company"],
216
- "Role": job_row["Role"],
217
- "desc": job_row.get("One liner", ""),
218
- "Locations": job_row.get("Locations", ""),
219
- "Tech_Stack": job_row["Tech Stack"],
220
- "Industry": job_row.get("Industry", "")
221
- }
222
-
223
- # Find matching candidates for this job
224
- with st.spinner("Finding matching candidates based on tech stack..."):
225
- matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
226
-
227
- if not matching_candidates:
228
- st.warning("No candidates with matching tech stack found for this job.")
229
- return []
230
-
231
- st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
232
-
233
- # Create progress elements
234
- candidates_progress = st.progress(0)
235
- candidate_status = st.empty()
236
-
237
- # Process each candidate
238
- for i, candidate_data in enumerate(matching_candidates):
239
- # Update progress
240
- candidates_progress.progress((i + 1) / len(matching_candidates))
241
- candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
242
-
243
- # Process the candidate with the LLM
244
- response = call_llm(candidate_data, job_data, llm_chain)
245
-
246
- response_dict = {
247
- "Name": response["candidate_name"],
248
- "LinkedIn": response["candidate_url"],
249
- "summary": response["candidate_summary"],
250
- "Location": response["candidate_location"],
251
- "Fit Score": response["fit_score"],
252
- "justification": response["justification"],
253
- # Add back original candidate data for context
254
- "Educational Background": candidate_data.get("Degree & Education", ""),
255
- "Years of Experience": candidate_data.get("Years of Experience", ""),
256
- "Current Title & Company": candidate_data.get("Current Title & Company", "")
257
- }
258
-
259
- # Add to selected candidates if score is high enough
260
- if response["fit_score"] >= 8.8:
261
- selected_candidates.append(response_dict)
262
- st.markdown(response_dict)
263
- else:
264
- st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
265
-
266
- # Clear progress indicators
267
- candidates_progress.empty()
268
- candidate_status.empty()
269
-
270
- # Show results
271
- if selected_candidates:
272
- st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
273
- else:
274
- st.info("No candidates met the minimum fit score threshold for this job.")
275
-
276
- return selected_candidates
277
-
278
- except Exception as e:
279
- st.error(f"Error processing job: {e}")
280
- return []
281
-
282
- def main():
283
- st.title("👨‍💻 Candidate Matching App")
284
-
285
- # Initialize session state
286
- if 'processed_jobs' not in st.session_state:
287
- st.session_state.processed_jobs = {}
288
-
289
- st.write("""
290
- This app matches job listings with candidate profiles based on tech stack and other criteria.
291
- Select a job to find matching candidates.
292
- """)
293
-
294
- # API Key input
295
- with st.sidebar:
296
- st.header("API Configuration")
297
- api_key = st.text_input("Enter OpenAI API Key", type="password")
298
- if api_key:
299
- os.environ["OPENAI_API_KEY"] = api_key
300
- st.success("API Key set!")
301
- else:
302
- st.warning("Please enter OpenAI API Key to use LLM features")
303
-
304
- # Show API key warning if not set
305
- SERVICE_ACCOUNT_FILE = 'synapse-recruitment-e94255ca76fd.json'
306
- SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
307
- creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
308
- gc = gspread.authorize(creds)
309
- job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
310
- candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
311
-
312
- if not api_key:
313
- st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
314
-
315
- if api_key:
316
- try:
317
- # Load data from Google Sheets
318
- job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
319
- job_data = job_worksheet.get_all_values()
320
- candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
321
- candidate_data = candidate_worksheet.get_all_values()
322
-
323
- # Convert to DataFrames
324
- jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
325
- candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
326
- candidates_df = candidates_df.fillna("Unknown")
327
-
328
- # Display data preview
329
- with st.expander("Preview uploaded data"):
330
- st.subheader("Jobs Data Preview")
331
- st.dataframe(jobs_df.head(3))
332
-
333
- st.subheader("Candidates Data Preview")
334
- st.dataframe(candidates_df.head(3))
335
-
336
- # Map column names if needed
337
- column_mapping = {
338
- "Full Name": "Full Name",
339
- "LinkedIn URL": "LinkedIn URL",
340
- "Current Title & Company": "Current Title & Company",
341
- "Years of Experience": "Years of Experience",
342
- "Degree & University": "Degree & University",
343
- "Key Tech Stack": "Key Tech Stack",
344
- "Key Highlights": "Key Highlights",
345
- "Location (from most recent experience)": "Location (from most recent experience)"
346
- }
347
-
348
- # Rename columns if they don't match expected
349
- candidates_df = candidates_df.rename(columns={
350
- col: mapping for col, mapping in column_mapping.items()
351
- if col in candidates_df.columns and col != mapping
352
- })
353
-
354
- # Now, instead of processing all jobs upfront, we'll display job selection
355
- # and only process the selected job when the user chooses it
356
- display_job_selection(jobs_df, candidates_df)
357
-
358
- except Exception as e:
359
- st.error(f"Error processing files: {e}")
360
-
361
- st.divider()
362
-
363
-
364
- def display_job_selection(jobs_df, candidates_df):
365
- # Store the LLM chain as a session state to avoid recreating it
366
- if 'llm_chain' not in st.session_state:
367
- st.session_state.llm_chain = None
368
-
369
- st.subheader("Select a job to view potential matches")
370
-
371
- # Create job options - but don't compute matches yet
372
- job_options = []
373
- for i, row in jobs_df.iterrows():
374
- job_options.append(f"{row['Role']} at {row['Company']}")
375
-
376
- if job_options:
377
- selected_job_index = st.selectbox("Jobs:",
378
- range(len(job_options)),
379
- format_func=lambda x: job_options[x])
380
-
381
- # Display job details
382
- job_row = jobs_df.iloc[selected_job_index]
383
-
384
- # Parse tech stack for display
385
- job_row_stack = parse_tech_stack(job_row["Tech Stack"])
386
-
387
- col1, col2 = st.columns([2, 1])
388
-
389
- with col1:
390
- st.subheader(f"Job Details: {job_row['Role']}")
391
-
392
- job_details = {
393
- "Company": job_row["Company"],
394
- "Role": job_row["Role"],
395
- "Description": job_row.get("One liner", "N/A"),
396
- "Locations": job_row.get("Locations", "N/A"),
397
- "Industry": job_row.get("Industry", "N/A"),
398
- "Tech Stack": display_tech_stack(job_row_stack)
399
- }
400
-
401
- for key, value in job_details.items():
402
- st.markdown(f"**{key}:** {value}")
403
-
404
- # Create a key for this job in session state
405
- job_key = f"job_{selected_job_index}_processed"
406
-
407
- if job_key not in st.session_state:
408
- st.session_state[job_key] = False
409
-
410
- # Add a process button for this job
411
- if not st.session_state[job_key]:
412
- if st.button(f"Find Matching Candidates for this Job"):
413
- if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
414
- st.error("Please enter your OpenAI API key in the sidebar before processing")
415
- else:
416
- # Process candidates for this job (only when requested)
417
- selected_candidates = process_candidates_for_job(
418
- job_row,
419
- candidates_df,
420
- st.session_state.llm_chain
421
- )
422
-
423
- # Store the results and set as processed
424
- if 'Selected_Candidates' not in st.session_state:
425
- st.session_state.Selected_Candidates = {}
426
- st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
427
- st.session_state[job_key] = True
428
-
429
- # Store the LLM chain for reuse
430
- if st.session_state.llm_chain is None:
431
- st.session_state.llm_chain = setup_llm()
432
-
433
- # Force refresh
434
- st.rerun()
435
-
436
- # Display selected candidates if already processed
437
- if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
438
- selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
439
-
440
- # Display selected candidates
441
- st.subheader("Selected Candidates")
442
-
443
- if len(selected_candidates) > 0:
444
- for i, candidate in enumerate(selected_candidates):
445
- with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
446
- col1, col2 = st.columns([3, 1])
447
-
448
- with col1:
449
- st.markdown(f"**Summary:** {candidate['summary']}")
450
- st.markdown(f"**Current:** {candidate['Current Title & Company']}")
451
- st.markdown(f"**Education:** {candidate['Educational Background']}")
452
- st.markdown(f"**Experience:** {candidate['Years of Experience']}")
453
- st.markdown(f"**Location:** {candidate['Location']}")
454
- st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
455
-
456
- with col2:
457
- st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
458
-
459
- st.markdown("**Justification:**")
460
- st.info(candidate['justification'])
461
- else:
462
- st.info("No candidates met the minimum score threshold (8.8) for this job.")
463
-
464
- # We don't show tech-matched candidates here since they are generated
465
- # during the LLM matching process now
466
-
467
- # Add a reset button to start over
468
- if st.button("Reset and Process Again"):
469
- st.session_state[job_key] = False
470
- st.rerun()
471
-
472
- if __name__ == "__main__":
 
 
473
  main()
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import json
4
+ import os
5
+ from pydantic import BaseModel, Field
6
+ from typing import List, Set, Dict, Any, Optional
7
+ import time
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain_core.messages import HumanMessage
10
+ from langchain_core.prompts import ChatPromptTemplate
11
+ from langchain_core.output_parsers import StrOutputParser
12
+ from langchain_core.prompts import PromptTemplate
13
+ import gspread
14
+ from google.oauth2 import service_account
15
+
16
# Page-level settings: browser-tab title, tab icon and a wide layout.
st.set_page_config(page_title="Candidate Matching App", page_icon="👨‍💻🎯", layout="wide")
21
+
22
+ # Define pydantic model for structured output
23
class Shortlist(BaseModel):
    """Schema of the structured verdict requested from the LLM for one candidate."""

    # Numeric rating of the candidate/job match.
    fit_score: float = Field(
        description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements."
    )
    # Identity fields echoed back by the model.
    candidate_name: str = Field(
        description="The name of the candidate."
    )
    candidate_url: str = Field(
        description="The URL of the candidate's LinkedIn profile."
    )
    # Free-text write-up of the profile.
    candidate_summary: str = Field(
        description="A brief summary of the candidate's skills and experience along with its educational background."
    )
    candidate_location: str = Field(
        description="The location of the candidate."
    )
    # Reasoning behind the score.
    justification: str = Field(
        description="Justification for the shortlisted candidate with the fit score"
    )
30
+
31
+ # Function to parse and normalize tech stacks
32
def parse_tech_stack(stack):
    """Normalize a tech-stack value into a set of lowercase technology names.

    Args:
        stack: ``None``, a missing value (NaN / ``pd.NA``), a ``set``, a
            list/tuple/frozenset of names, or a comma-separated string. A
            string wrapped in braces (the ``str()`` form of a set, e.g.
            ``"{'Python', 'AWS'}"``) is unwrapped and its items unquoted.

    Returns:
        A set of stripped, lowercased, non-empty names. A ``set`` argument is
        returned unchanged (same object). If parsing raises, the error is
        reported through ``st.error`` and an empty set is returned.
    """
    if stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        if isinstance(stack, (list, tuple, frozenset)):
            # Handled before pd.isna: on a list it returns an array, whose
            # truth value is ambiguous and would raise.
            tokens = [str(item) for item in stack]
        else:
            if pd.isna(stack):
                return set()
            text = str(stack).strip()
            if text.startswith("{") and text.endswith("}"):
                # String representation of a set: drop braces and quotes.
                tokens = [
                    item.strip().strip("'\"")
                    for item in text.strip("{}").split(",")
                ]
            else:
                tokens = text.split(",")
        # Lowercase every branch so both input forms compare equal, and drop
        # empty tokens ("a,,b", trailing commas) so a shared "" is never
        # counted as a matching technology.
        normalized = (token.strip().lower() for token in tokens)
        return {token for token in normalized if token}
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()
47
+
48
def display_tech_stack(stack_set):
    """Render a tech stack as text.

    A ``set`` becomes its sorted members joined by ``", "``; any other value
    is converted with ``str()``.
    """
    if not isinstance(stack_set, set):
        return str(stack_set)
    ordered_names = sorted(stack_set)
    return ", ".join(ordered_names)
52
+
53
def get_matching_candidates(job_stack, candidates_df):
    """Pre-filter candidates by tech-stack overlap with a job.

    Args:
        job_stack: the job's tech stack, in any form ``parse_tech_stack`` accepts.
        candidates_df: DataFrame with one candidate per row; the columns read
            are the ones indexed below.

    Returns:
        A list of dicts, one per candidate sharing at least two tech-stack
        entries with the job, in DataFrame row order.
    """
    required_stack = parse_tech_stack(job_stack)
    shortlisted = []

    for _, row in candidates_df.iterrows():
        row_stack = parse_tech_stack(row['Key Tech Stack'])
        overlap = required_stack & row_stack
        if len(overlap) < 2:
            # Fewer than two shared technologies: not worth an LLM call.
            continue

        profile = {
            "Name": row["Full Name"],
            "URL": row["LinkedIn URL"],
            "Degree & Education": row["Degree & University"],
            "Years of Experience": row["Years of Experience"],
            "Current Title & Company": row['Current Title & Company'],
            "Key Highlights": row["Key Highlights"],
            "Location": row["Location (from most recent experience)"],
            "Experience": str(row["Experience"]),
            "Tech Stack": row_stack,
        }
        shortlisted.append(profile)

    return shortlisted
74
+
75
def setup_llm():
    """Build the candidate-scoring chain.

    Returns:
        The chat prompt piped into a ``gpt-4o-mini`` model configured with
        ``with_structured_output(Shortlist)``.
    """
    # Prompt text is sent to the model as-is, so it is kept verbatim.
    system_message = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
the profile is according to job.
Try to ensure following points while estimating the candidate's fit score:
For education:
Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
Tier3 - Unknown or unranked institutions - Lower points or reject

Startup Experience Requirement:
Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.

The fit score signifies based on following metrics:
1–5 - Poor Fit - Auto-reject
6–7 - Weak Fit - Auto-reject
8.0–8.7 - Moderate Fit - Auto-reject
8.8–10 - STRONG Fit - Include in results
"""

    human_message = """
You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
For this you will be provided with the follwing inputs of job and candidates:
Job Details
Company: {Company}
Role: {Role}
About Company: {desc}
Locations: {Locations}
Tech Stack: {Tech_Stack}
Industry: {Industry}


Candidate Details:
Full Name: {Full_Name}
LinkedIn URL: {LinkedIn_URL}
Current Title & Company: {Current_Title_Company}
Years of Experience: {Years_of_Experience}
Degree & University: {Degree_University}
Key Tech Stack: {Key_Tech_Stack}
Key Highlights: {Key_Highlights}
Location (from most recent experience): {cand_Location}
Past_Experience: {Experience}


Answer in the structured manner as per the schema.
If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
"""

    # Model first, then the structured-output wrapper around it.
    chat_model = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    structured_model = chat_model.with_structured_output(Shortlist)

    scoring_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_message),
            ("human", human_message),
        ]
    )

    return scoring_prompt | structured_model
145
+
146
def call_llm(candidate_data, job_data, llm_chain):
    """Score one candidate against one job by invoking the LLM chain.

    Args:
        candidate_data: dict describing the candidate. Keys read (all via
            ``.get``): "Name", "URL", "Current Title & Company",
            "Years of Experience", "Degree & Education", "Tech Stack",
            "Key Highlights", "Location", "Experience".
        job_data: dict describing the job. Keys read (all via ``.get``):
            "Company", "Role", "desc", "Locations", "Tech_Stack", "Industry".
        llm_chain: object with ``invoke(payload)`` whose result exposes
            ``candidate_name``, ``candidate_url``, ``candidate_summary``,
            ``candidate_location``, ``fit_score`` and ``justification``.

    Returns:
        A dict with those six keys. If anything raises, the error is shown
        with ``st.error`` and a fallback dict with ``fit_score`` 0.0 is
        returned, so one failed candidate does not abort the batch.
    """

    def _stack_as_text(stack):
        # Sets are flattened to a sorted, comma-separated string for the
        # prompt; any other value is passed through untouched.
        if isinstance(stack, set):
            return ", ".join(sorted(stack))
        return stack

    try:
        payload = {
            # Job fields
            "Company": job_data.get("Company", ""),
            "Role": job_data.get("Role", ""),
            "desc": job_data.get("desc", ""),
            "Locations": job_data.get("Locations", ""),
            "Tech_Stack": _stack_as_text(job_data.get("Tech_Stack", set())),
            "Industry": job_data.get("Industry", ""),
            # Candidate fields
            "Full_Name": candidate_data.get("Name", ""),
            "LinkedIn_URL": candidate_data.get("URL", ""),
            "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
            "Years_of_Experience": candidate_data.get("Years of Experience", ""),
            "Degree_University": candidate_data.get("Degree & Education", ""),
            "Key_Tech_Stack": _stack_as_text(candidate_data.get("Tech Stack", set())),
            "Key_Highlights": candidate_data.get("Key Highlights", ""),
            "cand_Location": candidate_data.get("Location", ""),
            "Experience": candidate_data.get("Experience", ""),
        }

        # The candidate's experience text is deliberately not printed here:
        # it is personal data and would land in the server's stdout.
        response = llm_chain.invoke(payload)

        return {
            "candidate_name": response.candidate_name,
            "candidate_url": response.candidate_url,
            "candidate_summary": response.candidate_summary,
            "candidate_location": response.candidate_location,
            "fit_score": response.fit_score,
            "justification": response.justification,
        }
    except Exception as e:
        st.error(f"Error calling LLM: {e}")
        # Fallback to a default response that can never pass the score cut.
        return {
            "candidate_name": candidate_data.get("Name", "Unknown"),
            "candidate_url": candidate_data.get("URL", ""),
            "candidate_summary": "Error processing candidate profile",
            "candidate_location": candidate_data.get("Location", "Unknown"),
            "fit_score": 0.0,
            "justification": f"Error in LLM processing: {str(e)}",
        }
203
+
204
def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
    """Shortlist candidates for one job: tech-stack pre-filter, then LLM scoring.

    Args:
        job_row: mapping-like job record. "Company", "Role" and "Tech Stack"
            are read by index; "One liner", "Locations" and "Industry" via
            ``.get``.
        candidates_df: candidate table handed to ``get_matching_candidates``.
        llm_chain: scoring chain passed to ``call_llm``; built with
            ``setup_llm()`` when ``None``.

    Returns:
        List of result dicts for candidates whose fit score is at least 8.8.
        An empty list when nothing passes the pre-filter or when an error
        occurs (the error is shown with ``st.error``).
    """
    if llm_chain is None:
        with st.spinner("Setting up LLM..."):
            llm_chain = setup_llm()

    shortlisted = []

    try:
        # Job fields forwarded to the prompt.
        job_data = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "desc": job_row.get("One liner", ""),
            "Locations": job_row.get("Locations", ""),
            "Tech_Stack": job_row["Tech Stack"],
            "Industry": job_row.get("Industry", ""),
        }

        # Cheap tech-stack filter first, so the LLM only sees plausible profiles.
        with st.spinner("Finding matching candidates based on tech stack..."):
            prefiltered = get_matching_candidates(job_data["Tech_Stack"], candidates_df)

        if not prefiltered:
            st.warning("No candidates with matching tech stack found for this job.")
            return []

        total = len(prefiltered)
        st.success(f"Found {total} candidates with matching tech stack.")

        # Placeholders updated in place while candidates are evaluated.
        progress_bar = st.progress(0)
        status_line = st.empty()

        for position, profile in enumerate(prefiltered, start=1):
            progress_bar.progress(position / total)
            status_line.text(f"Evaluating candidate {position}/{total}: {profile.get('Name', 'Unknown')}")

            verdict = call_llm(profile, job_data, llm_chain)
            score = verdict["fit_score"]

            # LLM output plus the original sheet fields kept for context.
            result_row = {
                "Name": verdict["candidate_name"],
                "LinkedIn": verdict["candidate_url"],
                "summary": verdict["candidate_summary"],
                "Location": verdict["candidate_location"],
                "Fit Score": score,
                "justification": verdict["justification"],
                "Educational Background": profile.get("Degree & Education", ""),
                "Years of Experience": profile.get("Years of Experience", ""),
                "Current Title & Company": profile.get("Current Title & Company", ""),
            }

            if score >= 8.8:
                shortlisted.append(result_row)
                st.markdown(result_row)
            else:
                st.write(f"Rejected candidate: {result_row['Name']} with score: {score}")

        # Evaluation finished: remove the progress widgets.
        progress_bar.empty()
        status_line.empty()

        if shortlisted:
            st.success(f"✅ Found {len(shortlisted)} suitable candidates for this job!")
        else:
            st.info("No candidates met the minimum fit score threshold for this job.")

        return shortlisted

    except Exception as e:
        st.error(f"Error processing job: {e}")
        return []
281
+
282
def _load_service_account_info(env_var="GCP_SERVICE_ACCOUNT"):
    """Read and decode the service-account JSON stored in *env_var*.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: if the variable is unset or blank, is not valid JSON, or
            does not decode to a JSON object.
    """
    raw = os.getenv(env_var)
    if raw is None or not raw.strip():
        raise ValueError(f"environment variable {env_var} is not set")
    try:
        # strict=False accepts raw newlines inside string values (e.g. a
        # pasted private key) while still parsing pretty-printed,
        # multi-line JSON, which a blanket "\n" -> "\\n" rewrite would break.
        info = json.loads(raw, strict=False)
    except json.JSONDecodeError as exc:
        raise ValueError(f"{env_var} does not contain valid JSON: {exc}") from exc
    if not isinstance(info, dict):
        raise ValueError(f"{env_var} must contain a JSON object")
    return info


def main():
    """Render the app: API-key sidebar, Google Sheets loading, job selection.

    Reads the OpenAI key from the sidebar (exporting it as ``OPENAI_API_KEY``)
    and the Google service-account JSON from the ``GCP_SERVICE_ACCOUNT``
    environment variable. It then loads the jobs and candidates worksheets
    into DataFrames and hands them to ``display_job_selection``. A missing or
    malformed service-account secret is reported with ``st.error`` and stops
    the render instead of raising.
    """
    st.title("👨‍💻 Candidate Matching App")

    # Initialize session state
    if 'processed_jobs' not in st.session_state:
        st.session_state.processed_jobs = {}

    st.write("""
    This app matches job listings with candidate profiles based on tech stack and other criteria.
    Select a job to find matching candidates.
    """)

    # API Key input
    with st.sidebar:
        st.header("API Configuration")
        api_key = st.text_input("Enter OpenAI API Key", type="password")
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
            st.success("API Key set!")
        else:
            st.warning("Please enter OpenAI API Key to use LLM features")

    # Google Sheets credentials come from the environment, not a file on disk.
    try:
        secret_content = _load_service_account_info()
    except ValueError as exc:
        st.error(f"Google service account configuration error: {exc}")
        return

    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
    creds = service_account.Credentials.from_service_account_info(secret_content, scopes=SCOPES)
    gc = gspread.authorize(creds)
    job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
    candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')

    if not api_key:
        st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
    else:
        try:
            # Load data from Google Sheets
            job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
            job_data = job_worksheet.get_all_values()
            candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
            candidate_data = candidate_worksheet.get_all_values()

            # First row of each sheet is the header row.
            jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
            candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
            # NOTE(review): get_all_values() presumably yields '' (not NaN) for
            # blank cells, in which case this fillna never fires - confirm.
            candidates_df = candidates_df.fillna("Unknown")

            # Display data preview
            with st.expander("Preview uploaded data"):
                st.subheader("Jobs Data Preview")
                st.dataframe(jobs_df.head(3))

                st.subheader("Candidates Data Preview")
                st.dataframe(candidates_df.head(3))

            # Sheet column -> expected column. Every entry currently maps a
            # name to itself, so the rename below is a no-op until a real
            # alias is added here.
            column_mapping = {
                "Full Name": "Full Name",
                "LinkedIn URL": "LinkedIn URL",
                "Current Title & Company": "Current Title & Company",
                "Years of Experience": "Years of Experience",
                "Degree & University": "Degree & University",
                "Key Tech Stack": "Key Tech Stack",
                "Key Highlights": "Key Highlights",
                "Location (from most recent experience)": "Location (from most recent experience)",
            }

            # Rename columns if they don't match expected
            candidates_df = candidates_df.rename(columns={
                col: mapping for col, mapping in column_mapping.items()
                if col in candidates_df.columns and col != mapping
            })

            # Jobs are not processed upfront: the user picks one and only
            # that job is evaluated.
            display_job_selection(jobs_df, candidates_df)

        except Exception as e:
            st.error(f"Error processing files: {e}")

    st.divider()
364
+
365
+
366
def display_job_selection(jobs_df, candidates_df):
    """Show the job picker, run matching on demand and render the results.

    Args:
        jobs_df: DataFrame of jobs; the code reads the "Role", "Company" and
            "Tech Stack" columns and, when present, "One liner", "Locations"
            and "Industry".
        candidates_df: DataFrame of candidates, passed through unchanged to
            ``process_candidates_for_job``.

    State kept in ``st.session_state``:
        llm_chain: chain returned by ``setup_llm()``, built once and reused.
        job_<index>_processed: whether the job at that position was processed.
        Selected_Candidates: dict mapping job position -> list of candidates.
    """
    # Slot used to cache the LLM chain across reruns
    if 'llm_chain' not in st.session_state:
        st.session_state.llm_chain = None

    st.subheader("Select a job to view potential matches")

    # Create job options - but don't compute matches yet
    job_options = [
        f"{row['Role']} at {row['Company']}" for _, row in jobs_df.iterrows()
    ]
    if not job_options:
        st.info("No jobs available to select.")
        return

    # Select by position so the choice can be used with jobs_df.iloc below
    selected_job_index = st.selectbox(
        "Jobs:",
        range(len(job_options)),
        format_func=lambda x: job_options[x],
    )

    # Display job details
    job_row = jobs_df.iloc[selected_job_index]

    # Parse tech stack for display
    job_row_stack = parse_tech_stack(job_row["Tech Stack"])

    # Only the wider (2/3) column is used; the second is left empty as a spacer
    details_col, _spacer_col = st.columns([2, 1])

    with details_col:
        st.subheader(f"Job Details: {job_row['Role']}")

        job_details = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "Description": job_row.get("One liner", "N/A"),
            "Locations": job_row.get("Locations", "N/A"),
            "Industry": job_row.get("Industry", "N/A"),
            "Tech Stack": display_tech_stack(job_row_stack),
        }

        for key, value in job_details.items():
            st.markdown(f"**{key}:** {value}")

    # Create a key for this job in session state
    job_key = f"job_{selected_job_index}_processed"

    if job_key not in st.session_state:
        st.session_state[job_key] = False

    # Add a process button for this job
    if not st.session_state[job_key]:
        if st.button("Find Matching Candidates for this Job"):
            if not os.environ.get("OPENAI_API_KEY"):
                st.error("Please enter your OpenAI API key in the sidebar before processing")
            else:
                # Build the chain BEFORE it is used so the cached instance is
                # what gets passed on the very first run (previously None was
                # passed and the chain was only stored afterwards).
                if st.session_state.llm_chain is None:
                    st.session_state.llm_chain = setup_llm()

                # Process candidates for this job (only when requested)
                selected_candidates = process_candidates_for_job(
                    job_row,
                    candidates_df,
                    st.session_state.llm_chain
                )

                # Store the results and set as processed
                if 'Selected_Candidates' not in st.session_state:
                    st.session_state.Selected_Candidates = {}
                st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
                st.session_state[job_key] = True

                # Force refresh so the results section below is rendered
                st.rerun()

    # Display selected candidates if already processed
    if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
        selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])

        st.subheader("Selected Candidates")

        if selected_candidates:
            for position, candidate in enumerate(selected_candidates, start=1):
                with st.expander(f"{position}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
                    profile_col, score_col = st.columns([3, 1])

                    with profile_col:
                        st.markdown(f"**Summary:** {candidate['summary']}")
                        st.markdown(f"**Current:** {candidate['Current Title & Company']}")
                        st.markdown(f"**Education:** {candidate['Educational Background']}")
                        st.markdown(f"**Experience:** {candidate['Years of Experience']}")
                        st.markdown(f"**Location:** {candidate['Location']}")
                        st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")

                    with score_col:
                        st.markdown(f"**Fit Score:** {candidate['Fit Score']}")

                    st.markdown("**Justification:**")
                    st.info(candidate['justification'])
        else:
            st.info("No candidates met the minimum score threshold (8.8) for this job.")

        # Tech-matched candidates are not listed here: they are produced
        # during the LLM matching step.

        # Add a reset button to start over; stored results stay in place and
        # are overwritten by the next run for this job.
        if st.button("Reset and Process Again"):
            st.session_state[job_key] = False
            st.rerun()
473
+
474
# Script entry point: run the app only when this file is executed directly.
if __name__ == "__main__":
    main()