ak0601 committed
Commit 00749c1 · verified · 1 Parent(s): 88b2e50

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +468 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,470 @@
- import altair as alt
- import numpy as np
- import pandas as pd
  import streamlit as st

- """
- # Welcome to Streamlit!
-
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
-
- In the meantime, below is an example of what you can do with just a few lines of code:
- """
-
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
- indices = np.linspace(0, 1, num_points)
- theta = 2 * np.pi * num_turns * indices
- radius = indices
-
- x = radius * np.cos(theta)
- y = radius * np.sin(theta)
-
- df = pd.DataFrame({
-     "x": x,
-     "y": y,
-     "idx": indices,
-     "rand": np.random.randn(num_points),
- })
-
- st.altair_chart(alt.Chart(df, height=700, width=700)
-     .mark_point(filled=True)
-     .encode(
-         x=alt.X("x", axis=None),
-         y=alt.Y("y", axis=None),
-         color=alt.Color("idx", legend=None, scale=alt.Scale()),
-         size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-     ))

  import streamlit as st
+ import pandas as pd
+ import json
+ import os
+ from pydantic import BaseModel, Field
+ from typing import List, Set, Dict, Any, Optional
+ import time
+ from langchain_openai import ChatOpenAI
+ from langchain_core.messages import HumanMessage
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.prompts import PromptTemplate
+ import gspread
+ from google.oauth2 import service_account
+
+ st.set_page_config(
+     page_title="Candidate Matching App",
+     page_icon="👨‍💻🎯",
+     layout="wide"
+ )
+
+ # Define pydantic model for structured output
+ class Shortlist(BaseModel):
+     fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
+     candidate_name: str = Field(description="The name of the candidate.")
+     candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
+     candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with their educational background.")
+     candidate_location: str = Field(description="The location of the candidate.")
+     justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
+
+ # Function to parse and normalize tech stacks
+ def parse_tech_stack(stack):
+     if pd.isna(stack) or stack == "" or stack is None:
+         return set()
+     if isinstance(stack, set):
+         return stack
+     try:
+         # Handle potential string representation of sets
+         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
+             # This could be a string representation of a set
+             items = stack.strip("{}").split(",")
+             return set(item.strip().strip("'\"") for item in items if item.strip())
+         return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
+     except Exception as e:
+         st.error(f"Error parsing tech stack: {e}")
+         return set()
+
+ def display_tech_stack(stack_set):
+     if isinstance(stack_set, set):
+         return ", ".join(sorted(stack_set))
+     return str(stack_set)
+
+ def get_matching_candidates(job_stack, candidates_df):
+     """Find candidates with matching tech stack for a specific job"""
+     matched = []
+     job_stack_set = parse_tech_stack(job_stack)
+
+     for _, candidate in candidates_df.iterrows():
+         candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
+         common = job_stack_set & candidate_stack
+         if len(common) >= 2:
+             matched.append({
+                 "Name": candidate["Full Name"],
+                 "URL": candidate["LinkedIn URL"],
+                 "Degree & Education": candidate["Degree & University"],
+                 "Years of Experience": candidate["Years of Experience"],
+                 "Current Title & Company": candidate['Current Title & Company'],
+                 "Key Highlights": candidate["Key Highlights"],
+                 "Location": candidate["Location (from most recent experience)"],
+                 "Experience": str(candidate["Experience"]),
+                 "Tech Stack": candidate_stack
+             })
+     return matched
+
+ def setup_llm():
+     """Set up the LangChain LLM with structured output"""
+     # Create LLM instance
+     llm = ChatOpenAI(
+         model="gpt-4o-mini",
+         temperature=0,
+         max_tokens=None,
+         timeout=None,
+         max_retries=2,
+     )
+
+     # Create structured output
+     sum_llm = llm.with_structured_output(Shortlist)
+
+     # Create system prompt
+     system = """You are an expert recruiter. Your task is to analyse the candidate profile, determine how well it matches the job details, and provide a score (out of 10) indicating how compatible the profile is with the job.
+     Keep the following points in mind while estimating the candidate's fit score:
+     For education:
+     Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
+     Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst, etc. - Moderate points
+     Tier3 - Unknown or unranked institutions - Lower points or reject
+     Startup Experience Requirement:
+     Candidates must have worked as a direct employee at a VC-backed startup (Seed to Series C/D).
+     Preferred - Y Combinator, Sequoia, a16z, Accel, Founders Fund, LightSpeed, Greylock, Benchmark, Index Ventures, etc.
+     The fit score maps to the following bands:
+     1–5 - Poor Fit - Auto-reject
+     6–7 - Weak Fit - Auto-reject
+     8.0–8.7 - Moderate Fit - Auto-reject
+     8.8–10 - STRONG Fit - Include in results
+     """
+
+     # Create query prompt
+     query_prompt = ChatPromptTemplate.from_messages([
+         ("system", system),
+         ("human", """
+     You are an expert recruiter; your task is to determine whether the candidate is a good match for the given job.
+     For this you will be provided with the following details of the job and the candidate:
+     Job Details
+     Company: {Company}
+     Role: {Role}
+     About Company: {desc}
+     Locations: {Locations}
+     Tech Stack: {Tech_Stack}
+     Industry: {Industry}
+
+     Candidate Details:
+     Full Name: {Full_Name}
+     LinkedIn URL: {LinkedIn_URL}
+     Current Title & Company: {Current_Title_Company}
+     Years of Experience: {Years_of_Experience}
+     Degree & University: {Degree_University}
+     Key Tech Stack: {Key_Tech_Stack}
+     Key Highlights: {Key_Highlights}
+     Location (from most recent experience): {cand_Location}
+     Past_Experience: {Experience}
+     Answer in a structured manner as per the schema.
+     If any parameter is Unknown, do not include it in the summary; only include the parameters that are known.
+     """),
+     ])
+
+     # Chain the prompt and LLM
+     cat_class = query_prompt | sum_llm
+
+     return cat_class
+
+ def call_llm(candidate_data, job_data, llm_chain):
+     """Call the actual LLM to evaluate the candidate"""
+     try:
+         # Convert tech stacks to strings for the LLM payload
+         job_tech_stack = job_data.get("Tech_Stack", set())
+         candidate_tech_stack = candidate_data.get("Tech Stack", set())
+
+         if isinstance(job_tech_stack, set):
+             job_tech_stack = ", ".join(sorted(job_tech_stack))
+
+         if isinstance(candidate_tech_stack, set):
+             candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
+
+         # Prepare payload for LLM
+         payload = {
+             "Company": job_data.get("Company", ""),
+             "Role": job_data.get("Role", ""),
+             "desc": job_data.get("desc", ""),
+             "Locations": job_data.get("Locations", ""),
+             "Tech_Stack": job_tech_stack,
+             "Industry": job_data.get("Industry", ""),
+
+             "Full_Name": candidate_data.get("Name", ""),
+             "LinkedIn_URL": candidate_data.get("URL", ""),
+             "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
+             "Years_of_Experience": candidate_data.get("Years of Experience", ""),
+             "Degree_University": candidate_data.get("Degree & Education", ""),
+             "Key_Tech_Stack": candidate_tech_stack,
+             "Key_Highlights": candidate_data.get("Key Highlights", ""),
+             "cand_Location": candidate_data.get("Location", ""),
+             "Experience": candidate_data.get("Experience", "")
+         }
+
+         # Call LLM
+         response = llm_chain.invoke(payload)
+         print(candidate_data.get("Experience", ""))
+
+         # Return response in expected format
+         return {
+             "candidate_name": response.candidate_name,
+             "candidate_url": response.candidate_url,
+             "candidate_summary": response.candidate_summary,
+             "candidate_location": response.candidate_location,
+             "fit_score": response.fit_score,
+             "justification": response.justification
+         }
+     except Exception as e:
+         st.error(f"Error calling LLM: {e}")
+         # Fallback to a default response
+         return {
+             "candidate_name": candidate_data.get("Name", "Unknown"),
+             "candidate_url": candidate_data.get("URL", ""),
+             "candidate_summary": "Error processing candidate profile",
+             "candidate_location": candidate_data.get("Location", "Unknown"),
+             "fit_score": 0.0,
+             "justification": f"Error in LLM processing: {str(e)}"
+         }
+
+ def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
+     """Process candidates for a specific job using the LLM"""
+     if llm_chain is None:
+         with st.spinner("Setting up LLM..."):
+             llm_chain = setup_llm()
+
+     selected_candidates = []
+
+     try:
+         # Get job-specific data
+         job_data = {
+             "Company": job_row["Company"],
+             "Role": job_row["Role"],
+             "desc": job_row.get("One liner", ""),
+             "Locations": job_row.get("Locations", ""),
+             "Tech_Stack": job_row["Tech Stack"],
+             "Industry": job_row.get("Industry", "")
+         }
+
+         # Find matching candidates for this job
+         with st.spinner("Finding matching candidates based on tech stack..."):
+             matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
+
+         if not matching_candidates:
+             st.warning("No candidates with matching tech stack found for this job.")
+             return []
+
+         st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
+
+         # Create progress elements
+         candidates_progress = st.progress(0)
+         candidate_status = st.empty()
+
+         # Process each candidate
+         for i, candidate_data in enumerate(matching_candidates):
+             # Update progress
+             candidates_progress.progress((i + 1) / len(matching_candidates))
+             candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
+
+             # Process the candidate with the LLM
+             response = call_llm(candidate_data, job_data, llm_chain)
+
+             response_dict = {
+                 "Name": response["candidate_name"],
+                 "LinkedIn": response["candidate_url"],
+                 "summary": response["candidate_summary"],
+                 "Location": response["candidate_location"],
+                 "Fit Score": response["fit_score"],
+                 "justification": response["justification"],
+                 # Add back original candidate data for context
+                 "Educational Background": candidate_data.get("Degree & Education", ""),
+                 "Years of Experience": candidate_data.get("Years of Experience", ""),
+                 "Current Title & Company": candidate_data.get("Current Title & Company", "")
+             }
+
+             # Add to selected candidates if score is high enough
+             if response["fit_score"] >= 8.8:
+                 selected_candidates.append(response_dict)
+                 st.markdown(response_dict)
+             else:
+                 st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
+
+         # Clear progress indicators
+         candidates_progress.empty()
+         candidate_status.empty()
+
+         # Show results
+         if selected_candidates:
+             st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
+         else:
+             st.info("No candidates met the minimum fit score threshold for this job.")
+
+         return selected_candidates
+
+     except Exception as e:
+         st.error(f"Error processing job: {e}")
+         return []
+
+ def main():
+     st.title("👨‍💻 Candidate Matching App")
+
+     # Initialize session state
+     if 'processed_jobs' not in st.session_state:
+         st.session_state.processed_jobs = {}
+
+     st.write("""
+     This app matches job listings with candidate profiles based on tech stack and other criteria.
+     Select a job to find matching candidates.
+     """)
+
+     # API Key input
+     with st.sidebar:
+         st.header("API Configuration")
+         api_key = st.text_input("Enter OpenAI API Key", type="password")
+         if api_key:
+             os.environ["OPENAI_API_KEY"] = api_key
+             st.success("API Key set!")
+         else:
+             st.warning("Please enter OpenAI API Key to use LLM features")
+
+     # Authorize Google Sheets access with the GCP service-account secret
+     secret_content = os.getenv("GCP_SERVICE_ACCOUNT")
+     secret_content = secret_content.replace("\n", "\\n")
+     secret_content = json.loads(secret_content)
+     SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
+     creds = service_account.Credentials.from_service_account_info(secret_content, scopes=SCOPES)
+     gc = gspread.authorize(creds)
+     job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
+     candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
+
+     # Show API key warning if not set
+     if not api_key:
+         st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
+
+     if api_key:
+         try:
+             # Load data from Google Sheets
+             job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
+             job_data = job_worksheet.get_all_values()
+             candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
+             candidate_data = candidate_worksheet.get_all_values()
+
+             # Convert to DataFrames
+             jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
+             candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
+             candidates_df = candidates_df.fillna("Unknown")
+
+             # Display data preview
+             with st.expander("Preview uploaded data"):
+                 st.subheader("Jobs Data Preview")
+                 st.dataframe(jobs_df.head(3))
+
+                 st.subheader("Candidates Data Preview")
+                 st.dataframe(candidates_df.head(3))
+
+             # Map column names if needed
+             column_mapping = {
+                 "Full Name": "Full Name",
+                 "LinkedIn URL": "LinkedIn URL",
+                 "Current Title & Company": "Current Title & Company",
+                 "Years of Experience": "Years of Experience",
+                 "Degree & University": "Degree & University",
+                 "Key Tech Stack": "Key Tech Stack",
+                 "Key Highlights": "Key Highlights",
+                 "Location (from most recent experience)": "Location (from most recent experience)"
+             }
+
+             # Rename columns if they don't match expected
+             candidates_df = candidates_df.rename(columns={
+                 col: mapping for col, mapping in column_mapping.items()
+                 if col in candidates_df.columns and col != mapping
+             })
+
+             # Instead of processing all jobs upfront, display job selection
+             # and only process the selected job when the user chooses it
+             display_job_selection(jobs_df, candidates_df)
+
+         except Exception as e:
+             st.error(f"Error processing files: {e}")
+
+     st.divider()
+
+
+ def display_job_selection(jobs_df, candidates_df):
+     # Store the LLM chain in session state to avoid recreating it
+     if 'llm_chain' not in st.session_state:
+         st.session_state.llm_chain = None
+
+     st.subheader("Select a job to view potential matches")
+
+     # Create job options - but don't compute matches yet
+     job_options = []
+     for i, row in jobs_df.iterrows():
+         job_options.append(f"{row['Role']} at {row['Company']}")
+
+     if job_options:
+         selected_job_index = st.selectbox("Jobs:",
+                                           range(len(job_options)),
+                                           format_func=lambda x: job_options[x])
+
+         # Display job details
+         job_row = jobs_df.iloc[selected_job_index]
+
+         # Parse tech stack for display
+         job_row_stack = parse_tech_stack(job_row["Tech Stack"])
+
+         col1, col2 = st.columns([2, 1])
+
+         with col1:
+             st.subheader(f"Job Details: {job_row['Role']}")
+
+             job_details = {
+                 "Company": job_row["Company"],
+                 "Role": job_row["Role"],
+                 "Description": job_row.get("One liner", "N/A"),
+                 "Locations": job_row.get("Locations", "N/A"),
+                 "Industry": job_row.get("Industry", "N/A"),
+                 "Tech Stack": display_tech_stack(job_row_stack)
+             }
+
+             for key, value in job_details.items():
+                 st.markdown(f"**{key}:** {value}")
+
+         # Create a key for this job in session state
+         job_key = f"job_{selected_job_index}_processed"
+
+         if job_key not in st.session_state:
+             st.session_state[job_key] = False
+
+         # Add a process button for this job
+         if not st.session_state[job_key]:
+             if st.button(f"Find Matching Candidates for this Job"):
+                 if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
+                     st.error("Please enter your OpenAI API key in the sidebar before processing")
+                 else:
+                     # Process candidates for this job (only when requested)
+                     selected_candidates = process_candidates_for_job(
+                         job_row,
+                         candidates_df,
+                         st.session_state.llm_chain
+                     )
+
+                     # Store the results and mark as processed
+                     if 'Selected_Candidates' not in st.session_state:
+                         st.session_state.Selected_Candidates = {}
+                     st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
+                     st.session_state[job_key] = True
+
+                     # Store the LLM chain for reuse
+                     if st.session_state.llm_chain is None:
+                         st.session_state.llm_chain = setup_llm()
+
+                     # Force refresh
+                     st.rerun()
+
+         # Display selected candidates if already processed
+         if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
+             selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
+
+             # Display selected candidates
+             st.subheader("Selected Candidates")
+
+             if len(selected_candidates) > 0:
+                 for i, candidate in enumerate(selected_candidates):
+                     with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
+                         col1, col2 = st.columns([3, 1])
+
+                         with col1:
+                             st.markdown(f"**Summary:** {candidate['summary']}")
+                             st.markdown(f"**Current:** {candidate['Current Title & Company']}")
+                             st.markdown(f"**Education:** {candidate['Educational Background']}")
+                             st.markdown(f"**Experience:** {candidate['Years of Experience']}")
+                             st.markdown(f"**Location:** {candidate['Location']}")
+                             st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
+
+                         with col2:
+                             st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
+
+                         st.markdown("**Justification:**")
+                         st.info(candidate['justification'])
+             else:
+                 st.info("No candidates met the minimum score threshold (8.8) for this job.")
+
+         # We don't show tech-matched candidates here since they are generated
+         # during the LLM matching process now
+
+         # Add a reset button to start over
+         if st.button("Reset and Process Again"):
+             st.session_state[job_key] = False
+             st.rerun()

+ if __name__ == "__main__":
+     main()