ak0601 committed on
Commit
0cf3e7f
Β·
verified Β·
1 Parent(s): 883ad5a

Delete src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +0 -470
src/streamlit_app.py DELETED
@@ -1,470 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import json
4
- import os
5
- from pydantic import BaseModel, Field
6
- from typing import List, Set, Dict, Any, Optional
7
- import time
8
- from langchain_openai import ChatOpenAI
9
- from langchain_core.messages import HumanMessage
10
- from langchain_core.prompts import ChatPromptTemplate
11
- from langchain_core.output_parsers import StrOutputParser
12
- from langchain_core.prompts import PromptTemplate
13
- import gspread
14
- from google.oauth2 import service_account
15
-
16
# Configure the Streamlit page. This must be the first st.* call in the
# script, per Streamlit's set_page_config requirement.
st.set_page_config(
    page_title="Candidate Matching App",
    page_icon="πŸ‘¨β€πŸ’»πŸŽ―",
    layout="wide"
)
21
-
22
# Pydantic schema for the LLM's structured verdict on one candidate.
# The Field descriptions are passed to the model via with_structured_output
# (see setup_llm), so they double as output-format instructions.
class Shortlist(BaseModel):
    fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
    candidate_name: str = Field(description="The name of the candidate.")
    candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
    candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
    candidate_location: str = Field(description="The location of the candidate.")
    justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
30
-
31
# Function to parse and normalize tech stacks
def parse_tech_stack(stack):
    """Normalize a raw tech-stack value into a set of lowercase tokens.

    Accepts None/NaN/empty (returns an empty set), an existing set
    (returned unchanged), a string representation of a set such as
    "{'Python', 'Go'}", or a plain comma-separated string.

    Returns:
        set[str]: lowercase, whitespace-stripped technology names;
        empty set on missing or unparseable input.
    """
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        # Handle potential string representation of sets
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            items = stack.strip("{}").split(",")
            # Bug fix: lowercase here too — this branch previously kept the
            # original case while the comma-separated branch lowercased,
            # making set intersections in get_matching_candidates
            # case-sensitive for one input format only.
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()
47
-
48
def display_tech_stack(stack_set):
    """Format a tech-stack value for display.

    A set becomes a sorted, comma-separated string; any other value is
    simply stringified.
    """
    if not isinstance(stack_set, set):
        return str(stack_set)
    return ", ".join(sorted(stack_set))
52
-
53
def get_matching_candidates(job_stack, candidates_df, min_common=2):
    """Find candidates with matching tech stack for a specific job.

    Args:
        job_stack: Raw tech-stack value for the job, in any format
            accepted by parse_tech_stack.
        candidates_df: Candidate DataFrame; must contain the columns
            referenced below ("Key Tech Stack", "Full Name", ...).
        min_common: Minimum number of overlapping technologies required
            to count as a match. Defaults to 2, the previously
            hard-coded threshold (generalized, backward-compatible).

    Returns:
        list[dict]: one dict per matching candidate with the fields the
        LLM scoring step expects.
    """
    matched = []
    job_stack_set = parse_tech_stack(job_stack)

    for _, candidate in candidates_df.iterrows():
        candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
        common = job_stack_set & candidate_stack
        if len(common) >= min_common:
            matched.append({
                "Name": candidate["Full Name"],
                "URL": candidate["LinkedIn URL"],
                "Degree & Education": candidate["Degree & University"],
                "Years of Experience": candidate["Years of Experience"],
                "Current Title & Company": candidate['Current Title & Company'],
                "Key Highlights": candidate["Key Highlights"],
                "Location": candidate["Location (from most recent experience)"],
                "Experience": str(candidate["Experience"]),
                "Tech Stack": candidate_stack
            })
    return matched
74
-
75
def setup_llm():
    """Set up the LangChain LLM with structured output.

    Builds a runnable chain (prompt | structured-output LLM) that scores
    one candidate against one job and returns a Shortlist instance.
    Reads OPENAI_API_KEY from the environment (set in main's sidebar).
    """
    # Deterministic scoring: temperature=0; retry transient API failures twice.
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )

    # Constrain responses to the Shortlist pydantic schema.
    sum_llm = llm.with_structured_output(Shortlist)

    # Scoring rubric: education tier, VC-backed startup experience, and a
    # hard cutoff — per the rubric, only scores in 8.8–10 are kept
    # (process_candidates_for_job enforces the same 8.8 threshold).
    system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
the profile is according to job.
Try to ensure following points while estimating the candidate's fit score:
For education:
Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
Tier3 - Unknown or unranked institutions - Lower points or reject
Startup Experience Requirement:
Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
The fit score signifies based on following metrics:
1–5 - Poor Fit - Auto-reject
6–7 - Weak Fit - Auto-reject
8.0–8.7 - Moderate Fit - Auto-reject
8.8–10 - STRONG Fit - Include in results
"""

    # Human turn: placeholder names must match the payload keys built in
    # call_llm (Company, Role, desc, ..., Experience).
    query_prompt = ChatPromptTemplate.from_messages([
        ("system", system),
        ("human", """
You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
For this you will be provided with the follwing inputs of job and candidates:
Job Details
Company: {Company}
Role: {Role}
About Company: {desc}
Locations: {Locations}
Tech Stack: {Tech_Stack}
Industry: {Industry}

Candidate Details:
Full Name: {Full_Name}
LinkedIn URL: {LinkedIn_URL}
Current Title & Company: {Current_Title_Company}
Years of Experience: {Years_of_Experience}
Degree & University: {Degree_University}
Key Tech Stack: {Key_Tech_Stack}
Key Highlights: {Key_Highlights}
Location (from most recent experience): {cand_Location}
Past_Experience: {Experience}
Answer in the structured manner as per the schema.
If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
"""),
    ])

    # Chain the prompt and LLM
    cat_class = query_prompt | sum_llm

    return cat_class
140
-
141
def call_llm(candidate_data, job_data, llm_chain):
    """Score one candidate against one job via the structured-output LLM.

    Args:
        candidate_data: Dict produced by get_matching_candidates.
        job_data: Dict of job fields (Company, Role, desc, Locations,
            Tech_Stack, Industry).
        llm_chain: Runnable chain from setup_llm() (prompt | LLM).

    Returns:
        Dict with candidate_name/url/summary/location, fit_score and
        justification. On any LLM failure a fallback dict with
        fit_score 0.0 is returned so the caller can keep processing.
    """
    try:
        # Tech stacks may still be sets at this point; serialize them to
        # sorted comma-separated strings for the prompt.
        job_tech_stack = job_data.get("Tech_Stack", set())
        candidate_tech_stack = candidate_data.get("Tech Stack", set())

        if isinstance(job_tech_stack, set):
            job_tech_stack = ", ".join(sorted(job_tech_stack))

        if isinstance(candidate_tech_stack, set):
            candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))

        # Payload keys must match the placeholders in setup_llm's template.
        payload = {
            "Company": job_data.get("Company", ""),
            "Role": job_data.get("Role", ""),
            "desc": job_data.get("desc", ""),
            "Locations": job_data.get("Locations", ""),
            "Tech_Stack": job_tech_stack,
            "Industry": job_data.get("Industry", ""),

            "Full_Name": candidate_data.get("Name", ""),
            "LinkedIn_URL": candidate_data.get("URL", ""),
            "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
            "Years_of_Experience": candidate_data.get("Years of Experience", ""),
            "Degree_University": candidate_data.get("Degree & Education", ""),
            "Key_Tech_Stack": candidate_tech_stack,
            "Key_Highlights": candidate_data.get("Key Highlights", ""),
            "cand_Location": candidate_data.get("Location", ""),
            "Experience": candidate_data.get("Experience", "")
        }

        # Bug fix: removed leftover debug print of the candidate's
        # experience that cluttered server logs.
        response = llm_chain.invoke(payload)

        # Flatten the Shortlist pydantic object into the plain dict the
        # rest of the app consumes.
        return {
            "candidate_name": response.candidate_name,
            "candidate_url": response.candidate_url,
            "candidate_summary": response.candidate_summary,
            "candidate_location": response.candidate_location,
            "fit_score": response.fit_score,
            "justification": response.justification
        }
    except Exception as e:
        st.error(f"Error calling LLM: {e}")
        # Fallback keeps the pipeline alive; score 0.0 guarantees rejection
        # by the 8.8 threshold downstream.
        return {
            "candidate_name": candidate_data.get("Name", "Unknown"),
            "candidate_url": candidate_data.get("URL", ""),
            "candidate_summary": "Error processing candidate profile",
            "candidate_location": candidate_data.get("Location", "Unknown"),
            "fit_score": 0.0,
            "justification": f"Error in LLM processing: {str(e)}"
        }
198
-
199
def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
    """Process candidates for a specific job using the LLM.

    Pre-filters candidates by tech-stack overlap, scores each survivor
    with the LLM, and keeps only those with fit_score >= 8.8 (the
    STRONG-fit cutoff from the scoring rubric). Renders progress and
    per-candidate results directly into the Streamlit UI.

    Args:
        job_row: Row from the jobs DataFrame.
        candidates_df: DataFrame of all candidates.
        llm_chain: Optional pre-built chain from setup_llm(); built on
            demand when None.

    Returns:
        list[dict]: candidates that passed the cutoff; empty list when
        nothing matched or an error occurred.
    """
    if llm_chain is None:
        with st.spinner("Setting up LLM..."):
            llm_chain = setup_llm()

    selected_candidates = []

    try:
        # Job fields forwarded to the LLM prompt via call_llm.
        job_data = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "desc": job_row.get("One liner", ""),
            "Locations": job_row.get("Locations", ""),
            "Tech_Stack": job_row["Tech Stack"],
            "Industry": job_row.get("Industry", "")
        }

        # Cheap pre-filter: require overlapping technologies before
        # spending an LLM call on a candidate.
        with st.spinner("Finding matching candidates based on tech stack..."):
            matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)

        if not matching_candidates:
            st.warning("No candidates with matching tech stack found for this job.")
            return []

        st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")

        # Progress UI elements; emptied once the loop finishes.
        candidates_progress = st.progress(0)
        candidate_status = st.empty()

        # Score each pre-filtered candidate with the LLM.
        for i, candidate_data in enumerate(matching_candidates):
            candidates_progress.progress((i + 1) / len(matching_candidates))
            candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")

            response = call_llm(candidate_data, job_data, llm_chain)

            response_dict = {
                "Name": response["candidate_name"],
                "LinkedIn": response["candidate_url"],
                "summary": response["candidate_summary"],
                "Location": response["candidate_location"],
                "Fit Score": response["fit_score"],
                "justification": response["justification"],
                # Add back original candidate data for display context.
                "Educational Background": candidate_data.get("Degree & Education", ""),
                "Years of Experience": candidate_data.get("Years of Experience", ""),
                "Current Title & Company": candidate_data.get("Current Title & Company", "")
            }

            # 8.8 is the STRONG-fit threshold from the system prompt rubric.
            if response["fit_score"] >= 8.8:
                selected_candidates.append(response_dict)
                st.markdown(response_dict)
            else:
                st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")

        # Clear progress indicators
        candidates_progress.empty()
        candidate_status.empty()

        # Show summary of results.
        if selected_candidates:
            st.success(f"βœ… Found {len(selected_candidates)} suitable candidates for this job!")
        else:
            st.info("No candidates met the minimum fit score threshold for this job.")

        return selected_candidates

    except Exception as e:
        st.error(f"Error processing job: {e}")
        return []
276
-
277
def main():
    """Streamlit entry point.

    Collects the OpenAI API key, authenticates to Google Sheets via a
    service account, loads job and candidate sheets into DataFrames, and
    hands off to display_job_selection for on-demand matching.
    """
    st.title("πŸ‘¨β€πŸ’» Candidate Matching App")

    # Initialize session state
    if 'processed_jobs' not in st.session_state:
        st.session_state.processed_jobs = {}

    st.write("""
    This app matches job listings with candidate profiles based on tech stack and other criteria.
    Select a job to find matching candidates.
    """)

    # API key input: exported to the environment so LangChain's
    # ChatOpenAI picks it up in setup_llm.
    with st.sidebar:
        st.header("API Configuration")
        api_key = st.text_input("Enter OpenAI API Key", type="password")
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
            st.success("API Key set!")
        else:
            st.warning("Please enter OpenAI API Key to use LLM features")

    # Google Sheets auth: service-account JSON from the environment.
    secret_content = os.getenv("GCP_SERVICE_ACCOUNT")
    if not secret_content:
        # Bug fix: previously json.loads(None) raised an opaque TypeError
        # when the variable was missing; fail fast with a readable error.
        st.error("GCP_SERVICE_ACCOUNT environment variable is not set; cannot connect to Google Sheets.")
        st.stop()
    secret_content = json.loads(secret_content)
    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
    creds = service_account.Credentials.from_service_account_info(secret_content, scopes=SCOPES)
    gc = gspread.authorize(creds)
    # NOTE(review): spreadsheet IDs are hard-coded; consider moving them
    # to configuration.
    job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
    candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')

    # Show API key warning if not set
    if not api_key:
        st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")

    if api_key:
        try:
            # Load data from Google Sheets
            job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
            job_data = job_worksheet.get_all_values()
            candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
            candidate_data = candidate_worksheet.get_all_values()

            # Convert to DataFrames; first sheet row is the header.
            jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
            candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
            candidates_df = candidates_df.fillna("Unknown")

            # Display data preview
            with st.expander("Preview uploaded data"):
                st.subheader("Jobs Data Preview")
                st.dataframe(jobs_df.head(3))

                st.subheader("Candidates Data Preview")
                st.dataframe(candidates_df.head(3))

            # Identity mapping kept as a hook for renaming mismatched
            # sheet columns; currently a no-op (every key equals its value).
            column_mapping = {
                "Full Name": "Full Name",
                "LinkedIn URL": "LinkedIn URL",
                "Current Title & Company": "Current Title & Company",
                "Years of Experience": "Years of Experience",
                "Degree & University": "Degree & University",
                "Key Tech Stack": "Key Tech Stack",
                "Key Highlights": "Key Highlights",
                "Location (from most recent experience)": "Location (from most recent experience)"
            }

            # Rename columns if they don't match expected
            candidates_df = candidates_df.rename(columns={
                col: mapping for col, mapping in column_mapping.items()
                if col in candidates_df.columns and col != mapping
            })

            # Jobs are processed lazily: only the job the user selects
            # is run through the LLM.
            display_job_selection(jobs_df, candidates_df)

        except Exception as e:
            st.error(f"Error processing files: {e}")

    st.divider()
359
-
360
-
361
def display_job_selection(jobs_df, candidates_df):
    """Render the job picker and, on demand, candidate-matching results.

    Results are cached per job index in st.session_state so Streamlit
    reruns do not re-invoke the LLM.
    """
    # Store the LLM chain as a session state to avoid recreating it.
    if 'llm_chain' not in st.session_state:
        st.session_state.llm_chain = None

    st.subheader("Select a job to view potential matches")

    # Build selectbox labels only — matching is deferred until the user
    # explicitly asks for it.
    job_options = []
    for i, row in jobs_df.iterrows():
        job_options.append(f"{row['Role']} at {row['Company']}")

    if job_options:
        selected_job_index = st.selectbox("Jobs:",
                                        range(len(job_options)),
                                        format_func=lambda x: job_options[x])

        # Display job details
        job_row = jobs_df.iloc[selected_job_index]

        # Parse tech stack for display
        job_row_stack = parse_tech_stack(job_row["Tech Stack"])

        col1, col2 = st.columns([2, 1])

        with col1:
            st.subheader(f"Job Details: {job_row['Role']}")

            job_details = {
                "Company": job_row["Company"],
                "Role": job_row["Role"],
                "Description": job_row.get("One liner", "N/A"),
                "Locations": job_row.get("Locations", "N/A"),
                "Industry": job_row.get("Industry", "N/A"),
                "Tech Stack": display_tech_stack(job_row_stack)
            }

            for key, value in job_details.items():
                st.markdown(f"**{key}:** {value}")

        # Per-job "already processed" flag in session state.
        job_key = f"job_{selected_job_index}_processed"

        if job_key not in st.session_state:
            st.session_state[job_key] = False

        # Process button: only run the expensive LLM matching when
        # clicked and an API key is present.
        if not st.session_state[job_key]:
            if st.button(f"Find Matching Candidates for this Job"):
                if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
                    st.error("Please enter your OpenAI API key in the sidebar before processing")
                else:
                    # NOTE(review): on the first run llm_chain is still None
                    # here, so process_candidates_for_job builds its own
                    # chain; the one cached below is only reused afterwards.
                    selected_candidates = process_candidates_for_job(
                        job_row,
                        candidates_df,
                        st.session_state.llm_chain
                    )

                    # Store the results and set as processed
                    if 'Selected_Candidates' not in st.session_state:
                        st.session_state.Selected_Candidates = {}
                    st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
                    st.session_state[job_key] = True

                    # Store the LLM chain for reuse
                    if st.session_state.llm_chain is None:
                        st.session_state.llm_chain = setup_llm()

                    # Force refresh so the results branch below renders.
                    st.rerun()

        # Display selected candidates if already processed (cached).
        if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
            selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])

            # Display selected candidates
            st.subheader("Selected Candidates")

            if len(selected_candidates) > 0:
                for i, candidate in enumerate(selected_candidates):
                    with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
                        col1, col2 = st.columns([3, 1])

                        with col1:
                            st.markdown(f"**Summary:** {candidate['summary']}")
                            st.markdown(f"**Current:** {candidate['Current Title & Company']}")
                            st.markdown(f"**Education:** {candidate['Educational Background']}")
                            st.markdown(f"**Experience:** {candidate['Years of Experience']}")
                            st.markdown(f"**Location:** {candidate['Location']}")
                            st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")

                        with col2:
                            st.markdown(f"**Fit Score:** {candidate['Fit Score']}")

                        st.markdown("**Justification:**")
                        st.info(candidate['justification'])
            else:
                st.info("No candidates met the minimum score threshold (8.8) for this job.")

            # We don't show tech-matched candidates here since they are
            # generated during the LLM matching process now.

            # Add a reset button to start over for this job.
            if st.button("Reset and Process Again"):
                st.session_state[job_key] = False
                st.rerun()
468
-
469
- if __name__ == "__main__":
470
- main()