ak0601 commited on
Commit
883ad5a
·
verified ·
1 Parent(s): 1430581

Update src/app_job_copy_1.py

Browse files
Files changed (1) hide show
  1. src/app_job_copy_1.py +1054 -1054
src/app_job_copy_1.py CHANGED
@@ -1,1055 +1,1055 @@
1
- # import streamlit as st
2
- # import pandas as pd
3
- # import json
4
- # import os
5
- # from pydantic import BaseModel, Field
6
- # from typing import List, Set, Dict, Any, Optional
7
- # import time
8
- # from langchain_openai import ChatOpenAI
9
- # from langchain_core.messages import HumanMessage
10
- # from langchain_core.prompts import ChatPromptTemplate
11
- # from langchain_core.output_parsers import StrOutputParser
12
- # from langchain_core.prompts import PromptTemplate
13
- # import gspread
14
- # from google.oauth2 import service_account
15
-
16
- # st.set_page_config(
17
- # page_title="Candidate Matching App",
18
- # page_icon="👨‍💻🎯",
19
- # layout="wide"
20
- # )
21
-
22
- # # Define pydantic model for structured output
23
- # class Shortlist(BaseModel):
24
- # fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
25
- # candidate_name: str = Field(description="The name of the candidate.")
26
- # candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
27
- # candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
28
- # candidate_location: str = Field(description="The location of the candidate.")
29
- # justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
30
-
31
- # # Function to parse and normalize tech stacks
32
- # def parse_tech_stack(stack):
33
- # if pd.isna(stack) or stack == "" or stack is None:
34
- # return set()
35
- # if isinstance(stack, set):
36
- # return stack
37
- # try:
38
- # # Handle potential string representation of sets
39
- # if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
40
- # # This could be a string representation of a set
41
- # items = stack.strip("{}").split(",")
42
- # return set(item.strip().strip("'\"") for item in items if item.strip())
43
- # return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
44
- # except Exception as e:
45
- # st.error(f"Error parsing tech stack: {e}")
46
- # return set()
47
-
48
- # def display_tech_stack(stack_set):
49
- # if isinstance(stack_set, set):
50
- # return ", ".join(sorted(stack_set))
51
- # return str(stack_set)
52
-
53
- # def get_matching_candidates(job_stack, candidates_df):
54
- # """Find candidates with matching tech stack for a specific job"""
55
- # matched = []
56
- # job_stack_set = parse_tech_stack(job_stack)
57
-
58
- # for _, candidate in candidates_df.iterrows():
59
- # candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
60
- # common = job_stack_set & candidate_stack
61
- # if len(common) >= 2:
62
- # matched.append({
63
- # "Name": candidate["Full Name"],
64
- # "URL": candidate["LinkedIn URL"],
65
- # "Degree & Education": candidate["Degree & University"],
66
- # "Years of Experience": candidate["Years of Experience"],
67
- # "Current Title & Company": candidate['Current Title & Company'],
68
- # "Key Highlights": candidate["Key Highlights"],
69
- # "Location": candidate["Location (from most recent experience)"],
70
- # "Experience": str(candidate["Experience"]),
71
- # "Tech Stack": candidate_stack
72
- # })
73
- # return matched
74
-
75
- # def setup_llm():
76
- # """Set up the LangChain LLM with structured output"""
77
- # # Create LLM instance
78
- # llm = ChatOpenAI(
79
- # model="gpt-4o-mini",
80
- # temperature=0,
81
- # max_tokens=None,
82
- # timeout=None,
83
- # max_retries=2,
84
- # )
85
-
86
- # # Create structured output
87
- # sum_llm = llm.with_structured_output(Shortlist)
88
-
89
- # # Create system prompt
90
- # system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
91
- # the profile is according to job.
92
- # Try to ensure following points while estimating the candidate's fit score:
93
- # For education:
94
- # Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
95
- # Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
96
- # Tier3 - Unknown or unranked institutions - Lower points or reject
97
-
98
- # Startup Experience Requirement:
99
- # Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
100
- # preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
101
-
102
- # The fit score signifies based on following metrics:
103
- # 1–5 - Poor Fit - Auto-reject
104
- # 6–7 - Weak Fit - Auto-reject
105
- # 8.0–8.7 - Moderate Fit - Auto-reject
106
- # 8.8–10 - STRONG Fit - Include in results
107
- # """
108
-
109
- # # Create query prompt
110
- # query_prompt = ChatPromptTemplate.from_messages([
111
- # ("system", system),
112
- # ("human", """
113
- # You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
114
- # For this you will be provided with the follwing inputs of job and candidates:
115
- # Job Details
116
- # Company: {Company}
117
- # Role: {Role}
118
- # About Company: {desc}
119
- # Locations: {Locations}
120
- # Tech Stack: {Tech_Stack}
121
- # Industry: {Industry}
122
-
123
-
124
- # Candidate Details:
125
- # Full Name: {Full_Name}
126
- # LinkedIn URL: {LinkedIn_URL}
127
- # Current Title & Company: {Current_Title_Company}
128
- # Years of Experience: {Years_of_Experience}
129
- # Degree & University: {Degree_University}
130
- # Key Tech Stack: {Key_Tech_Stack}
131
- # Key Highlights: {Key_Highlights}
132
- # Location (from most recent experience): {cand_Location}
133
- # Past_Experience: {Experience}
134
-
135
-
136
- # Answer in the structured manner as per the schema.
137
- # If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
138
- # """),
139
- # ])
140
-
141
- # # Chain the prompt and LLM
142
- # cat_class = query_prompt | sum_llm
143
-
144
- # return cat_class
145
-
146
- # def call_llm(candidate_data, job_data, llm_chain):
147
- # """Call the actual LLM to evaluate the candidate"""
148
- # try:
149
- # # Convert tech stacks to strings for the LLM payload
150
- # job_tech_stack = job_data.get("Tech_Stack", set())
151
- # candidate_tech_stack = candidate_data.get("Tech Stack", set())
152
-
153
- # if isinstance(job_tech_stack, set):
154
- # job_tech_stack = ", ".join(sorted(job_tech_stack))
155
-
156
- # if isinstance(candidate_tech_stack, set):
157
- # candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
158
-
159
- # # Prepare payload for LLM
160
- # payload = {
161
- # "Company": job_data.get("Company", ""),
162
- # "Role": job_data.get("Role", ""),
163
- # "desc": job_data.get("desc", ""),
164
- # "Locations": job_data.get("Locations", ""),
165
- # "Tech_Stack": job_tech_stack,
166
- # "Industry": job_data.get("Industry", ""),
167
-
168
- # "Full_Name": candidate_data.get("Name", ""),
169
- # "LinkedIn_URL": candidate_data.get("URL", ""),
170
- # "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
171
- # "Years_of_Experience": candidate_data.get("Years of Experience", ""),
172
- # "Degree_University": candidate_data.get("Degree & Education", ""),
173
- # "Key_Tech_Stack": candidate_tech_stack,
174
- # "Key_Highlights": candidate_data.get("Key Highlights", ""),
175
- # "cand_Location": candidate_data.get("Location", ""),
176
- # "Experience": candidate_data.get("Experience", "")
177
- # }
178
-
179
- # # Call LLM
180
- # response = llm_chain.invoke(payload)
181
- # print(candidate_data.get("Experience", ""))
182
-
183
- # # Return response in expected format
184
- # return {
185
- # "candidate_name": response.candidate_name,
186
- # "candidate_url": response.candidate_url,
187
- # "candidate_summary": response.candidate_summary,
188
- # "candidate_location": response.candidate_location,
189
- # "fit_score": response.fit_score,
190
- # "justification": response.justification
191
- # }
192
- # except Exception as e:
193
- # st.error(f"Error calling LLM: {e}")
194
- # # Fallback to a default response
195
- # return {
196
- # "candidate_name": candidate_data.get("Name", "Unknown"),
197
- # "candidate_url": candidate_data.get("URL", ""),
198
- # "candidate_summary": "Error processing candidate profile",
199
- # "candidate_location": candidate_data.get("Location", "Unknown"),
200
- # "fit_score": 0.0,
201
- # "justification": f"Error in LLM processing: {str(e)}"
202
- # }
203
-
204
- # def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
205
- # """Process candidates for a specific job using the LLM"""
206
- # if llm_chain is None:
207
- # with st.spinner("Setting up LLM..."):
208
- # llm_chain = setup_llm()
209
-
210
- # selected_candidates = []
211
-
212
- # try:
213
- # # Get job-specific data
214
- # job_data = {
215
- # "Company": job_row["Company"],
216
- # "Role": job_row["Role"],
217
- # "desc": job_row.get("One liner", ""),
218
- # "Locations": job_row.get("Locations", ""),
219
- # "Tech_Stack": job_row["Tech Stack"],
220
- # "Industry": job_row.get("Industry", "")
221
- # }
222
-
223
- # # Find matching candidates for this job
224
- # with st.spinner("Finding matching candidates based on tech stack..."):
225
- # matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
226
-
227
- # if not matching_candidates:
228
- # st.warning("No candidates with matching tech stack found for this job.")
229
- # return []
230
-
231
- # st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
232
-
233
- # # Create progress elements
234
- # candidates_progress = st.progress(0)
235
- # candidate_status = st.empty()
236
-
237
- # # Process each candidate
238
- # for i, candidate_data in enumerate(matching_candidates):
239
- # # Update progress
240
- # candidates_progress.progress((i + 1) / len(matching_candidates))
241
- # candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
242
-
243
- # # Process the candidate with the LLM
244
- # response = call_llm(candidate_data, job_data, llm_chain)
245
-
246
- # response_dict = {
247
- # "Name": response["candidate_name"],
248
- # "LinkedIn": response["candidate_url"],
249
- # "summary": response["candidate_summary"],
250
- # "Location": response["candidate_location"],
251
- # "Fit Score": response["fit_score"],
252
- # "justification": response["justification"],
253
- # # Add back original candidate data for context
254
- # "Educational Background": candidate_data.get("Degree & Education", ""),
255
- # "Years of Experience": candidate_data.get("Years of Experience", ""),
256
- # "Current Title & Company": candidate_data.get("Current Title & Company", "")
257
- # }
258
-
259
- # # Add to selected candidates if score is high enough
260
- # if response["fit_score"] >= 8.8:
261
- # selected_candidates.append(response_dict)
262
- # st.markdown(response_dict)
263
- # else:
264
- # st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
265
-
266
- # # Clear progress indicators
267
- # candidates_progress.empty()
268
- # candidate_status.empty()
269
-
270
- # # Show results
271
- # if selected_candidates:
272
- # st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
273
- # else:
274
- # st.info("No candidates met the minimum fit score threshold for this job.")
275
-
276
- # return selected_candidates
277
-
278
- # except Exception as e:
279
- # st.error(f"Error processing job: {e}")
280
- # return []
281
-
282
- # def main():
283
- # st.title("👨‍💻 Candidate Matching App")
284
-
285
- # # Initialize session state
286
- # if 'processed_jobs' not in st.session_state:
287
- # st.session_state.processed_jobs = {}
288
-
289
- # st.write("""
290
- # This app matches job listings with candidate profiles based on tech stack and other criteria.
291
- # Select a job to find matching candidates.
292
- # """)
293
-
294
- # # API Key input
295
- # with st.sidebar:
296
- # st.header("API Configuration")
297
- # api_key = st.text_input("Enter OpenAI API Key", type="password")
298
- # if api_key:
299
- # os.environ["OPENAI_API_KEY"] = api_key
300
- # st.success("API Key set!")
301
- # else:
302
- # st.warning("Please enter OpenAI API Key to use LLM features")
303
-
304
- # # Show API key warning if not set
305
- # SERVICE_ACCOUNT_FILE = 'synapse-recruitment-e94255ca76fd.json'
306
- # SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
307
- # creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
308
- # gc = gspread.authorize(creds)
309
- # job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
310
- # candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
311
-
312
- # if not api_key:
313
- # st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
314
-
315
- # if api_key:
316
- # try:
317
- # # Load data from Google Sheets
318
- # job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
319
- # job_data = job_worksheet.get_all_values()
320
- # candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
321
- # candidate_data = candidate_worksheet.get_all_values()
322
-
323
- # # Convert to DataFrames
324
- # jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
325
- # candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
326
- # candidates_df = candidates_df.fillna("Unknown")
327
-
328
- # # Display data preview
329
- # with st.expander("Preview uploaded data"):
330
- # st.subheader("Jobs Data Preview")
331
- # st.dataframe(jobs_df.head(3))
332
-
333
- # st.subheader("Candidates Data Preview")
334
- # st.dataframe(candidates_df.head(3))
335
-
336
- # # Map column names if needed
337
- # column_mapping = {
338
- # "Full Name": "Full Name",
339
- # "LinkedIn URL": "LinkedIn URL",
340
- # "Current Title & Company": "Current Title & Company",
341
- # "Years of Experience": "Years of Experience",
342
- # "Degree & University": "Degree & University",
343
- # "Key Tech Stack": "Key Tech Stack",
344
- # "Key Highlights": "Key Highlights",
345
- # "Location (from most recent experience)": "Location (from most recent experience)"
346
- # }
347
-
348
- # # Rename columns if they don't match expected
349
- # candidates_df = candidates_df.rename(columns={
350
- # col: mapping for col, mapping in column_mapping.items()
351
- # if col in candidates_df.columns and col != mapping
352
- # })
353
-
354
- # # Now, instead of processing all jobs upfront, we'll display job selection
355
- # # and only process the selected job when the user chooses it
356
- # display_job_selection(jobs_df, candidates_df)
357
-
358
- # except Exception as e:
359
- # st.error(f"Error processing files: {e}")
360
-
361
- # st.divider()
362
-
363
-
364
- # def display_job_selection(jobs_df, candidates_df):
365
- # # Store the LLM chain as a session state to avoid recreating it
366
- # if 'llm_chain' not in st.session_state:
367
- # st.session_state.llm_chain = None
368
-
369
- # st.subheader("Select a job to view potential matches")
370
-
371
- # # Create job options - but don't compute matches yet
372
- # job_options = []
373
- # for i, row in jobs_df.iterrows():
374
- # job_options.append(f"{row['Role']} at {row['Company']}")
375
-
376
- # if job_options:
377
- # selected_job_index = st.selectbox("Jobs:",
378
- # range(len(job_options)),
379
- # format_func=lambda x: job_options[x])
380
-
381
- # # Display job details
382
- # job_row = jobs_df.iloc[selected_job_index]
383
-
384
- # # Parse tech stack for display
385
- # job_row_stack = parse_tech_stack(job_row["Tech Stack"])
386
-
387
- # col1, col2 = st.columns([2, 1])
388
-
389
- # with col1:
390
- # st.subheader(f"Job Details: {job_row['Role']}")
391
-
392
- # job_details = {
393
- # "Company": job_row["Company"],
394
- # "Role": job_row["Role"],
395
- # "Description": job_row.get("One liner", "N/A"),
396
- # "Locations": job_row.get("Locations", "N/A"),
397
- # "Industry": job_row.get("Industry", "N/A"),
398
- # "Tech Stack": display_tech_stack(job_row_stack)
399
- # }
400
-
401
- # for key, value in job_details.items():
402
- # st.markdown(f"**{key}:** {value}")
403
-
404
- # # Create a key for this job in session state
405
- # job_key = f"job_{selected_job_index}_processed"
406
-
407
- # if job_key not in st.session_state:
408
- # st.session_state[job_key] = False
409
-
410
- # # Add a process button for this job
411
- # if not st.session_state[job_key]:
412
- # if st.button(f"Find Matching Candidates for this Job"):
413
- # if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
414
- # st.error("Please enter your OpenAI API key in the sidebar before processing")
415
- # else:
416
- # # Process candidates for this job (only when requested)
417
- # selected_candidates = process_candidates_for_job(
418
- # job_row,
419
- # candidates_df,
420
- # st.session_state.llm_chain
421
- # )
422
-
423
- # # Store the results and set as processed
424
- # if 'Selected_Candidates' not in st.session_state:
425
- # st.session_state.Selected_Candidates = {}
426
- # st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
427
- # st.session_state[job_key] = True
428
-
429
- # # Store the LLM chain for reuse
430
- # if st.session_state.llm_chain is None:
431
- # st.session_state.llm_chain = setup_llm()
432
-
433
- # # Force refresh
434
- # st.rerun()
435
-
436
- # # Display selected candidates if already processed
437
- # if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
438
- # selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
439
-
440
- # # Display selected candidates
441
- # st.subheader("Selected Candidates")
442
-
443
- # if len(selected_candidates) > 0:
444
- # for i, candidate in enumerate(selected_candidates):
445
- # with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
446
- # col1, col2 = st.columns([3, 1])
447
-
448
- # with col1:
449
- # st.markdown(f"**Summary:** {candidate['summary']}")
450
- # st.markdown(f"**Current:** {candidate['Current Title & Company']}")
451
- # st.markdown(f"**Education:** {candidate['Educational Background']}")
452
- # st.markdown(f"**Experience:** {candidate['Years of Experience']}")
453
- # st.markdown(f"**Location:** {candidate['Location']}")
454
- # st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
455
-
456
- # with col2:
457
- # st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
458
-
459
- # st.markdown("**Justification:**")
460
- # st.info(candidate['justification'])
461
- # else:
462
- # st.info("No candidates met the minimum score threshold (8.8) for this job.")
463
-
464
- # # We don't show tech-matched candidates here since they are generated
465
- # # during the LLM matching process now
466
-
467
- # # Add a reset button to start over
468
- # if st.button("Reset and Process Again"):
469
- # st.session_state[job_key] = False
470
- # st.rerun()
471
-
472
- # if __name__ == "__main__":
473
- # main()
474
-
475
-
476
-
477
- import streamlit as st
478
- import pandas as pd
479
- import json
480
- import os
481
- from pydantic import BaseModel, Field
482
- from typing import List, Set, Dict, Any, Optional
483
- import time
484
- from langchain_openai import ChatOpenAI
485
- from langchain_core.messages import HumanMessage
486
- from langchain_core.prompts import ChatPromptTemplate
487
- from langchain_core.output_parsers import StrOutputParser
488
- from langchain_core.prompts import PromptTemplate
489
- import gspread
490
- from google.oauth2 import service_account
491
- import tiktoken
492
-
493
# Page-level Streamlit settings: browser-tab title, tab icon and wide layout.
st.set_page_config(page_title="Candidate Matching App", page_icon="👨‍💻🎯", layout="wide")
498
-
499
- # Define pydantic model for structured output
500
class Shortlist(BaseModel):
    """Structured verdict produced by the LLM for one candidate/job pair.

    Used as the schema for ``llm.with_structured_output(Shortlist)`` in
    ``setup_llm``; the field order and description strings are therefore
    runtime data and must not be reworded casually.
    """

    # Numeric fit score on a 0-10 scale.
    fit_score: float = Field(
        description=(
            "A score between 0 and 10 indicating how closely the candidate "
            "profile matches the job requirements."
        )
    )
    # Candidate's name as reported back by the model.
    candidate_name: str = Field(description="The name of the candidate.")
    # LinkedIn profile URL of the candidate.
    candidate_url: str = Field(
        description="The URL of the candidate's LinkedIn profile."
    )
    # Short free-text summary of skills, experience and education.
    candidate_summary: str = Field(
        description=(
            "A brief summary of the candidate's skills and experience along "
            "with its educational background."
        )
    )
    # Candidate's location.
    candidate_location: str = Field(description="The location of the candidate.")
    # Model's explanation for the score it assigned.
    justification: str = Field(
        description="Justification for the shortlisted candidate with the fit score"
    )
507
-
508
- # Function to calculate tokens
509
def calculate_tokens(text, model="gpt-4o-mini"):
    """Return the number of tokens ``text`` occupies for ``model``.

    Args:
        text: The text to measure. ``None`` and empty strings count as 0
            tokens; any other non-string value is converted with ``str()``.
        model: Model name used to pick the tokenizer. The encoding is looked
            up for this exact model (instead of forcing every "gpt-4*" model
            onto the gpt-4o-mini encoding); unknown model names fall back to
            ``cl100k_base``.

    Returns:
        int: The token count, or a rough ``len(text) // 4`` estimate when the
        tokenizer cannot be used at all.
    """
    if not text:
        return 0
    text = str(text)

    try:
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # tiktoken does not know this model name: use the generic default.
            encoding = tiktoken.get_encoding("cl100k_base")
        return len(encoding.encode(text))
    except Exception:
        # Deliberate best-effort: token counting only feeds a usage estimate,
        # so any tokenizer failure degrades to the ~4 characters/token rule
        # of thumb rather than aborting the caller.
        return len(text) // 4
525
-
526
- # Function to display token usage
527
def display_token_usage():
    """Render token-usage metrics and an estimated cost in the Streamlit UI.

    Reads ``total_input_tokens`` / ``total_output_tokens`` from
    ``st.session_state`` (initialising them to 0 when absent) and the model
    name from ``st.session_state["model_name"]``. If no model name has been
    stored yet, "gpt-4o-mini" is assumed instead of raising.

    Returns:
        int: Input tokens plus output tokens.
    """
    if 'total_input_tokens' not in st.session_state:
        st.session_state.total_input_tokens = 0
    if 'total_output_tokens' not in st.session_state:
        st.session_state.total_output_tokens = 0

    total_input = st.session_state.total_input_tokens
    total_output = st.session_state.total_output_tokens
    total_tokens = total_input + total_output

    # model_name is only written by setup_llm(); default it so this function
    # can be called before any LLM chain has been built.
    model_name = st.session_state.get("model_name", "gpt-4o-mini")

    # Rates are USD per 1K tokens.
    if model_name == "gpt-4o-mini":
        input_cost_per_1k = 0.0003
        output_cost_per_1k = 0.0006
    elif "gpt-4" in model_name:
        input_cost_per_1k = 0.005
        # NOTE(review): 0.60 per 1K output tokens is two orders of magnitude
        # above the input rate, and the earlier inline comments disagreed with
        # the values in this branch -- confirm against the provider's pricing.
        output_cost_per_1k = 0.60
    else:  # Assume gpt-3.5-turbo pricing
        input_cost_per_1k = 0.0015
        # NOTE(review): the earlier inline comment said $0.002 here while the
        # value is 0.015 -- confirm which one is intended.
        output_cost_per_1k = 0.015

    estimated_cost = (
        (total_input / 1000 * input_cost_per_1k)
        + (total_output / 1000 * output_cost_per_1k)
    )

    st.subheader("📊 Token Usage Statistics")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Input Tokens", f"{total_input:,}")

    with col2:
        st.metric("Output Tokens", f"{total_output:,}")

    with col3:
        st.metric("Total Tokens", f"{total_tokens:,}")

    st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")

    return total_tokens
567
-
568
- # Function to parse and normalize tech stacks
569
def parse_tech_stack(stack):
    """Parse a tech-stack value into a normalised set of technology names.

    Every accepted input shape goes through the same normalisation (trimmed,
    surrounding quotes removed, lower-cased, empty entries dropped) so that
    the sets produced for jobs and candidates can be intersected reliably.

    Args:
        stack: ``None``/NaN/empty string, a comma-separated string, the string
            form of a set (``"{'a', 'b'}"``), or a collection
            (set/frozenset/list/tuple) of names.

    Returns:
        set: Normalised technology names; empty when nothing usable is given
        or when parsing fails (the failure is reported via ``st.error``).
    """

    def _normalize(items):
        cleaned = (
            str(item).strip().strip("'\"").strip().lower()
            for item in items
            if item is not None
        )
        return {name for name in cleaned if name}

    if stack is None:
        return set()
    # Collections are handled before pd.isna(): on a list pd.isna returns an
    # array, whose truth value is ambiguous and would raise.
    if isinstance(stack, (set, frozenset, list, tuple)):
        return _normalize(stack)
    try:
        if pd.isna(stack):
            return set()
        text = str(stack).strip()
        # String representation of a set, e.g. "{'python', 'aws'}".
        if text.startswith("{") and text.endswith("}"):
            text = text.strip("{}")
        return _normalize(text.split(","))
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()
584
-
585
def display_tech_stack(stack_set):
    """Format a tech stack for display.

    A ``set`` is rendered as its members in sorted order joined by ", ";
    any other value is returned as ``str(value)``.
    """
    if not isinstance(stack_set, set):
        return str(stack_set)
    ordered = list(stack_set)
    ordered.sort()
    return ", ".join(ordered)
589
-
590
def get_matching_candidates(job_stack, candidates_df, min_common=2):
    """Find candidates whose tech stack overlaps the job's tech stack.

    Args:
        job_stack: Tech stack of the job, in any form accepted by
            ``parse_tech_stack``.
        candidates_df: DataFrame of candidates. The columns read here are
            'Key Tech Stack', 'Full Name', 'LinkedIn URL',
            'Degree & University', 'Years of Experience',
            'Current Title & Company', 'Key Highlights',
            'Location (from most recent experience)' and 'Experience'.
        min_common: Minimum number of technologies a candidate must share
            with the job to be kept. Defaults to 2, the previously
            hard-coded threshold.

    Returns:
        list[dict]: One dict per matching candidate, in DataFrame order, with
        the parsed stack stored under "Tech Stack".
    """
    matched = []
    job_stack_set = parse_tech_stack(job_stack)

    # The overlap can never exceed the job stack itself, so skip the scan
    # when the threshold is unreachable.
    if len(job_stack_set) < min_common:
        return matched

    for _, candidate in candidates_df.iterrows():
        candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
        common = job_stack_set & candidate_stack
        if len(common) < min_common:
            continue
        matched.append({
            "Name": candidate["Full Name"],
            "URL": candidate["LinkedIn URL"],
            "Degree & Education": candidate["Degree & University"],
            "Years of Experience": candidate["Years of Experience"],
            "Current Title & Company": candidate['Current Title & Company'],
            "Key Highlights": candidate["Key Highlights"],
            "Location": candidate["Location (from most recent experience)"],
            "Experience": str(candidate["Experience"]),
            "Tech Stack": candidate_stack,
        })
    return matched
611
-
612
def setup_llm():
    """Build the prompt -> structured-output LLM chain used to score candidates.

    Stores the model name in ``st.session_state.model_name`` (only when it is
    not already set), creates a ``ChatOpenAI`` client bound to the
    ``Shortlist`` schema, and pipes a system + human chat prompt into it.

    Returns:
        The runnable produced by ``prompt | structured_llm``.
    """
    model_name = "gpt-4o-mini"

    # Remember the model so token accounting can pick a tokenizer for it.
    if 'model_name' not in st.session_state:
        st.session_state.model_name = model_name

    # Prompt text is runtime data and is kept verbatim.
    system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
the profile is according to job.
Try to ensure following points while estimating the candidate's fit score:
For education:
Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
Tier3 - Unknown or unranked institutions - Lower points or reject

Startup Experience Requirement:
Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.

The fit score signifies based on following metrics:
1–5 - Poor Fit - Auto-reject
6–7 - Weak Fit - Auto-reject
8.0–8.7 - Moderate Fit - Auto-reject
8.8–10 - STRONG Fit - Include in results
"""

    human = """
You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
For this you will be provided with the follwing inputs of job and candidates:
Job Details
Company: {Company}
Role: {Role}
About Company: {desc}
Locations: {Locations}
Tech Stack: {Tech_Stack}
Industry: {Industry}


Candidate Details:
Full Name: {Full_Name}
LinkedIn URL: {LinkedIn_URL}
Current Title & Company: {Current_Title_Company}
Years of Experience: {Years_of_Experience}
Degree & University: {Degree_University}
Key Tech Stack: {Key_Tech_Stack}
Key Highlights: {Key_Highlights}
Location (from most recent experience): {cand_Location}
Past_Experience: {Experience}


Answer in the structured manner as per the schema.
If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
"""

    chat_model = ChatOpenAI(
        model=model_name,
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    # Constrain the model's answer to the Shortlist schema.
    structured_llm = chat_model.with_structured_output(Shortlist)

    prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

    return prompt | structured_llm
689
-
690
def call_llm(candidate_data, job_data, llm_chain):
    """Score one candidate against one job with the LLM chain.

    Also adds an estimate of the input/output tokens of this call to
    ``st.session_state.total_input_tokens`` / ``total_output_tokens``. The
    input estimate is computed from the JSON-serialised payload only (the
    prompt template text is not included).

    Args:
        candidate_data: Candidate dict as built by ``get_matching_candidates``
            (keys such as "Name", "URL", "Tech Stack", "Experience").
        job_data: Job dict with "Company", "Role", "desc", "Locations",
            "Tech_Stack" and "Industry".
        llm_chain: Runnable whose ``invoke(payload)`` returns an object with
            the ``Shortlist`` attributes.

    Returns:
        dict: ``candidate_name``, ``candidate_url``, ``candidate_summary``,
        ``candidate_location``, ``fit_score`` and ``justification``. On any
        failure the error is shown via ``st.error`` and a fallback dict with
        ``fit_score`` 0.0 is returned instead of raising.
    """

    def _stack_as_text(stack):
        # Sets are rendered as a sorted, comma-separated string; anything
        # else (e.g. the raw sheet value) is passed through untouched.
        if isinstance(stack, set):
            return ", ".join(sorted(stack))
        return stack

    try:
        payload = {
            "Company": job_data.get("Company", ""),
            "Role": job_data.get("Role", ""),
            "desc": job_data.get("desc", ""),
            "Locations": job_data.get("Locations", ""),
            "Tech_Stack": _stack_as_text(job_data.get("Tech_Stack", set())),
            "Industry": job_data.get("Industry", ""),

            "Full_Name": candidate_data.get("Name", ""),
            "LinkedIn_URL": candidate_data.get("URL", ""),
            "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
            "Years_of_Experience": candidate_data.get("Years of Experience", ""),
            "Degree_University": candidate_data.get("Degree & Education", ""),
            "Key_Tech_Stack": _stack_as_text(candidate_data.get("Tech Stack", set())),
            "Key_Highlights": candidate_data.get("Key Highlights", ""),
            "cand_Location": candidate_data.get("Location", ""),
            "Experience": candidate_data.get("Experience", ""),
        }

        # A missing model name must not turn a valid evaluation into an
        # "LLM error", so fall back to the default model for token counting.
        model_name = st.session_state.get("model_name", "gpt-4o-mini")

        input_tokens = calculate_tokens(json.dumps(payload), model_name)

        # The former debug print of the candidate's full experience text was
        # removed: it wrote personal data to stdout on every call.
        response = llm_chain.invoke(payload)

        result = {
            "candidate_name": response.candidate_name,
            "candidate_url": response.candidate_url,
            "candidate_summary": response.candidate_summary,
            "candidate_location": response.candidate_location,
            "fit_score": response.fit_score,
            "justification": response.justification,
        }

        # Flatten the structured response to text purely to estimate its size.
        response_text = "\n".join(f"{key}: {value}" for key, value in result.items())
        output_tokens = calculate_tokens(response_text, model_name)

        if 'total_input_tokens' not in st.session_state:
            st.session_state.total_input_tokens = 0
        if 'total_output_tokens' not in st.session_state:
            st.session_state.total_output_tokens = 0

        st.session_state.total_input_tokens += input_tokens
        st.session_state.total_output_tokens += output_tokens

        return result
    except Exception as e:
        st.error(f"Error calling LLM: {e}")
        # Fallback to a default response so the caller's loop keeps going;
        # the 0.0 score keeps this candidate below any selection threshold.
        return {
            "candidate_name": candidate_data.get("Name", "Unknown"),
            "candidate_url": candidate_data.get("URL", ""),
            "candidate_summary": "Error processing candidate profile",
            "candidate_location": candidate_data.get("Location", "Unknown"),
            "fit_score": 0.0,
            "justification": f"Error in LLM processing: {str(e)}"
        }
775
-
776
def process_candidates_for_job(job_row, candidates_df, llm_chain=None, min_fit_score=8.8):
    """Score the tech-stack-matched candidates for one job and keep the strong fits.

    Args:
        job_row: Row of the jobs table. "Company", "Role" and "Tech Stack" are
            read by key; "One liner", "Locations" and "Industry" are read with
            an empty-string default.
        candidates_df: Candidate table handed to get_matching_candidates().
        llm_chain: Chain forwarded to call_llm(). Built with setup_llm() when
            None.
        min_fit_score: Lowest fit score (inclusive) a candidate needs in order
            to be selected. Defaults to 8.8, the cut-off that was previously
            hard-coded.

    Returns:
        A list with one dict per selected candidate (LLM verdict plus a few
        original profile fields). Empty when no candidate matches the tech
        stack, none reaches the threshold, or an error occurs.
    """
    # Token counters are tracked per job, so every run starts from zero.
    st.session_state.total_input_tokens = 0
    st.session_state.total_output_tokens = 0

    if llm_chain is None:
        with st.spinner("Setting up LLM..."):
            llm_chain = setup_llm()

    selected_candidates = []

    try:
        # Job fields in the shape call_llm() expects.
        job_data = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "desc": job_row.get("One liner", ""),
            "Locations": job_row.get("Locations", ""),
            "Tech_Stack": job_row["Tech Stack"],
            "Industry": job_row.get("Industry", ""),
        }

        with st.spinner("Finding matching candidates based on tech stack..."):
            matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)

        if not matching_candidates:
            st.warning("No candidates with matching tech stack found for this job.")
            return []

        total = len(matching_candidates)
        st.success(f"Found {total} candidates with matching tech stack.")

        candidates_progress = st.progress(0)
        candidate_status = st.empty()

        try:
            for position, candidate_data in enumerate(matching_candidates, start=1):
                candidates_progress.progress(position / total)
                candidate_status.text(
                    f"Evaluating candidate {position}/{total}: {candidate_data.get('Name', 'Unknown')}"
                )

                response = call_llm(candidate_data, job_data, llm_chain)

                response_dict = {
                    "Name": response["candidate_name"],
                    "LinkedIn": response["candidate_url"],
                    "summary": response["candidate_summary"],
                    "Location": response["candidate_location"],
                    "Fit Score": response["fit_score"],
                    "justification": response["justification"],
                    # Original profile fields, kept for display alongside the verdict.
                    "Educational Background": candidate_data.get("Degree & Education", ""),
                    "Years of Experience": candidate_data.get("Years of Experience", ""),
                    "Current Title & Company": candidate_data.get("Current Title & Company", ""),
                }

                if response["fit_score"] >= min_fit_score:
                    selected_candidates.append(response_dict)
                    st.markdown(response_dict)
                else:
                    st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
        finally:
            # Clear the progress widgets even when evaluation fails part-way,
            # so a stale progress bar is not left on the page.
            candidates_progress.empty()
            candidate_status.empty()

        if selected_candidates:
            st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
        else:
            st.info("No candidates met the minimum fit score threshold for this job.")

        # Token usage is rendered by the caller when it shows the results.
        return selected_candidates

    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the page.
        st.error(f"Error processing job: {e}")
        return []
858
-
859
def main():
    """Render the app shell: API-key input, data loading, then job selection.

    The OpenAI key typed in the sidebar is exported as OPENAI_API_KEY. Once a
    key is present, the jobs and candidates worksheets are read from Google
    Sheets into DataFrames and handed to display_job_selection(). Any failure
    while authenticating, opening or reading the sheets is shown with
    st.error() instead of aborting the script.
    """
    st.title("👨‍💻 Candidate Matching App")

    # Initialize session state
    if 'processed_jobs' not in st.session_state:
        st.session_state.processed_jobs = {}

    st.write("""
    This app matches job listings with candidate profiles based on tech stack and other criteria.
    Select a job to find matching candidates.
    """)

    # API key input
    with st.sidebar:
        st.header("API Configuration")
        api_key = st.text_input("Enter OpenAI API Key", type="password")
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
            st.success("API Key set!")
        else:
            st.warning("Please enter OpenAI API Key to use LLM features")

    if not api_key:
        st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
    else:
        try:
            # Google Sheets access lives inside the guarded block so that a
            # missing credentials file or a network error is reported in the
            # UI, and no Sheets call is made before a key has been entered.
            SERVICE_ACCOUNT_FILE = 'synapse-recruitment-e94255ca76fd.json'
            SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
            creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
            gc = gspread.authorize(creds)
            job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
            candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')

            # Load data from Google Sheets
            job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
            job_data = job_worksheet.get_all_values()
            candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
            candidate_data = candidate_worksheet.get_all_values()

            # First row of each sheet is the header row.
            jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
            candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
            candidates_df = candidates_df.fillna("Unknown")

            # Display data preview
            with st.expander("Preview uploaded data"):
                st.subheader("Jobs Data Preview")
                st.dataframe(jobs_df.head(3))

                st.subheader("Candidates Data Preview")
                st.dataframe(candidates_df.head(3))

            # Jobs are not processed upfront; the selected job is only
            # evaluated when the user asks for it.
            display_job_selection(jobs_df, candidates_df)

        except Exception as e:
            # Top-level UI boundary: surface the problem to the user.
            st.error(f"Error processing files: {e}")

    st.divider()
939
-
940
-
941
def display_job_selection(jobs_df, candidates_df):
    """Let the user pick a job, run candidate matching on demand, and show results.

    Args:
        jobs_df: Jobs table; "Role", "Company" and "Tech Stack" are read by
            key, "One liner", "Locations" and "Industry" with an "N/A" default.
        candidates_df: Candidate table forwarded to process_candidates_for_job().

    Session state used:
        llm_chain: Cached chain, built once with setup_llm() and reused.
        job_<index>_processed: Whether the job at that position has results.
        Selected_Candidates: Dict mapping job position to its selected candidates.
    """
    if 'llm_chain' not in st.session_state:
        st.session_state.llm_chain = None

    st.subheader("Select a job to view potential matches")

    # Labels only; matching is deferred until the user asks for it.
    job_options = [f"{row['Role']} at {row['Company']}" for _, row in jobs_df.iterrows()]
    if not job_options:
        return

    selected_job_index = st.selectbox("Jobs:",
                                      range(len(job_options)),
                                      format_func=lambda x: job_options[x])

    job_row = jobs_df.iloc[selected_job_index]
    job_row_stack = parse_tech_stack(job_row["Tech Stack"])

    # The second column is intentionally left empty; it only constrains the width.
    details_col, _ = st.columns([2, 1])

    with details_col:
        st.subheader(f"Job Details: {job_row['Role']}")

        job_details = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "Description": job_row.get("One liner", "N/A"),
            "Locations": job_row.get("Locations", "N/A"),
            "Industry": job_row.get("Industry", "N/A"),
            "Tech Stack": display_tech_stack(job_row_stack),
        }

        for key, value in job_details.items():
            st.markdown(f"**{key}:** {value}")

    # Per-job "already processed" flag in session state.
    job_key = f"job_{selected_job_index}_processed"
    if job_key not in st.session_state:
        st.session_state[job_key] = False

    if not st.session_state[job_key]:
        if st.button("Find Matching Candidates for this Job"):
            if not os.environ.get("OPENAI_API_KEY"):
                st.error("Please enter your OpenAI API key in the sidebar before processing")
            else:
                # Build and cache the chain BEFORE processing, so it is
                # created once rather than once for the run and once more
                # for the cache.
                if st.session_state.llm_chain is None:
                    with st.spinner("Setting up LLM..."):
                        st.session_state.llm_chain = setup_llm()

                selected_candidates = process_candidates_for_job(
                    job_row,
                    candidates_df,
                    st.session_state.llm_chain
                )

                if 'Selected_Candidates' not in st.session_state:
                    st.session_state.Selected_Candidates = {}
                st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
                st.session_state[job_key] = True

                # Re-run so the page switches to the results view.
                st.rerun()

    if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
        selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])

        st.subheader("Selected Candidates")

        if 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
            display_token_usage()

        if len(selected_candidates) > 0:
            for i, candidate in enumerate(selected_candidates):
                with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
                    info_col, score_col = st.columns([3, 1])

                    with info_col:
                        st.markdown(f"**Summary:** {candidate['summary']}")
                        st.markdown(f"**Current:** {candidate['Current Title & Company']}")
                        st.markdown(f"**Education:** {candidate['Educational Background']}")
                        st.markdown(f"**Experience:** {candidate['Years of Experience']}")
                        st.markdown(f"**Location:** {candidate['Location']}")
                        st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")

                    with score_col:
                        st.markdown(f"**Fit Score:** {candidate['Fit Score']}")

                    st.markdown("**Justification:**")
                    st.info(candidate['justification'])
        else:
            st.info("No candidates met the minimum score threshold (8.8) for this job.")

        # Reset only the processed flag; token counters are left alone so
        # they stay visible until the next run overwrites them.
        if st.button("Reset and Process Again"):
            st.session_state[job_key] = False
            st.rerun()
1053
-
1054
- if __name__ == "__main__":
1055
  main()
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import json
4
+ # import os
5
+ # from pydantic import BaseModel, Field
6
+ # from typing import List, Set, Dict, Any, Optional
7
+ # import time
8
+ # from langchain_openai import ChatOpenAI
9
+ # from langchain_core.messages import HumanMessage
10
+ # from langchain_core.prompts import ChatPromptTemplate
11
+ # from langchain_core.output_parsers import StrOutputParser
12
+ # from langchain_core.prompts import PromptTemplate
13
+ # import gspread
14
+ # from google.oauth2 import service_account
15
+
16
+ # st.set_page_config(
17
+ # page_title="Candidate Matching App",
18
+ # page_icon="👨‍💻🎯",
19
+ # layout="wide"
20
+ # )
21
+
22
+ # # Define pydantic model for structured output
23
+ # class Shortlist(BaseModel):
24
+ # fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
25
+ # candidate_name: str = Field(description="The name of the candidate.")
26
+ # candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
27
+ # candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
28
+ # candidate_location: str = Field(description="The location of the candidate.")
29
+ # justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
30
+
31
+ # # Function to parse and normalize tech stacks
32
+ # def parse_tech_stack(stack):
33
+ # if pd.isna(stack) or stack == "" or stack is None:
34
+ # return set()
35
+ # if isinstance(stack, set):
36
+ # return stack
37
+ # try:
38
+ # # Handle potential string representation of sets
39
+ # if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
40
+ # # This could be a string representation of a set
41
+ # items = stack.strip("{}").split(",")
42
+ # return set(item.strip().strip("'\"") for item in items if item.strip())
43
+ # return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
44
+ # except Exception as e:
45
+ # st.error(f"Error parsing tech stack: {e}")
46
+ # return set()
47
+
48
+ # def display_tech_stack(stack_set):
49
+ # if isinstance(stack_set, set):
50
+ # return ", ".join(sorted(stack_set))
51
+ # return str(stack_set)
52
+
53
+ # def get_matching_candidates(job_stack, candidates_df):
54
+ # """Find candidates with matching tech stack for a specific job"""
55
+ # matched = []
56
+ # job_stack_set = parse_tech_stack(job_stack)
57
+
58
+ # for _, candidate in candidates_df.iterrows():
59
+ # candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
60
+ # common = job_stack_set & candidate_stack
61
+ # if len(common) >= 2:
62
+ # matched.append({
63
+ # "Name": candidate["Full Name"],
64
+ # "URL": candidate["LinkedIn URL"],
65
+ # "Degree & Education": candidate["Degree & University"],
66
+ # "Years of Experience": candidate["Years of Experience"],
67
+ # "Current Title & Company": candidate['Current Title & Company'],
68
+ # "Key Highlights": candidate["Key Highlights"],
69
+ # "Location": candidate["Location (from most recent experience)"],
70
+ # "Experience": str(candidate["Experience"]),
71
+ # "Tech Stack": candidate_stack
72
+ # })
73
+ # return matched
74
+
75
+ # def setup_llm():
76
+ # """Set up the LangChain LLM with structured output"""
77
+ # # Create LLM instance
78
+ # llm = ChatOpenAI(
79
+ # model="gpt-4o-mini",
80
+ # temperature=0,
81
+ # max_tokens=None,
82
+ # timeout=None,
83
+ # max_retries=2,
84
+ # )
85
+
86
+ # # Create structured output
87
+ # sum_llm = llm.with_structured_output(Shortlist)
88
+
89
+ # # Create system prompt
90
+ # system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
91
+ # the profile is according to job.
92
+ # Try to ensure following points while estimating the candidate's fit score:
93
+ # For education:
94
+ # Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
95
+ # Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
96
+ # Tier3 - Unknown or unranked institutions - Lower points or reject
97
+
98
+ # Startup Experience Requirement:
99
+ # Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
100
+ # preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
101
+
102
+ # The fit score signifies based on following metrics:
103
+ # 1–5 - Poor Fit - Auto-reject
104
+ # 6–7 - Weak Fit - Auto-reject
105
+ # 8.0–8.7 - Moderate Fit - Auto-reject
106
+ # 8.810 - STRONG Fit - Include in results
107
+ # """
108
+
109
+ # # Create query prompt
110
+ # query_prompt = ChatPromptTemplate.from_messages([
111
+ # ("system", system),
112
+ # ("human", """
113
+ # You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
114
+ # For this you will be provided with the follwing inputs of job and candidates:
115
+ # Job Details
116
+ # Company: {Company}
117
+ # Role: {Role}
118
+ # About Company: {desc}
119
+ # Locations: {Locations}
120
+ # Tech Stack: {Tech_Stack}
121
+ # Industry: {Industry}
122
+
123
+
124
+ # Candidate Details:
125
+ # Full Name: {Full_Name}
126
+ # LinkedIn URL: {LinkedIn_URL}
127
+ # Current Title & Company: {Current_Title_Company}
128
+ # Years of Experience: {Years_of_Experience}
129
+ # Degree & University: {Degree_University}
130
+ # Key Tech Stack: {Key_Tech_Stack}
131
+ # Key Highlights: {Key_Highlights}
132
+ # Location (from most recent experience): {cand_Location}
133
+ # Past_Experience: {Experience}
134
+
135
+
136
+ # Answer in the structured manner as per the schema.
137
+ # If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
138
+ # """),
139
+ # ])
140
+
141
+ # # Chain the prompt and LLM
142
+ # cat_class = query_prompt | sum_llm
143
+
144
+ # return cat_class
145
+
146
+ # def call_llm(candidate_data, job_data, llm_chain):
147
+ # """Call the actual LLM to evaluate the candidate"""
148
+ # try:
149
+ # # Convert tech stacks to strings for the LLM payload
150
+ # job_tech_stack = job_data.get("Tech_Stack", set())
151
+ # candidate_tech_stack = candidate_data.get("Tech Stack", set())
152
+
153
+ # if isinstance(job_tech_stack, set):
154
+ # job_tech_stack = ", ".join(sorted(job_tech_stack))
155
+
156
+ # if isinstance(candidate_tech_stack, set):
157
+ # candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
158
+
159
+ # # Prepare payload for LLM
160
+ # payload = {
161
+ # "Company": job_data.get("Company", ""),
162
+ # "Role": job_data.get("Role", ""),
163
+ # "desc": job_data.get("desc", ""),
164
+ # "Locations": job_data.get("Locations", ""),
165
+ # "Tech_Stack": job_tech_stack,
166
+ # "Industry": job_data.get("Industry", ""),
167
+
168
+ # "Full_Name": candidate_data.get("Name", ""),
169
+ # "LinkedIn_URL": candidate_data.get("URL", ""),
170
+ # "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
171
+ # "Years_of_Experience": candidate_data.get("Years of Experience", ""),
172
+ # "Degree_University": candidate_data.get("Degree & Education", ""),
173
+ # "Key_Tech_Stack": candidate_tech_stack,
174
+ # "Key_Highlights": candidate_data.get("Key Highlights", ""),
175
+ # "cand_Location": candidate_data.get("Location", ""),
176
+ # "Experience": candidate_data.get("Experience", "")
177
+ # }
178
+
179
+ # # Call LLM
180
+ # response = llm_chain.invoke(payload)
181
+ # print(candidate_data.get("Experience", ""))
182
+
183
+ # # Return response in expected format
184
+ # return {
185
+ # "candidate_name": response.candidate_name,
186
+ # "candidate_url": response.candidate_url,
187
+ # "candidate_summary": response.candidate_summary,
188
+ # "candidate_location": response.candidate_location,
189
+ # "fit_score": response.fit_score,
190
+ # "justification": response.justification
191
+ # }
192
+ # except Exception as e:
193
+ # st.error(f"Error calling LLM: {e}")
194
+ # # Fallback to a default response
195
+ # return {
196
+ # "candidate_name": candidate_data.get("Name", "Unknown"),
197
+ # "candidate_url": candidate_data.get("URL", ""),
198
+ # "candidate_summary": "Error processing candidate profile",
199
+ # "candidate_location": candidate_data.get("Location", "Unknown"),
200
+ # "fit_score": 0.0,
201
+ # "justification": f"Error in LLM processing: {str(e)}"
202
+ # }
203
+
204
+ # def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
205
+ # """Process candidates for a specific job using the LLM"""
206
+ # if llm_chain is None:
207
+ # with st.spinner("Setting up LLM..."):
208
+ # llm_chain = setup_llm()
209
+
210
+ # selected_candidates = []
211
+
212
+ # try:
213
+ # # Get job-specific data
214
+ # job_data = {
215
+ # "Company": job_row["Company"],
216
+ # "Role": job_row["Role"],
217
+ # "desc": job_row.get("One liner", ""),
218
+ # "Locations": job_row.get("Locations", ""),
219
+ # "Tech_Stack": job_row["Tech Stack"],
220
+ # "Industry": job_row.get("Industry", "")
221
+ # }
222
+
223
+ # # Find matching candidates for this job
224
+ # with st.spinner("Finding matching candidates based on tech stack..."):
225
+ # matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
226
+
227
+ # if not matching_candidates:
228
+ # st.warning("No candidates with matching tech stack found for this job.")
229
+ # return []
230
+
231
+ # st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
232
+
233
+ # # Create progress elements
234
+ # candidates_progress = st.progress(0)
235
+ # candidate_status = st.empty()
236
+
237
+ # # Process each candidate
238
+ # for i, candidate_data in enumerate(matching_candidates):
239
+ # # Update progress
240
+ # candidates_progress.progress((i + 1) / len(matching_candidates))
241
+ # candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
242
+
243
+ # # Process the candidate with the LLM
244
+ # response = call_llm(candidate_data, job_data, llm_chain)
245
+
246
+ # response_dict = {
247
+ # "Name": response["candidate_name"],
248
+ # "LinkedIn": response["candidate_url"],
249
+ # "summary": response["candidate_summary"],
250
+ # "Location": response["candidate_location"],
251
+ # "Fit Score": response["fit_score"],
252
+ # "justification": response["justification"],
253
+ # # Add back original candidate data for context
254
+ # "Educational Background": candidate_data.get("Degree & Education", ""),
255
+ # "Years of Experience": candidate_data.get("Years of Experience", ""),
256
+ # "Current Title & Company": candidate_data.get("Current Title & Company", "")
257
+ # }
258
+
259
+ # # Add to selected candidates if score is high enough
260
+ # if response["fit_score"] >= 8.8:
261
+ # selected_candidates.append(response_dict)
262
+ # st.markdown(response_dict)
263
+ # else:
264
+ # st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
265
+
266
+ # # Clear progress indicators
267
+ # candidates_progress.empty()
268
+ # candidate_status.empty()
269
+
270
+ # # Show results
271
+ # if selected_candidates:
272
+ # st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
273
+ # else:
274
+ # st.info("No candidates met the minimum fit score threshold for this job.")
275
+
276
+ # return selected_candidates
277
+
278
+ # except Exception as e:
279
+ # st.error(f"Error processing job: {e}")
280
+ # return []
281
+
282
+ # def main():
283
+ # st.title("👨‍💻 Candidate Matching App")
284
+
285
+ # # Initialize session state
286
+ # if 'processed_jobs' not in st.session_state:
287
+ # st.session_state.processed_jobs = {}
288
+
289
+ # st.write("""
290
+ # This app matches job listings with candidate profiles based on tech stack and other criteria.
291
+ # Select a job to find matching candidates.
292
+ # """)
293
+
294
+ # # API Key input
295
+ # with st.sidebar:
296
+ # st.header("API Configuration")
297
+ # api_key = st.text_input("Enter OpenAI API Key", type="password")
298
+ # if api_key:
299
+ # os.environ["OPENAI_API_KEY"] = api_key
300
+ # st.success("API Key set!")
301
+ # else:
302
+ # st.warning("Please enter OpenAI API Key to use LLM features")
303
+
304
+ # # Show API key warning if not set
305
+ # SERVICE_ACCOUNT_FILE = 'synapse-recruitment-e94255ca76fd.json'
306
+ # SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
307
+ # creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
308
+ # gc = gspread.authorize(creds)
309
+ # job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
310
+ # candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
311
+
312
+ # if not api_key:
313
+ # st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
314
+
315
+ # if api_key:
316
+ # try:
317
+ # # Load data from Google Sheets
318
+ # job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
319
+ # job_data = job_worksheet.get_all_values()
320
+ # candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
321
+ # candidate_data = candidate_worksheet.get_all_values()
322
+
323
+ # # Convert to DataFrames
324
+ # jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
325
+ # candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
326
+ # candidates_df = candidates_df.fillna("Unknown")
327
+
328
+ # # Display data preview
329
+ # with st.expander("Preview uploaded data"):
330
+ # st.subheader("Jobs Data Preview")
331
+ # st.dataframe(jobs_df.head(3))
332
+
333
+ # st.subheader("Candidates Data Preview")
334
+ # st.dataframe(candidates_df.head(3))
335
+
336
+ # # Map column names if needed
337
+ # column_mapping = {
338
+ # "Full Name": "Full Name",
339
+ # "LinkedIn URL": "LinkedIn URL",
340
+ # "Current Title & Company": "Current Title & Company",
341
+ # "Years of Experience": "Years of Experience",
342
+ # "Degree & University": "Degree & University",
343
+ # "Key Tech Stack": "Key Tech Stack",
344
+ # "Key Highlights": "Key Highlights",
345
+ # "Location (from most recent experience)": "Location (from most recent experience)"
346
+ # }
347
+
348
+ # # Rename columns if they don't match expected
349
+ # candidates_df = candidates_df.rename(columns={
350
+ # col: mapping for col, mapping in column_mapping.items()
351
+ # if col in candidates_df.columns and col != mapping
352
+ # })
353
+
354
+ # # Now, instead of processing all jobs upfront, we'll display job selection
355
+ # # and only process the selected job when the user chooses it
356
+ # display_job_selection(jobs_df, candidates_df)
357
+
358
+ # except Exception as e:
359
+ # st.error(f"Error processing files: {e}")
360
+
361
+ # st.divider()
362
+
363
+
364
+ # def display_job_selection(jobs_df, candidates_df):
365
+ # # Store the LLM chain as a session state to avoid recreating it
366
+ # if 'llm_chain' not in st.session_state:
367
+ # st.session_state.llm_chain = None
368
+
369
+ # st.subheader("Select a job to view potential matches")
370
+
371
+ # # Create job options - but don't compute matches yet
372
+ # job_options = []
373
+ # for i, row in jobs_df.iterrows():
374
+ # job_options.append(f"{row['Role']} at {row['Company']}")
375
+
376
+ # if job_options:
377
+ # selected_job_index = st.selectbox("Jobs:",
378
+ # range(len(job_options)),
379
+ # format_func=lambda x: job_options[x])
380
+
381
+ # # Display job details
382
+ # job_row = jobs_df.iloc[selected_job_index]
383
+
384
+ # # Parse tech stack for display
385
+ # job_row_stack = parse_tech_stack(job_row["Tech Stack"])
386
+
387
+ # col1, col2 = st.columns([2, 1])
388
+
389
+ # with col1:
390
+ # st.subheader(f"Job Details: {job_row['Role']}")
391
+
392
+ # job_details = {
393
+ # "Company": job_row["Company"],
394
+ # "Role": job_row["Role"],
395
+ # "Description": job_row.get("One liner", "N/A"),
396
+ # "Locations": job_row.get("Locations", "N/A"),
397
+ # "Industry": job_row.get("Industry", "N/A"),
398
+ # "Tech Stack": display_tech_stack(job_row_stack)
399
+ # }
400
+
401
+ # for key, value in job_details.items():
402
+ # st.markdown(f"**{key}:** {value}")
403
+
404
+ # # Create a key for this job in session state
405
+ # job_key = f"job_{selected_job_index}_processed"
406
+
407
+ # if job_key not in st.session_state:
408
+ # st.session_state[job_key] = False
409
+
410
+ # # Add a process button for this job
411
+ # if not st.session_state[job_key]:
412
+ # if st.button(f"Find Matching Candidates for this Job"):
413
+ # if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
414
+ # st.error("Please enter your OpenAI API key in the sidebar before processing")
415
+ # else:
416
+ # # Process candidates for this job (only when requested)
417
+ # selected_candidates = process_candidates_for_job(
418
+ # job_row,
419
+ # candidates_df,
420
+ # st.session_state.llm_chain
421
+ # )
422
+
423
+ # # Store the results and set as processed
424
+ # if 'Selected_Candidates' not in st.session_state:
425
+ # st.session_state.Selected_Candidates = {}
426
+ # st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
427
+ # st.session_state[job_key] = True
428
+
429
+ # # Store the LLM chain for reuse
430
+ # if st.session_state.llm_chain is None:
431
+ # st.session_state.llm_chain = setup_llm()
432
+
433
+ # # Force refresh
434
+ # st.rerun()
435
+
436
+ # # Display selected candidates if already processed
437
+ # if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
438
+ # selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
439
+
440
+ # # Display selected candidates
441
+ # st.subheader("Selected Candidates")
442
+
443
+ # if len(selected_candidates) > 0:
444
+ # for i, candidate in enumerate(selected_candidates):
445
+ # with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
446
+ # col1, col2 = st.columns([3, 1])
447
+
448
+ # with col1:
449
+ # st.markdown(f"**Summary:** {candidate['summary']}")
450
+ # st.markdown(f"**Current:** {candidate['Current Title & Company']}")
451
+ # st.markdown(f"**Education:** {candidate['Educational Background']}")
452
+ # st.markdown(f"**Experience:** {candidate['Years of Experience']}")
453
+ # st.markdown(f"**Location:** {candidate['Location']}")
454
+ # st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
455
+
456
+ # with col2:
457
+ # st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
458
+
459
+ # st.markdown("**Justification:**")
460
+ # st.info(candidate['justification'])
461
+ # else:
462
+ # st.info("No candidates met the minimum score threshold (8.8) for this job.")
463
+
464
+ # # We don't show tech-matched candidates here since they are generated
465
+ # # during the LLM matching process now
466
+
467
+ # # Add a reset button to start over
468
+ # if st.button("Reset and Process Again"):
469
+ # st.session_state[job_key] = False
470
+ # st.rerun()
471
+
472
+ # if __name__ == "__main__":
473
+ # main()
474
+
475
+
476
+
477
+ import streamlit as st
478
+ import pandas as pd
479
+ import json
480
+ import os
481
+ from pydantic import BaseModel, Field
482
+ from typing import List, Set, Dict, Any, Optional
483
+ import time
484
+ from langchain_openai import ChatOpenAI
485
+ from langchain_core.messages import HumanMessage
486
+ from langchain_core.prompts import ChatPromptTemplate
487
+ from langchain_core.output_parsers import StrOutputParser
488
+ from langchain_core.prompts import PromptTemplate
489
+ import gspread
490
+ from google.oauth2 import service_account
491
+ import tiktoken
492
+
493
# Page-level settings (browser tab title, icon, wide layout) are applied
# here at import time, ahead of any other Streamlit call in this module.
st.set_page_config(
    page_title="Candidate Matching App",
    page_icon="👨‍💻🎯",
    layout="wide",
)
498
+
499
+ # Define pydantic model for structured output
500
class Shortlist(BaseModel):
    # Structured verdict for one candidate/job pair.
    # NOTE: documented with comments rather than a class docstring on purpose;
    # a pydantic model's docstring becomes part of its JSON schema.
    # Field order and description strings are kept exactly as they were.

    # Numeric match quality; callers compare it against a cut-off.
    fit_score: float = Field(
        description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements."
    )
    # Identity and contact details for the candidate.
    candidate_name: str = Field(
        description="The name of the candidate."
    )
    candidate_url: str = Field(
        description="The URL of the candidate's LinkedIn profile."
    )
    # Free-text fields shown to the recruiter.
    candidate_summary: str = Field(
        description="A brief summary of the candidate's skills and experience along with its educational background."
    )
    candidate_location: str = Field(
        description="The location of the candidate."
    )
    justification: str = Field(
        description="Justification for the shortlisted candidate with the fit score"
    )
507
+
508
+ # Function to calculate tokens
509
def calculate_tokens(text, model="gpt-4o-mini"):
    """Return the number of tokens `text` occupies for `model`.

    The encoding is looked up from the actual model name, so gpt-4 /
    gpt-4-turbo are no longer counted with the gpt-4o-mini encoding. Model
    names tiktoken does not recognise fall back to the family heuristic used
    before ("gpt-4*" -> gpt-4o-mini encoding, "gpt-3.5*" -> gpt-3.5-turbo
    encoding, otherwise cl100k_base).

    Args:
        text: Text to measure. Empty or None counts as 0 tokens.
        model: Model name used to pick the tokenizer.

    Returns:
        The token count, or a rough estimate of one token per four characters
        when tokenization is unavailable or fails.
    """
    if not text:
        return 0

    try:
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # tiktoken raises KeyError for model names it cannot map.
            if "gpt-4" in model:
                encoding = tiktoken.encoding_for_model("gpt-4o-mini")
            elif "gpt-3.5" in model:
                encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
            else:
                encoding = tiktoken.get_encoding("cl100k_base")

        return len(encoding.encode(text))
    except Exception:
        # Deliberate best-effort: token counts only feed a usage estimate, so
        # any tokenizer problem degrades to the 1 token ~ 4 chars heuristic.
        return len(text) // 4
525
+
526
+ # Function to display token usage
527
def _token_rates_per_1k(model_name):
    """Return ``(input_rate, output_rate)`` in USD per 1K tokens for ``model_name``.

    NOTE(review): these are hard-coded list prices and go stale; confirm
    against the provider's current pricing before relying on the estimate.
    """
    if model_name == "gpt-4o-mini":
        return 0.00015, 0.0006  # $0.15 / $0.60 per 1M tokens
    if "gpt-4" in model_name:
        return 0.005, 0.015  # gpt-4o class pricing
    return 0.0015, 0.002  # assume gpt-3.5-turbo pricing


def display_token_usage():
    """Render the accumulated token counters and an estimated cost.

    Reads ``total_input_tokens`` / ``total_output_tokens`` from
    ``st.session_state`` (creating them as 0 when missing) and the model name
    from ``st.session_state.model_name`` (defaulting to "gpt-4o-mini" when it
    has not been set yet).

    Returns:
        The combined input + output token count.
    """
    if 'total_input_tokens' not in st.session_state:
        st.session_state.total_input_tokens = 0
    if 'total_output_tokens' not in st.session_state:
        st.session_state.total_output_tokens = 0

    total_input = st.session_state.total_input_tokens
    total_output = st.session_state.total_output_tokens
    total_tokens = total_input + total_output

    # model_name is only written by setup_llm(); do not crash if the usage
    # panel is rendered before a chain has been built.
    model_name = st.session_state.get("model_name", "gpt-4o-mini")
    input_cost_per_1k, output_cost_per_1k = _token_rates_per_1k(model_name)

    estimated_cost = (
        total_input / 1000 * input_cost_per_1k
        + total_output / 1000 * output_cost_per_1k
    )

    st.subheader("📊 Token Usage Statistics")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Input Tokens", f"{total_input:,}")

    with col2:
        st.metric("Output Tokens", f"{total_output:,}")

    with col3:
        st.metric("Total Tokens", f"{total_tokens:,}")

    st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")

    return total_tokens
567
+
568
+ # Function to parse and normalize tech stacks
569
def parse_tech_stack(stack):
    """Normalize a tech-stack value into a set of lower-cased technology names.

    Accepted inputs:
      * ``None``, NaN or an empty/blank string -> empty set
      * a ``set`` -> returned unchanged (assumed to be already normalized)
      * a list/tuple/frozenset -> each item stringified and normalized
      * a comma-separated string, e.g. ``"Python, AWS"``
      * the string form of a set, e.g. ``"{'Python', 'AWS'}"``

    Every name is stripped and lower-cased, and empty entries (from trailing
    or doubled commas) are dropped so that an empty string can never count as
    a shared technology when two stacks are intersected.

    Returns:
        A set of normalized names; an empty set when nothing usable is found
        or when parsing fails (the failure is reported through ``st.error``).
    """
    if stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        if isinstance(stack, (list, tuple, frozenset)):
            # pd.isna() on a sequence returns an array, which cannot be used
            # in a boolean test, so sequences are handled before that check.
            pieces = [str(item) for item in stack]
            quoted = False
        else:
            if pd.isna(stack) or stack == "":
                return set()
            text = str(stack).strip()
            # A "{...}" wrapper means the cell holds the repr of a set, whose
            # members are additionally wrapped in quotes.
            quoted = text.startswith("{") and text.endswith("}")
            if quoted:
                text = text.strip("{}")
            pieces = text.split(",")

        names = set()
        for piece in pieces:
            name = piece.strip()
            if quoted:
                name = name.strip("'\"").strip()
            if name:
                names.add(name.lower())
        return names
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()
584
+
585
def display_tech_stack(stack_set):
    """Turn a tech stack into display text.

    A ``set`` is rendered as its members in sorted order joined by ", ";
    any other value is simply converted with ``str``.
    """
    if not isinstance(stack_set, set):
        return str(stack_set)
    ordered_names = sorted(stack_set)
    return ", ".join(ordered_names)
589
+
590
def get_matching_candidates(job_stack, candidates_df, min_common=2):
    """Find candidates whose tech stack overlaps the job's tech stack.

    Args:
        job_stack: The job's tech stack in any form ``parse_tech_stack`` accepts.
        candidates_df: DataFrame with the candidate columns read below
            ("Key Tech Stack", "Full Name", "LinkedIn URL", ...).
        min_common: Minimum number of technologies a candidate must share with
            the job to be kept. Defaults to 2, the previously fixed threshold.

    Returns:
        A list of dicts, one per matching candidate, in DataFrame order. The
        "Tech Stack" entry holds the candidate's parsed stack as a set.
    """
    job_stack_set = parse_tech_stack(job_stack)

    # A job with fewer technologies than the threshold can never match anyone.
    if len(job_stack_set) < min_common:
        return []

    matched = []
    for _, candidate in candidates_df.iterrows():
        candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
        if len(job_stack_set & candidate_stack) < min_common:
            continue
        matched.append({
            "Name": candidate["Full Name"],
            "URL": candidate["LinkedIn URL"],
            "Degree & Education": candidate["Degree & University"],
            "Years of Experience": candidate["Years of Experience"],
            "Current Title & Company": candidate['Current Title & Company'],
            "Key Highlights": candidate["Key Highlights"],
            "Location": candidate["Location (from most recent experience)"],
            "Experience": str(candidate["Experience"]),
            "Tech Stack": candidate_stack,
        })
    return matched
611
+
612
def setup_llm():
    """Build the prompt-plus-model chain that scores one candidate for one job.

    Side effect: stores the model name in ``st.session_state.model_name`` if
    that key is not set yet, so token counting can look it up later.

    Returns:
        The chat prompt piped into a ``ChatOpenAI`` model bound to the
        ``Shortlist`` schema. Its input is a dict with the template variables
        named in the human prompt below.
    """
    chosen_model = "gpt-4o-mini"

    # Remember which model is in use (first writer wins).
    if 'model_name' not in st.session_state:
        st.session_state.model_name = chosen_model

    # Scoring rubric given to the model as the system message.
    system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
    the profile is according to job.
    Try to ensure following points while estimating the candidate's fit score:
    For education:
    Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
    Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
    Tier3 - Unknown or unranked institutions - Lower points or reject

    Startup Experience Requirement:
    Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
    preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.

    The fit score signifies based on following metrics:
    1–5 - Poor Fit - Auto-reject
    6–7 - Weak Fit - Auto-reject
    8.0–8.7 - Moderate Fit - Auto-reject
    8.8–10 - STRONG Fit - Include in results
    """

    # Per-call message carrying the job and candidate details.
    human = """
        You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
        For this you will be provided with the follwing inputs of job and candidates:
        Job Details
        Company: {Company}
        Role: {Role}
        About Company: {desc}
        Locations: {Locations}
        Tech Stack: {Tech_Stack}
        Industry: {Industry}


        Candidate Details:
        Full Name: {Full_Name}
        LinkedIn URL: {LinkedIn_URL}
        Current Title & Company: {Current_Title_Company}
        Years of Experience: {Years_of_Experience}
        Degree & University: {Degree_University}
        Key Tech Stack: {Key_Tech_Stack}
        Key Highlights: {Key_Highlights}
        Location (from most recent experience): {cand_Location}
        Past_Experience: {Experience}


        Answer in the structured manner as per the schema.
        If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
        """

    # Chat model constrained to answer with a Shortlist object.
    structured_scorer = ChatOpenAI(
        model=chosen_model,
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    ).with_structured_output(Shortlist)

    scoring_prompt = ChatPromptTemplate.from_messages([
        ("system", system),
        ("human", human),
    ])

    return scoring_prompt | structured_scorer
689
+
690
def call_llm(candidate_data, job_data, llm_chain):
    """Ask the LLM chain to score one candidate against one job.

    Args:
        candidate_data: Candidate dict as produced by ``get_matching_candidates``.
        job_data: Job dict with the keys "Company", "Role", "desc",
            "Locations", "Tech_Stack" and "Industry".
        llm_chain: Chain returned by ``setup_llm``; its ``invoke`` result is
            read through the ``Shortlist`` attributes.

    Returns:
        A dict with "candidate_name", "candidate_url", "candidate_summary",
        "candidate_location", "fit_score" and "justification". If the LLM
        call fails, the error is shown with ``st.error`` and a placeholder
        verdict with ``fit_score`` 0.0 is returned instead of raising.

    Side effects:
        Adds approximate token counts to ``st.session_state.total_input_tokens``
        and ``total_output_tokens``. Only the template variables and the parsed
        answer are measured, not the full rendered prompt, so the counters are
        a lower-bound estimate.
    """
    try:
        # Tech stacks may arrive as sets; the prompt needs plain strings.
        job_tech_stack = job_data.get("Tech_Stack", set())
        candidate_tech_stack = candidate_data.get("Tech Stack", set())

        if isinstance(job_tech_stack, set):
            job_tech_stack = ", ".join(sorted(job_tech_stack))

        if isinstance(candidate_tech_stack, set):
            candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))

        payload = {
            "Company": job_data.get("Company", ""),
            "Role": job_data.get("Role", ""),
            "desc": job_data.get("desc", ""),
            "Locations": job_data.get("Locations", ""),
            "Tech_Stack": job_tech_stack,
            "Industry": job_data.get("Industry", ""),

            "Full_Name": candidate_data.get("Name", ""),
            "LinkedIn_URL": candidate_data.get("URL", ""),
            "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
            "Years_of_Experience": candidate_data.get("Years of Experience", ""),
            "Degree_University": candidate_data.get("Degree & Education", ""),
            "Key_Tech_Stack": candidate_tech_stack,
            "Key_Highlights": candidate_data.get("Key Highlights", ""),
            "cand_Location": candidate_data.get("Location", ""),
            "Experience": candidate_data.get("Experience", ""),
        }

        response = llm_chain.invoke(payload)

        result = {
            "candidate_name": response.candidate_name,
            "candidate_url": response.candidate_url,
            "candidate_summary": response.candidate_summary,
            "candidate_location": response.candidate_location,
            "fit_score": response.fit_score,
            "justification": response.justification,
        }
    except Exception as e:
        st.error(f"Error calling LLM: {e}")
        # Fallback to a default response so one bad candidate does not abort the job.
        return {
            "candidate_name": candidate_data.get("Name", "Unknown"),
            "candidate_url": candidate_data.get("URL", ""),
            "candidate_summary": "Error processing candidate profile",
            "candidate_location": candidate_data.get("Location", "Unknown"),
            "fit_score": 0.0,
            "justification": f"Error in LLM processing: {str(e)}",
        }

    # Token bookkeeping is kept out of the block above on purpose: a failure
    # here must not turn a valid LLM verdict into the score-0 fallback.
    try:
        model_name = st.session_state.get("model_name", "gpt-4o-mini")

        # default=str keeps non-JSON values (e.g. numpy scalars) from breaking
        # the dump; the string is only used for counting.
        payload_str = json.dumps(payload, default=str)
        response_str = "\n".join(f"{key}: {value}" for key, value in result.items())

        input_tokens = calculate_tokens(payload_str, model_name)
        output_tokens = calculate_tokens(response_str, model_name)

        if 'total_input_tokens' not in st.session_state:
            st.session_state.total_input_tokens = 0
        if 'total_output_tokens' not in st.session_state:
            st.session_state.total_output_tokens = 0

        st.session_state.total_input_tokens += input_tokens
        st.session_state.total_output_tokens += output_tokens
    except Exception as e:
        st.warning(f"Could not update token usage: {e}")

    return result
775
+
776
def process_candidates_for_job(job_row, candidates_df, llm_chain=None, min_fit_score=8.8):
    """Score every tech-stack-matched candidate for one job and keep the strong fits.

    Args:
        job_row: One row of the jobs DataFrame; "Company", "Role" and
            "Tech Stack" are required, "One liner", "Locations" and "Industry"
            are read with an empty-string default.
        candidates_df: DataFrame of all candidates.
        llm_chain: Chain from ``setup_llm``; built on demand when ``None``.
        min_fit_score: Lowest LLM fit score that is still shortlisted.
            Defaults to 8.8, the "STRONG Fit" boundary stated in the prompt.

    Returns:
        A list of result dicts for the shortlisted candidates. An empty list
        is returned when nobody matches, nobody scores high enough, or an
        error occurs (errors are reported with ``st.error``).

    Side effects:
        Resets the session-wide token counters to 0 before processing, so the
        counters afterwards describe this run only, and writes progress and
        per-candidate outcomes to the page.
    """
    # Reset token counters for this job
    st.session_state.total_input_tokens = 0
    st.session_state.total_output_tokens = 0

    if llm_chain is None:
        with st.spinner("Setting up LLM..."):
            llm_chain = setup_llm()

    selected_candidates = []

    try:
        job_data = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "desc": job_row.get("One liner", ""),
            "Locations": job_row.get("Locations", ""),
            "Tech_Stack": job_row["Tech Stack"],
            "Industry": job_row.get("Industry", ""),
        }

        # Cheap pre-filter so the LLM is only called for plausible candidates.
        with st.spinner("Finding matching candidates based on tech stack..."):
            matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)

        if not matching_candidates:
            st.warning("No candidates with matching tech stack found for this job.")
            return []

        total = len(matching_candidates)
        st.success(f"Found {total} candidates with matching tech stack.")

        candidates_progress = st.progress(0)
        candidate_status = st.empty()

        for position, candidate_data in enumerate(matching_candidates, start=1):
            candidates_progress.progress(position / total)
            candidate_status.text(
                f"Evaluating candidate {position}/{total}: {candidate_data.get('Name', 'Unknown')}"
            )

            response = call_llm(candidate_data, job_data, llm_chain)

            response_dict = {
                "Name": response["candidate_name"],
                "LinkedIn": response["candidate_url"],
                "summary": response["candidate_summary"],
                "Location": response["candidate_location"],
                "Fit Score": response["fit_score"],
                "justification": response["justification"],
                # Add back original candidate data for context
                "Educational Background": candidate_data.get("Degree & Education", ""),
                "Years of Experience": candidate_data.get("Years of Experience", ""),
                "Current Title & Company": candidate_data.get("Current Title & Company", ""),
            }

            if response["fit_score"] >= min_fit_score:
                selected_candidates.append(response_dict)
                # st.json renders the dict as structured data; st.markdown
                # would show its Python repr and interpret markdown characters.
                st.json(response_dict)
            else:
                st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")

        # Clear progress indicators
        candidates_progress.empty()
        candidate_status.empty()

        if selected_candidates:
            st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
        else:
            st.info("No candidates met the minimum fit score threshold for this job.")

        # Token usage is displayed by display_job_selection when showing results
        return selected_candidates

    except Exception as e:
        st.error(f"Error processing job: {e}")
        return []
858
+
859
def main():
    """Render the app: title, API-key sidebar, sheet loading and job selection.

    Flow:
      1. Ask for the OpenAI API key in the sidebar and export it as
         ``OPENAI_API_KEY``.
      2. Without a key, show a warning and stop; no Google Sheets access is
         attempted.
      3. With a key, authorize against Google Sheets, load the jobs and
         candidates worksheets into DataFrames and hand them to
         ``display_job_selection``.

    Any failure while authorizing, loading or rendering the data is reported
    with ``st.error`` instead of crashing the script.
    """
    st.title("👨‍💻 Candidate Matching App")

    # Initialize session state
    if 'processed_jobs' not in st.session_state:
        st.session_state.processed_jobs = {}

    st.write("""
    This app matches job listings with candidate profiles based on tech stack and other criteria.
    Select a job to find matching candidates.
    """)

    # API Key input
    with st.sidebar:
        st.header("API Configuration")
        api_key = st.text_input("Enter OpenAI API Key", type="password")
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
            st.success("API Key set!")
        else:
            st.warning("Please enter OpenAI API Key to use LLM features")

    if not api_key:
        st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
        st.divider()
        return

    SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-e94255ca76fd.json'
    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

    try:
        # Credential loading and sheet access live inside the try block so a
        # missing key file or an unreachable sheet is shown as an error.
        creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
        gc = gspread.authorize(creds)
        job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
        candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')

        # Load data from Google Sheets
        job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
        job_data = job_worksheet.get_all_values()
        candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
        candidate_data = candidate_worksheet.get_all_values()

        # The first row of each sheet is used as the header; an empty sheet
        # has no header row to build a DataFrame from.
        if not job_data or not candidate_data:
            st.error("Error processing files: one of the worksheets is empty")
            st.divider()
            return

        # Convert to DataFrames
        jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
        candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
        candidates_df = candidates_df.fillna("Unknown")

        # Display data preview
        with st.expander("Preview uploaded data"):
            st.subheader("Jobs Data Preview")
            st.dataframe(jobs_df.head(3))

            st.subheader("Candidates Data Preview")
            st.dataframe(candidates_df.head(3))

        # Sheet column -> expected column. Every entry currently maps a name
        # to itself, so the rename below is a no-op until an entry differs.
        column_mapping = {
            "Full Name": "Full Name",
            "LinkedIn URL": "LinkedIn URL",
            "Current Title & Company": "Current Title & Company",
            "Years of Experience": "Years of Experience",
            "Degree & University": "Degree & University",
            "Key Tech Stack": "Key Tech Stack",
            "Key Highlights": "Key Highlights",
            "Location (from most recent experience)": "Location (from most recent experience)",
        }

        # Rename columns if they don't match expected
        candidates_df = candidates_df.rename(columns={
            col: mapping for col, mapping in column_mapping.items()
            if col in candidates_df.columns and col != mapping
        })

        # Jobs are not processed upfront; only the job the user selects is
        # sent through the LLM.
        display_job_selection(jobs_df, candidates_df)

    except Exception as e:
        st.error(f"Error processing files: {e}")

    st.divider()
939
+
940
+
941
def display_job_selection(jobs_df, candidates_df):
    """Let the user pick a job, run the matching on demand and show the results.

    Args:
        jobs_df: DataFrame of jobs; "Role", "Company" and "Tech Stack" are
            required, "One liner", "Locations" and "Industry" are optional.
        candidates_df: DataFrame of candidates passed through to
            ``process_candidates_for_job``.

    Session state used:
        ``llm_chain``                 cached chain from ``setup_llm``
        ``job_<index>_processed``     whether results exist for that job
        ``Selected_Candidates``       dict of job index -> shortlisted candidates
    """
    # Keep the LLM chain in session state to avoid recreating it
    if 'llm_chain' not in st.session_state:
        st.session_state.llm_chain = None

    st.subheader("Select a job to view potential matches")

    # Build the option labels only; no matching is computed here.
    job_options = [f"{row['Role']} at {row['Company']}" for _, row in jobs_df.iterrows()]
    if not job_options:
        return

    selected_job_index = st.selectbox("Jobs:",
                                      range(len(job_options)),
                                      format_func=lambda x: job_options[x])

    # Display job details
    job_row = jobs_df.iloc[selected_job_index]

    # Parse tech stack for display
    job_row_stack = parse_tech_stack(job_row["Tech Stack"])

    col1, col2 = st.columns([2, 1])

    with col1:
        st.subheader(f"Job Details: {job_row['Role']}")

        job_details = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "Description": job_row.get("One liner", "N/A"),
            "Locations": job_row.get("Locations", "N/A"),
            "Industry": job_row.get("Industry", "N/A"),
            "Tech Stack": display_tech_stack(job_row_stack),
        }

        for key, value in job_details.items():
            st.markdown(f"**{key}:** {value}")

    # Create a key for this job in session state
    job_key = f"job_{selected_job_index}_processed"

    if job_key not in st.session_state:
        st.session_state[job_key] = False

    # Add a process button for this job
    if not st.session_state[job_key]:
        if st.button("Find Matching Candidates for this Job"):
            if not os.environ.get("OPENAI_API_KEY"):
                st.error("Please enter your OpenAI API key in the sidebar before processing")
            else:
                # Build the chain once, before processing, and cache it. This
                # avoids a throwaway chain inside process_candidates_for_job
                # followed by a second one built afterwards.
                if st.session_state.llm_chain is None:
                    with st.spinner("Setting up LLM..."):
                        st.session_state.llm_chain = setup_llm()

                # Process candidates for this job (only when requested)
                selected_candidates = process_candidates_for_job(
                    job_row,
                    candidates_df,
                    st.session_state.llm_chain
                )

                # Store the results and set as processed
                if 'Selected_Candidates' not in st.session_state:
                    st.session_state.Selected_Candidates = {}
                st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
                st.session_state[job_key] = True

                # Force refresh so the results section below is rendered
                st.rerun()

    # Display selected candidates if already processed
    if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
        selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])

        st.subheader("Selected Candidates")

        # NOTE: the token counters are session-wide and are reset by
        # process_candidates_for_job, so they describe the most recent run,
        # which is not necessarily the job shown here.
        if 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
            display_token_usage()

        if len(selected_candidates) > 0:
            for i, candidate in enumerate(selected_candidates):
                with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
                    col1, col2 = st.columns([3, 1])

                    with col1:
                        st.markdown(f"**Summary:** {candidate['summary']}")
                        st.markdown(f"**Current:** {candidate['Current Title & Company']}")
                        st.markdown(f"**Education:** {candidate['Educational Background']}")
                        st.markdown(f"**Experience:** {candidate['Years of Experience']}")
                        st.markdown(f"**Location:** {candidate['Location']}")
                        st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")

                    with col2:
                        st.markdown(f"**Fit Score:** {candidate['Fit Score']}")

                    st.markdown("**Justification:**")
                    st.info(candidate['justification'])
        else:
            st.info("No candidates met the minimum score threshold (8.8) for this job.")

        # Add a reset button to start over
        if st.button("Reset and Process Again"):
            # Token counters are left untouched here; only the processed flag
            # is cleared so the job can be run again.
            st.session_state[job_key] = False
            st.rerun()
1053
+
1054
# Run the app only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()