ak0601 commited on
Commit
66cf2b6
·
verified ·
1 Parent(s): 766675c

Update src/app_job_copy_1.py

Browse files
Files changed (1) hide show
  1. src/app_job_copy_1.py +686 -47
src/app_job_copy_1.py CHANGED
@@ -1,3 +1,585 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import json
@@ -22,7 +604,7 @@ st.set_page_config(
22
 
23
  # Define pydantic model for structured output
24
  class Shortlist(BaseModel):
25
- fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
26
  candidate_name: str = Field(description="The name of the candidate.")
27
  candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
28
  candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
@@ -145,7 +727,7 @@ def setup_llm():
145
  # Create LLM instance
146
  llm = ChatOpenAI(
147
  model=model_name,
148
- temperature=0,
149
  max_tokens=None,
150
  timeout=None,
151
  max_retries=2,
@@ -163,6 +745,7 @@ Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi
163
  Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
164
  Tier3 - Unknown or unranked institutions - Lower points or reject
165
 
 
166
  Startup Experience Requirement:
167
  Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
168
  preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
@@ -172,13 +755,17 @@ preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,B
172
  6–7 - Weak Fit - Auto-reject
173
  8.0–8.7 - Moderate Fit - Auto-reject
174
  8.8–10 - STRONG Fit - Include in results
 
 
175
  """
176
 
177
  # Create query prompt
178
  query_prompt = ChatPromptTemplate.from_messages([
179
  ("system", system),
180
  ("human", """
181
- You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
 
 
182
  For this you will be provided with the follwing inputs of job and candidates:
183
  Job Details
184
  Company: {Company}
@@ -203,6 +790,7 @@ preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,B
203
 
204
  Answer in the structured manner as per the schema.
205
  If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
 
206
  """),
207
  ])
208
 
@@ -260,7 +848,7 @@ def call_llm(candidate_data, job_data, llm_chain):
260
  candidate_url: {response.candidate_url}
261
  candidate_summary: {response.candidate_summary}
262
  candidate_location: {response.candidate_location}
263
- fit_score: {response.fit_score}
264
  justification: {response.justification}
265
  """
266
 
@@ -348,7 +936,7 @@ def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
348
  "LinkedIn": response["candidate_url"],
349
  "summary": response["candidate_summary"],
350
  "Location": response["candidate_location"],
351
- "Fit Score": response["fit_score"],
352
  "justification": response["justification"],
353
  # Add back original candidate data for context
354
  "Educational Background": candidate_data.get("Degree & Education", ""),
@@ -357,7 +945,7 @@ def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
357
  }
358
 
359
  # Add to selected candidates if score is high enough
360
- if response["fit_score"] >= 8.8:
361
  selected_candidates.append(response_dict)
362
  st.markdown(response_dict)
363
  else:
@@ -454,7 +1042,7 @@ def main():
454
 
455
  # Now, instead of processing all jobs upfront, we'll display job selection
456
  # and only process the selected job when the user chooses it
457
- display_job_selection(jobs_df, candidates_df)
458
 
459
  except Exception as e:
460
  st.error(f"Error processing files: {e}")
@@ -462,14 +1050,16 @@ def main():
462
  st.divider()
463
 
464
 
465
- def display_job_selection(jobs_df, candidates_df):
466
- # Store the LLM chain as a session state to avoid recreating it
 
 
467
  if 'llm_chain' not in st.session_state:
468
- st.session_state.llm_chain = None
469
 
470
  st.subheader("Select a job to view potential matches")
471
 
472
- # Create job options - but don't compute matches yet
473
  job_options = []
474
  for i, row in jobs_df.iterrows():
475
  job_options.append(f"{row['Role']} at {row['Company']}")
@@ -508,6 +1098,25 @@ def display_job_selection(jobs_df, candidates_df):
508
  if job_key not in st.session_state:
509
  st.session_state[job_key] = False
510
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  # Add a process button for this job
512
  if not st.session_state[job_key]:
513
  if st.button(f"Find Matching Candidates for this Job"):
@@ -515,65 +1124,95 @@ def display_job_selection(jobs_df, candidates_df):
515
  st.error("Please enter your OpenAI API key in the sidebar before processing")
516
  else:
517
  # Process candidates for this job (only when requested)
518
- selected_candidates = process_candidates_for_job(
519
- job_row,
520
- candidates_df,
521
- st.session_state.llm_chain
522
- )
 
 
523
 
524
- # Store the results and set as processed
525
- if 'Selected_Candidates' not in st.session_state:
526
- st.session_state.Selected_Candidates = {}
527
- st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
528
- st.session_state[job_key] = True
529
-
530
- # Store the LLM chain for reuse
531
- if st.session_state.llm_chain is None:
532
- st.session_state.llm_chain = setup_llm()
533
-
534
- # Force refresh
535
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
 
537
  # Display selected candidates if already processed
538
- if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
539
- selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
 
 
 
 
 
 
 
 
 
 
 
540
 
541
  # Display selected candidates
542
  st.subheader("Selected Candidates")
543
 
544
- # Display token usage statistics (will persist until job is changed)
545
- if 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
546
  display_token_usage()
547
 
548
  if len(selected_candidates) > 0:
549
  for i, candidate in enumerate(selected_candidates):
550
- with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
551
  col1, col2 = st.columns([3, 1])
552
 
553
  with col1:
554
- st.markdown(f"**Summary:** {candidate['summary']}")
555
- st.markdown(f"**Current:** {candidate['Current Title & Company']}")
556
- st.markdown(f"**Education:** {candidate['Educational Background']}")
557
- st.markdown(f"**Experience:** {candidate['Years of Experience']}")
558
- st.markdown(f"**Location:** {candidate['Location']}")
559
- st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
 
560
 
561
  with col2:
562
- st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
 
563
 
564
- st.markdown("**Justification:**")
565
- st.info(candidate['justification'])
 
566
  else:
567
- st.info("No candidates met the minimum score threshold (8.8) for this job.")
568
-
569
- # We don't show tech-matched candidates here since they are generated
570
- # during the LLM matching process now
571
 
572
  # Add a reset button to start over
573
  if st.button("Reset and Process Again"):
574
- # Don't reset token counters here - we want them to persist
575
  st.session_state[job_key] = False
 
 
576
  st.rerun()
577
 
 
578
  if __name__ == "__main__":
579
  main()
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import json
4
+ # import os
5
+ # from pydantic import BaseModel, Field
6
+ # from typing import List, Set, Dict, Any, Optional
7
+ # import time
8
+ # from langchain_openai import ChatOpenAI
9
+ # from langchain_core.messages import HumanMessage
10
+ # from langchain_core.prompts import ChatPromptTemplate
11
+ # from langchain_core.output_parsers import StrOutputParser
12
+ # from langchain_core.prompts import PromptTemplate
13
+ # import gspread
14
+ # from google.oauth2 import service_account
15
+ # import tiktoken
16
+
17
+ # st.set_page_config(
18
+ # page_title="Candidate Matching App",
19
+ # page_icon="👨‍💻🎯",
20
+ # layout="wide"
21
+ # )
22
+
23
+ # # Define pydantic model for structured output
24
+ # class Shortlist(BaseModel):
25
+ # fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
26
+ # candidate_name: str = Field(description="The name of the candidate.")
27
+ # candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
28
+ # candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
29
+ # candidate_location: str = Field(description="The location of the candidate.")
30
+ # justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
31
+
32
+ # # Function to calculate tokens
33
+ # def calculate_tokens(text, model="gpt-4o-mini"):
34
+ # """Calculate the number of tokens in a given text for a specific model"""
35
+ # try:
36
+ # # Get the encoding for the model
37
+ # if "gpt-4" in model:
38
+ # encoding = tiktoken.encoding_for_model("gpt-4o-mini")
39
+ # elif "gpt-3.5" in model:
40
+ # encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
41
+ # else:
42
+ # encoding = tiktoken.get_encoding("cl100k_base") # Default for newer models
43
+
44
+ # # Encode the text and return the token count
45
+ # return len(encoding.encode(text))
46
+ # except Exception as e:
47
+ # # If there's an error, make a rough estimate (1 token ≈ 4 chars)
48
+ # return len(text) // 4
49
+
50
+ # # Function to display token usage
51
+ # def display_token_usage():
52
+ # """Display token usage statistics"""
53
+ # if 'total_input_tokens' not in st.session_state:
54
+ # st.session_state.total_input_tokens = 0
55
+ # if 'total_output_tokens' not in st.session_state:
56
+ # st.session_state.total_output_tokens = 0
57
+
58
+ # total_input = st.session_state.total_input_tokens
59
+ # total_output = st.session_state.total_output_tokens
60
+ # total_tokens = total_input + total_output
61
+
62
+ # # Estimate cost based on model
63
+ # if st.session_state.model_name == "gpt-4o-mini":
64
+ # input_cost_per_1k = 0.0003 # $0.0003 per 1K input tokens
65
+ # output_cost_per_1k = 0.0006 # $$0.0006 per 1K output tokens
66
+ # elif "gpt-4" in st.session_state.model_name:
67
+ # input_cost_per_1k = 0.005 # $0.30 per 1K input tokens
68
+ # output_cost_per_1k = 0.60 # $0.60 per 1K output tokens
69
+ # else: # Assume gpt-3.5-turbo pricing
70
+ # input_cost_per_1k = 0.0015 # $0.0015 per 1K input tokens
71
+ # output_cost_per_1k = 0.015 # $0.002 per 1K output tokens
72
+
73
+ # estimated_cost = (total_input / 1000 * input_cost_per_1k) + (total_output / 1000 * output_cost_per_1k)
74
+
75
+ # st.subheader("📊 Token Usage Statistics")
76
+
77
+ # col1, col2, col3 = st.columns(3)
78
+
79
+ # with col1:
80
+ # st.metric("Input Tokens", f"{total_input:,}")
81
+
82
+ # with col2:
83
+ # st.metric("Output Tokens", f"{total_output:,}")
84
+
85
+ # with col3:
86
+ # st.metric("Total Tokens", f"{total_tokens:,}")
87
+
88
+ # st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")
89
+
90
+ # return total_tokens
91
+
92
+ # # Function to parse and normalize tech stacks
93
+ # def parse_tech_stack(stack):
94
+ # if pd.isna(stack) or stack == "" or stack is None:
95
+ # return set()
96
+ # if isinstance(stack, set):
97
+ # return stack
98
+ # try:
99
+ # # Handle potential string representation of sets
100
+ # if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
101
+ # # This could be a string representation of a set
102
+ # items = stack.strip("{}").split(",")
103
+ # return set(item.strip().strip("'\"") for item in items if item.strip())
104
+ # return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
105
+ # except Exception as e:
106
+ # st.error(f"Error parsing tech stack: {e}")
107
+ # return set()
108
+
109
+ # def display_tech_stack(stack_set):
110
+ # if isinstance(stack_set, set):
111
+ # return ", ".join(sorted(stack_set))
112
+ # return str(stack_set)
113
+
114
+ # def get_matching_candidates(job_stack, candidates_df):
115
+ # """Find candidates with matching tech stack for a specific job"""
116
+ # matched = []
117
+ # job_stack_set = parse_tech_stack(job_stack)
118
+
119
+ # for _, candidate in candidates_df.iterrows():
120
+ # candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
121
+ # common = job_stack_set & candidate_stack
122
+ # if len(common) >= 2:
123
+ # matched.append({
124
+ # "Name": candidate["Full Name"],
125
+ # "URL": candidate["LinkedIn URL"],
126
+ # "Degree & Education": candidate["Degree & University"],
127
+ # "Years of Experience": candidate["Years of Experience"],
128
+ # "Current Title & Company": candidate['Current Title & Company'],
129
+ # "Key Highlights": candidate["Key Highlights"],
130
+ # "Location": candidate["Location (from most recent experience)"],
131
+ # "Experience": str(candidate["Experience"]),
132
+ # "Tech Stack": candidate_stack
133
+ # })
134
+ # return matched
135
+
136
+ # def setup_llm():
137
+ # """Set up the LangChain LLM with structured output"""
138
+ # # Define the model to use
139
+ # model_name = "gpt-4o-mini"
140
+
141
+ # # Store model name in session state for token calculation
142
+ # if 'model_name' not in st.session_state:
143
+ # st.session_state.model_name = model_name
144
+
145
+ # # Create LLM instance
146
+ # llm = ChatOpenAI(
147
+ # model=model_name,
148
+ # temperature=0,
149
+ # max_tokens=None,
150
+ # timeout=None,
151
+ # max_retries=2,
152
+ # )
153
+
154
+ # # Create structured output
155
+ # sum_llm = llm.with_structured_output(Shortlist)
156
+
157
+ # # Create system prompt
158
+ # system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
159
+ # the profile is according to job.
160
+ # Try to ensure following points while estimating the candidate's fit score:
161
+ # For education:
162
+ # Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
163
+ # Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
164
+ # Tier3 - Unknown or unranked institutions - Lower points or reject
165
+
166
+ # Startup Experience Requirement:
167
+ # Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
168
+ # preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
169
+
170
+ # The fit score signifies based on following metrics:
171
+ # 1–5 - Poor Fit - Auto-reject
172
+ # 6–7 - Weak Fit - Auto-reject
173
+ # 8.0–8.7 - Moderate Fit - Auto-reject
174
+ # 8.8–10 - STRONG Fit - Include in results
175
+ # """
176
+
177
+ # # Create query prompt
178
+ # query_prompt = ChatPromptTemplate.from_messages([
179
+ # ("system", system),
180
+ # ("human", """
181
+ # You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
182
+ # For this you will be provided with the follwing inputs of job and candidates:
183
+ # Job Details
184
+ # Company: {Company}
185
+ # Role: {Role}
186
+ # About Company: {desc}
187
+ # Locations: {Locations}
188
+ # Tech Stack: {Tech_Stack}
189
+ # Industry: {Industry}
190
+
191
+
192
+ # Candidate Details:
193
+ # Full Name: {Full_Name}
194
+ # LinkedIn URL: {LinkedIn_URL}
195
+ # Current Title & Company: {Current_Title_Company}
196
+ # Years of Experience: {Years_of_Experience}
197
+ # Degree & University: {Degree_University}
198
+ # Key Tech Stack: {Key_Tech_Stack}
199
+ # Key Highlights: {Key_Highlights}
200
+ # Location (from most recent experience): {cand_Location}
201
+ # Past_Experience: {Experience}
202
+
203
+
204
+ # Answer in the structured manner as per the schema.
205
+ # If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
206
+ # """),
207
+ # ])
208
+
209
+ # # Chain the prompt and LLM
210
+ # cat_class = query_prompt | sum_llm
211
+
212
+ # return cat_class
213
+
214
+ # def call_llm(candidate_data, job_data, llm_chain):
215
+ # """Call the actual LLM to evaluate the candidate"""
216
+ # try:
217
+ # # Convert tech stacks to strings for the LLM payload
218
+ # job_tech_stack = job_data.get("Tech_Stack", set())
219
+ # candidate_tech_stack = candidate_data.get("Tech Stack", set())
220
+
221
+ # if isinstance(job_tech_stack, set):
222
+ # job_tech_stack = ", ".join(sorted(job_tech_stack))
223
+
224
+ # if isinstance(candidate_tech_stack, set):
225
+ # candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
226
+
227
+ # # Prepare payload for LLM
228
+ # payload = {
229
+ # "Company": job_data.get("Company", ""),
230
+ # "Role": job_data.get("Role", ""),
231
+ # "desc": job_data.get("desc", ""),
232
+ # "Locations": job_data.get("Locations", ""),
233
+ # "Tech_Stack": job_tech_stack,
234
+ # "Industry": job_data.get("Industry", ""),
235
+
236
+ # "Full_Name": candidate_data.get("Name", ""),
237
+ # "LinkedIn_URL": candidate_data.get("URL", ""),
238
+ # "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
239
+ # "Years_of_Experience": candidate_data.get("Years of Experience", ""),
240
+ # "Degree_University": candidate_data.get("Degree & Education", ""),
241
+ # "Key_Tech_Stack": candidate_tech_stack,
242
+ # "Key_Highlights": candidate_data.get("Key Highlights", ""),
243
+ # "cand_Location": candidate_data.get("Location", ""),
244
+ # "Experience": candidate_data.get("Experience", "")
245
+ # }
246
+
247
+ # # Convert payload to a string for token calculation
248
+ # payload_str = json.dumps(payload)
249
+
250
+ # # Calculate input tokens
251
+ # input_tokens = calculate_tokens(payload_str, st.session_state.model_name)
252
+
253
+ # # Call LLM
254
+ # response = llm_chain.invoke(payload)
255
+ # print(candidate_data.get("Experience", ""))
256
+
257
+ # # Convert response to string for token calculation
258
+ # response_str = f"""
259
+ # candidate_name: {response.candidate_name}
260
+ # candidate_url: {response.candidate_url}
261
+ # candidate_summary: {response.candidate_summary}
262
+ # candidate_location: {response.candidate_location}
263
+ # fit_score: {response.fit_score}
264
+ # justification: {response.justification}
265
+ # """
266
+
267
+ # # Calculate output tokens
268
+ # output_tokens = calculate_tokens(response_str, st.session_state.model_name)
269
+
270
+ # # Update token counts in session state
271
+ # if 'total_input_tokens' not in st.session_state:
272
+ # st.session_state.total_input_tokens = 0
273
+ # if 'total_output_tokens' not in st.session_state:
274
+ # st.session_state.total_output_tokens = 0
275
+
276
+ # st.session_state.total_input_tokens += input_tokens
277
+ # st.session_state.total_output_tokens += output_tokens
278
+
279
+ # # Return response in expected format
280
+ # return {
281
+ # "candidate_name": response.candidate_name,
282
+ # "candidate_url": response.candidate_url,
283
+ # "candidate_summary": response.candidate_summary,
284
+ # "candidate_location": response.candidate_location,
285
+ # "fit_score": response.fit_score,
286
+ # "justification": response.justification
287
+ # }
288
+ # except Exception as e:
289
+ # st.error(f"Error calling LLM: {e}")
290
+ # # Fallback to a default response
291
+ # return {
292
+ # "candidate_name": candidate_data.get("Name", "Unknown"),
293
+ # "candidate_url": candidate_data.get("URL", ""),
294
+ # "candidate_summary": "Error processing candidate profile",
295
+ # "candidate_location": candidate_data.get("Location", "Unknown"),
296
+ # "fit_score": 0.0,
297
+ # "justification": f"Error in LLM processing: {str(e)}"
298
+ # }
299
+
300
+ # def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
301
+ # """Process candidates for a specific job using the LLM"""
302
+ # # Reset token counters for this job
303
+ # st.session_state.total_input_tokens = 0
304
+ # st.session_state.total_output_tokens = 0
305
+
306
+ # if llm_chain is None:
307
+ # with st.spinner("Setting up LLM..."):
308
+ # llm_chain = setup_llm()
309
+
310
+ # selected_candidates = []
311
+
312
+ # try:
313
+ # # Get job-specific data
314
+ # job_data = {
315
+ # "Company": job_row["Company"],
316
+ # "Role": job_row["Role"],
317
+ # "desc": job_row.get("One liner", ""),
318
+ # "Locations": job_row.get("Locations", ""),
319
+ # "Tech_Stack": job_row["Tech Stack"],
320
+ # "Industry": job_row.get("Industry", "")
321
+ # }
322
+
323
+ # # Find matching candidates for this job
324
+ # with st.spinner("Finding matching candidates based on tech stack..."):
325
+ # matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
326
+
327
+ # if not matching_candidates:
328
+ # st.warning("No candidates with matching tech stack found for this job.")
329
+ # return []
330
+
331
+ # st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
332
+
333
+ # # Create progress elements
334
+ # candidates_progress = st.progress(0)
335
+ # candidate_status = st.empty()
336
+
337
+ # # Process each candidate
338
+ # for i, candidate_data in enumerate(matching_candidates):
339
+ # # Update progress
340
+ # candidates_progress.progress((i + 1) / len(matching_candidates))
341
+ # candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
342
+
343
+ # # Process the candidate with the LLM
344
+ # response = call_llm(candidate_data, job_data, llm_chain)
345
+
346
+ # response_dict = {
347
+ # "Name": response["candidate_name"],
348
+ # "LinkedIn": response["candidate_url"],
349
+ # "summary": response["candidate_summary"],
350
+ # "Location": response["candidate_location"],
351
+ # "Fit Score": response["fit_score"],
352
+ # "justification": response["justification"],
353
+ # # Add back original candidate data for context
354
+ # "Educational Background": candidate_data.get("Degree & Education", ""),
355
+ # "Years of Experience": candidate_data.get("Years of Experience", ""),
356
+ # "Current Title & Company": candidate_data.get("Current Title & Company", "")
357
+ # }
358
+
359
+ # # Add to selected candidates if score is high enough
360
+ # if response["fit_score"] >= 8.8:
361
+ # selected_candidates.append(response_dict)
362
+ # st.markdown(response_dict)
363
+ # else:
364
+ # st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
365
+
366
+ # # Clear progress indicators
367
+ # candidates_progress.empty()
368
+ # candidate_status.empty()
369
+
370
+ # # Show results
371
+ # if selected_candidates:
372
+ # st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
373
+ # else:
374
+ # st.info("No candidates met the minimum fit score threshold for this job.")
375
+
376
+ # # Token usage is now displayed in display_job_selection when showing results
377
+ # return selected_candidates
378
+
379
+ # except Exception as e:
380
+ # st.error(f"Error processing job: {e}")
381
+ # return []
382
+
383
+ # def main():
384
+ # st.title("👨‍💻 Candidate Matching App")
385
+
386
+ # # Initialize session state
387
+ # if 'processed_jobs' not in st.session_state:
388
+ # st.session_state.processed_jobs = {}
389
+
390
+ # st.write("""
391
+ # This app matches job listings with candidate profiles based on tech stack and other criteria.
392
+ # Select a job to find matching candidates.
393
+ # """)
394
+
395
+ # # API Key input
396
+ # with st.sidebar:
397
+ # st.header("API Configuration")
398
+ # api_key = st.text_input("Enter OpenAI API Key", type="password")
399
+ # if api_key:
400
+ # os.environ["OPENAI_API_KEY"] = api_key
401
+ # st.success("API Key set!")
402
+ # else:
403
+ # st.warning("Please enter OpenAI API Key to use LLM features")
404
+
405
+ # # Show API key warning if not set
406
+ # SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-e94255ca76fd.json'
407
+ # SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
408
+ # creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
409
+ # gc = gspread.authorize(creds)
410
+ # job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
411
+ # candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
412
+
413
+ # if not api_key:
414
+ # st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
415
+
416
+ # if api_key:
417
+ # try:
418
+ # # Load data from Google Sheets
419
+ # job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
420
+ # job_data = job_worksheet.get_all_values()
421
+ # candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
422
+ # candidate_data = candidate_worksheet.get_all_values()
423
+
424
+ # # Convert to DataFrames
425
+ # jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
426
+ # candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
427
+ # candidates_df = candidates_df.fillna("Unknown")
428
+
429
+ # # Display data preview
430
+ # with st.expander("Preview uploaded data"):
431
+ # st.subheader("Jobs Data Preview")
432
+ # st.dataframe(jobs_df.head(3))
433
+
434
+ # st.subheader("Candidates Data Preview")
435
+ # st.dataframe(candidates_df.head(3))
436
+
437
+ # # Map column names if needed
438
+ # column_mapping = {
439
+ # "Full Name": "Full Name",
440
+ # "LinkedIn URL": "LinkedIn URL",
441
+ # "Current Title & Company": "Current Title & Company",
442
+ # "Years of Experience": "Years of Experience",
443
+ # "Degree & University": "Degree & University",
444
+ # "Key Tech Stack": "Key Tech Stack",
445
+ # "Key Highlights": "Key Highlights",
446
+ # "Location (from most recent experience)": "Location (from most recent experience)"
447
+ # }
448
+
449
+ # # Rename columns if they don't match expected
450
+ # candidates_df = candidates_df.rename(columns={
451
+ # col: mapping for col, mapping in column_mapping.items()
452
+ # if col in candidates_df.columns and col != mapping
453
+ # })
454
+
455
+ # # Now, instead of processing all jobs upfront, we'll display job selection
456
+ # # and only process the selected job when the user chooses it
457
+ # display_job_selection(jobs_df, candidates_df)
458
+
459
+ # except Exception as e:
460
+ # st.error(f"Error processing files: {e}")
461
+
462
+ # st.divider()
463
+
464
+
465
+ # def display_job_selection(jobs_df, candidates_df):
466
+ # # Store the LLM chain as a session state to avoid recreating it
467
+ # if 'llm_chain' not in st.session_state:
468
+ # st.session_state.llm_chain = None
469
+
470
+ # st.subheader("Select a job to view potential matches")
471
+
472
+ # # Create job options - but don't compute matches yet
473
+ # job_options = []
474
+ # for i, row in jobs_df.iterrows():
475
+ # job_options.append(f"{row['Role']} at {row['Company']}")
476
+
477
+ # if job_options:
478
+ # selected_job_index = st.selectbox("Jobs:",
479
+ # range(len(job_options)),
480
+ # format_func=lambda x: job_options[x])
481
+
482
+ # # Display job details
483
+ # job_row = jobs_df.iloc[selected_job_index]
484
+
485
+ # # Parse tech stack for display
486
+ # job_row_stack = parse_tech_stack(job_row["Tech Stack"])
487
+
488
+ # col1, col2 = st.columns([2, 1])
489
+
490
+ # with col1:
491
+ # st.subheader(f"Job Details: {job_row['Role']}")
492
+
493
+ # job_details = {
494
+ # "Company": job_row["Company"],
495
+ # "Role": job_row["Role"],
496
+ # "Description": job_row.get("One liner", "N/A"),
497
+ # "Locations": job_row.get("Locations", "N/A"),
498
+ # "Industry": job_row.get("Industry", "N/A"),
499
+ # "Tech Stack": display_tech_stack(job_row_stack)
500
+ # }
501
+
502
+ # for key, value in job_details.items():
503
+ # st.markdown(f"**{key}:** {value}")
504
+
505
+ # # Create a key for this job in session state
506
+ # job_key = f"job_{selected_job_index}_processed"
507
+
508
+ # if job_key not in st.session_state:
509
+ # st.session_state[job_key] = False
510
+
511
+ # # Add a process button for this job
512
+ # if not st.session_state[job_key]:
513
+ # if st.button(f"Find Matching Candidates for this Job"):
514
+ # if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
515
+ # st.error("Please enter your OpenAI API key in the sidebar before processing")
516
+ # else:
517
+ # # Process candidates for this job (only when requested)
518
+ # selected_candidates = process_candidates_for_job(
519
+ # job_row,
520
+ # candidates_df,
521
+ # st.session_state.llm_chain
522
+ # )
523
+
524
+ # # Store the results and set as processed
525
+ # if 'Selected_Candidates' not in st.session_state:
526
+ # st.session_state.Selected_Candidates = {}
527
+ # st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
528
+ # st.session_state[job_key] = True
529
+
530
+ # # Store the LLM chain for reuse
531
+ # if st.session_state.llm_chain is None:
532
+ # st.session_state.llm_chain = setup_llm()
533
+
534
+ # # Force refresh
535
+ # st.rerun()
536
+
537
+ # # Display selected candidates if already processed
538
+ # if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
539
+ # selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
540
+
541
+ # # Display selected candidates
542
+ # st.subheader("Selected Candidates")
543
+
544
+ # # Display token usage statistics (will persist until job is changed)
545
+ # if 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
546
+ # display_token_usage()
547
+
548
+ # if len(selected_candidates) > 0:
549
+ # for i, candidate in enumerate(selected_candidates):
550
+ # with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
551
+ # col1, col2 = st.columns([3, 1])
552
+
553
+ # with col1:
554
+ # st.markdown(f"**Summary:** {candidate['summary']}")
555
+ # st.markdown(f"**Current:** {candidate['Current Title & Company']}")
556
+ # st.markdown(f"**Education:** {candidate['Educational Background']}")
557
+ # st.markdown(f"**Experience:** {candidate['Years of Experience']}")
558
+ # st.markdown(f"**Location:** {candidate['Location']}")
559
+ # st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
560
+
561
+ # with col2:
562
+ # st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
563
+
564
+ # st.markdown("**Justification:**")
565
+ # st.info(candidate['justification'])
566
+ # else:
567
+ # st.info("No candidates met the minimum score threshold (8.8) for this job.")
568
+
569
+ # # We don't show tech-matched candidates here since they are generated
570
+ # # during the LLM matching process now
571
+
572
+ # # Add a reset button to start over
573
+ # if st.button("Reset and Process Again"):
574
+ # # Don't reset token counters here - we want them to persist
575
+ # st.session_state[job_key] = False
576
+ # st.rerun()
577
+
578
+ # if __name__ == "__main__":
579
+ # main()
580
+
581
+
582
+
583
  import streamlit as st
584
  import pandas as pd
585
  import json
 
604
 
605
  # Define pydantic model for structured output
606
  class Shortlist(BaseModel):
607
+ fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements upto 3 decimal points.")
608
  candidate_name: str = Field(description="The name of the candidate.")
609
  candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
610
  candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
 
727
  # Create LLM instance
728
  llm = ChatOpenAI(
729
  model=model_name,
730
+ temperature=0.3,
731
  max_tokens=None,
732
  timeout=None,
733
  max_retries=2,
 
745
  Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
746
  Tier3 - Unknown or unranked institutions - Lower points or reject
747
 
748
+
749
  Startup Experience Requirement:
750
  Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
751
  preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
 
755
  6–7 - Weak Fit - Auto-reject
756
  8.0–8.7 - Moderate Fit - Auto-reject
757
  8.8–10 - STRONG Fit - Include in results
758
+
759
+ Each candidate's fit score should be calculated based on a weighted evaluation of their background and **must be distinct even if candidates have similar profiles**. You may use slight variations to reflect nuanced differences.
760
  """
761
 
762
  # Create query prompt
763
  query_prompt = ChatPromptTemplate.from_messages([
764
  ("system", system),
765
  ("human", """
766
+ You are an expert Recruitor. Your task is to determine if the candidate matches the given job.
767
+ Provide the score as a `float` rounded to exactly **three decimal places** (e.g., 8.943, 9.211, etc.).
768
+ Avoid rounding to whole or one-decimal numbers. Every candidate should have a **unique** fit score.
769
  For this you will be provided with the follwing inputs of job and candidates:
770
  Job Details
771
  Company: {Company}
 
790
 
791
  Answer in the structured manner as per the schema.
792
  If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
793
+ The `fit_score` must be a float with **exactly three decimal digits** (e.g. 8.812, 9.006). Do not round to 1 or 2 decimals.
794
  """),
795
  ])
796
 
 
848
  candidate_url: {response.candidate_url}
849
  candidate_summary: {response.candidate_summary}
850
  candidate_location: {response.candidate_location}
851
+ fit_score: {float(f"{response.fit_score:.3f}")}
852
  justification: {response.justification}
853
  """
854
 
 
936
  "LinkedIn": response["candidate_url"],
937
  "summary": response["candidate_summary"],
938
  "Location": response["candidate_location"],
939
+ "Fit Score": float(f"{response['fit_score']:.3f}"),
940
  "justification": response["justification"],
941
  # Add back original candidate data for context
942
  "Educational Background": candidate_data.get("Degree & Education", ""),
 
945
  }
946
 
947
  # Add to selected candidates if score is high enough
948
+ if response["fit_score"] >= 8.800:
949
  selected_candidates.append(response_dict)
950
  st.markdown(response_dict)
951
  else:
 
1042
 
1043
  # Now, instead of processing all jobs upfront, we'll display job selection
1044
  # and only process the selected job when the user chooses it
1045
+ display_job_selection(jobs_df, candidates_df, job_sheet)
1046
 
1047
  except Exception as e:
1048
  st.error(f"Error processing files: {e}")
 
1050
  st.divider()
1051
 
1052
 
1053
+ def display_job_selection(jobs_df, candidates_df, sh):
1054
+ # Initialize session state variables if they don't exist
1055
+ if 'Selected_Candidates' not in st.session_state:
1056
+ st.session_state.Selected_Candidates = {}
1057
  if 'llm_chain' not in st.session_state:
1058
+ st.session_state.llm_chain = setup_llm()
1059
 
1060
  st.subheader("Select a job to view potential matches")
1061
 
1062
+ # Create job options
1063
  job_options = []
1064
  for i, row in jobs_df.iterrows():
1065
  job_options.append(f"{row['Role']} at {row['Company']}")
 
1098
  if job_key not in st.session_state:
1099
  st.session_state[job_key] = False
1100
 
1101
+ # Create worksheet name
1102
+ sheet_name = f"{job_row['Role']} at {job_row['Company']}".strip()[:100]
1103
+
1104
+ # Check if worksheet exists and has data
1105
+ worksheet_exists = False
1106
+ existing_candidates = []
1107
+
1108
+ try:
1109
+ cand_worksheet = sh.worksheet(sheet_name)
1110
+ worksheet_exists = True
1111
+ # Get existing data if worksheet exists
1112
+ existing_data = cand_worksheet.get_all_values()
1113
+ if len(existing_data) > 1: # Has data beyond header
1114
+ existing_candidates = existing_data[1:]
1115
+ st.session_state[job_key] = True
1116
+ # Don't show the info message about existing data
1117
+ except gspread.exceptions.WorksheetNotFound:
1118
+ pass
1119
+
1120
  # Add a process button for this job
1121
  if not st.session_state[job_key]:
1122
  if st.button(f"Find Matching Candidates for this Job"):
 
1124
  st.error("Please enter your OpenAI API key in the sidebar before processing")
1125
  else:
1126
  # Process candidates for this job (only when requested)
1127
+ with st.spinner("Processing candidates..."):
1128
+ selected_candidates = process_candidates_for_job(
1129
+ job_row,
1130
+ candidates_df,
1131
+ st.session_state.llm_chain
1132
+ )
1133
+ selected_candidates.sort(key=lambda x: x["Fit Score"], reverse=True)
1134
 
1135
+ # Only create worksheet if we have candidates
1136
+ if selected_candidates:
1137
+ try:
1138
+ if not worksheet_exists:
1139
+ cand_worksheet = sh.add_worksheet(title=sheet_name, rows=10000, cols=50)
1140
+
1141
+ # Prepare data for Google Sheet
1142
+ headers = list(selected_candidates[0].keys())
1143
+ rows = [headers] + [list(candidate.values()) for candidate in selected_candidates]
1144
+
1145
+ # Clear existing data if any
1146
+ cand_worksheet.clear()
1147
+
1148
+ # Write data to the worksheet
1149
+ cand_worksheet.update('A1', rows)
1150
+
1151
+ st.success(f"Successfully processed {len(selected_candidates)} candidates")
1152
+ except Exception as e:
1153
+ st.error(f"Error writing to Google Sheet: {e}")
1154
+
1155
+ # Store the results and set as processed
1156
+ st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
1157
+ st.session_state[job_key] = True
1158
+
1159
+ # Force refresh
1160
+ st.rerun()
1161
 
1162
  # Display selected candidates if already processed
1163
+ if st.session_state[job_key]:
1164
+ if existing_candidates:
1165
+ # Convert existing worksheet data to our format
1166
+ headers = existing_data[0]
1167
+ selected_candidates = []
1168
+ for row in existing_data[1:]:
1169
+ candidate = dict(zip(headers, row))
1170
+ selected_candidates.append(candidate)
1171
+ st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
1172
+ elif 'Selected_Candidates' in st.session_state:
1173
+ selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
1174
+ else:
1175
+ selected_candidates = []
1176
 
1177
  # Display selected candidates
1178
  st.subheader("Selected Candidates")
1179
 
1180
+ # Display token usage statistics (only if we processed with LLM)
1181
+ if not existing_candidates and 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
1182
  display_token_usage()
1183
 
1184
  if len(selected_candidates) > 0:
1185
  for i, candidate in enumerate(selected_candidates):
1186
+ with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate.get('Fit Score', 'N/A')})"):
1187
  col1, col2 = st.columns([3, 1])
1188
 
1189
  with col1:
1190
+ st.markdown(f"**Summary:** {candidate.get('summary', 'N/A')}")
1191
+ st.markdown(f"**Current:** {candidate.get('Current Title & Company', 'N/A')}")
1192
+ st.markdown(f"**Education:** {candidate.get('Educational Background', 'N/A')}")
1193
+ st.markdown(f"**Experience:** {candidate.get('Years of Experience', 'N/A')}")
1194
+ st.markdown(f"**Location:** {candidate.get('Location', 'N/A')}")
1195
+ if 'LinkedIn' in candidate:
1196
+ st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
1197
 
1198
  with col2:
1199
+ if 'Fit Score' in candidate:
1200
+ st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
1201
 
1202
+ if 'justification' in candidate:
1203
+ st.markdown("**Justification:**")
1204
+ st.info(candidate['justification'])
1205
  else:
1206
+ st.info("No candidates found for this job.")
 
 
 
1207
 
1208
  # Add a reset button to start over
1209
  if st.button("Reset and Process Again"):
1210
+ # Reset this job's processing state
1211
  st.session_state[job_key] = False
1212
+ if 'Selected_Candidates' in st.session_state and selected_job_index in st.session_state.Selected_Candidates:
1213
+ del st.session_state.Selected_Candidates[selected_job_index]
1214
  st.rerun()
1215
 
1216
+
1217
  if __name__ == "__main__":
1218
  main()