ak0601 commited on
Commit
e465a59
·
verified ·
1 Parent(s): 0cf6316

Update src/app_job_copy_1.py

Browse files
Files changed (1) hide show
  1. src/app_job_copy_1.py +1035 -415
src/app_job_copy_1.py CHANGED
@@ -1,15 +1,660 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import json
4
  import os
5
  from pydantic import BaseModel, Field
6
- from typing import List, Set, Dict, Any, Optional
7
- import time
8
  from langchain_openai import ChatOpenAI
9
- from langchain_core.messages import HumanMessage
10
  from langchain_core.prompts import ChatPromptTemplate
11
- from langchain_core.output_parsers import StrOutputParser
12
- from langchain_core.prompts import PromptTemplate
13
  import gspread
14
  import tempfile
15
  from google.oauth2 import service_account
@@ -22,7 +667,6 @@ st.set_page_config(
22
  )
23
  os.environ["STREAMLIT_HOME"] = tempfile.gettempdir()
24
  os.environ["STREAMLIT_DISABLE_TELEMETRY"] = "1"
25
-
26
  # Define pydantic model for structured output
27
  class Shortlist(BaseModel):
28
  fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements upto 3 decimal points.")
@@ -34,25 +678,19 @@ class Shortlist(BaseModel):
34
 
35
  # Function to calculate tokens
36
  def calculate_tokens(text, model="gpt-4o-mini"):
37
- """Calculate the number of tokens in a given text for a specific model"""
38
  try:
39
- # Get the encoding for the model
40
  if "gpt-4" in model:
41
  encoding = tiktoken.encoding_for_model("gpt-4o-mini")
42
  elif "gpt-3.5" in model:
43
  encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
44
  else:
45
- encoding = tiktoken.get_encoding("cl100k_base") # Default for newer models
46
-
47
- # Encode the text and return the token count
48
  return len(encoding.encode(text))
49
  except Exception as e:
50
- # If there's an error, make a rough estimate (1 token ≈ 4 chars)
51
  return len(text) // 4
52
 
53
  # Function to display token usage
54
  def display_token_usage():
55
- """Display token usage statistics"""
56
  if 'total_input_tokens' not in st.session_state:
57
  st.session_state.total_input_tokens = 0
58
  if 'total_output_tokens' not in st.session_state:
@@ -62,46 +700,35 @@ def display_token_usage():
62
  total_output = st.session_state.total_output_tokens
63
  total_tokens = total_input + total_output
64
 
65
- # Estimate cost based on model
66
- if st.session_state.model_name == "gpt-4o-mini":
67
- input_cost_per_1k = 0.0003 # $0.0003 per 1K input tokens
68
- output_cost_per_1k = 0.0006 # $$0.0006 per 1K output tokens
69
- elif "gpt-4" in st.session_state.model_name:
70
- input_cost_per_1k = 0.005 # $0.30 per 1K input tokens
71
- output_cost_per_1k = 0.60 # $0.60 per 1K output tokens
 
72
  else: # Assume gpt-3.5-turbo pricing
73
- input_cost_per_1k = 0.0015 # $0.0015 per 1K input tokens
74
- output_cost_per_1k = 0.015 # $0.002 per 1K output tokens
75
 
76
  estimated_cost = (total_input / 1000 * input_cost_per_1k) + (total_output / 1000 * output_cost_per_1k)
77
 
78
- st.subheader("📊 Token Usage Statistics")
79
 
80
  col1, col2, col3 = st.columns(3)
81
-
82
- with col1:
83
- st.metric("Input Tokens", f"{total_input:,}")
84
-
85
- with col2:
86
- st.metric("Output Tokens", f"{total_output:,}")
87
-
88
- with col3:
89
- st.metric("Total Tokens", f"{total_tokens:,}")
90
-
91
  st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")
92
-
93
  return total_tokens
94
 
95
  # Function to parse and normalize tech stacks
96
  def parse_tech_stack(stack):
97
- if pd.isna(stack) or stack == "" or stack is None:
98
- return set()
99
- if isinstance(stack, set):
100
- return stack
101
  try:
102
- # Handle potential string representation of sets
103
  if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
104
- # This could be a string representation of a set
105
  items = stack.strip("{}").split(",")
106
  return set(item.strip().strip("'\"") for item in items if item.strip())
107
  return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
@@ -110,29 +737,24 @@ def parse_tech_stack(stack):
110
  return set()
111
 
112
  def display_tech_stack(stack_set):
113
- if isinstance(stack_set, set):
114
- return ", ".join(sorted(stack_set))
115
- return str(stack_set)
116
 
117
  def get_matching_candidates(job_stack, candidates_df):
118
- """Find candidates with matching tech stack for a specific job"""
119
  matched = []
120
  job_stack_set = parse_tech_stack(job_stack)
121
-
122
  for _, candidate in candidates_df.iterrows():
123
  candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
124
  common = job_stack_set & candidate_stack
125
- if len(common) >= 2:
126
  matched.append({
127
- "Name": candidate["Full Name"],
128
- "URL": candidate["LinkedIn URL"],
129
  "Degree & Education": candidate["Degree & University"],
130
  "Years of Experience": candidate["Years of Experience"],
131
  "Current Title & Company": candidate['Current Title & Company'],
132
  "Key Highlights": candidate["Key Highlights"],
133
  "Location": candidate["Location (from most recent experience)"],
134
- "Experience": str(candidate["Experience"]),
135
- "Tech Stack": candidate_stack
136
  })
137
  return matched
138
 
@@ -160,25 +782,21 @@ def setup_llm():
160
  # Create system prompt
161
  system = """You are an expert Tech Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
162
  the profile is according to job.
 
 
163
  Try to ensure following points while estimating the candidate's fit score:
164
  For education:
165
  Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
166
  Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
167
  Tier3 - Unknown or unranked institutions - Lower points or reject
168
-
169
-
170
  Startup Experience Requirement:
171
  Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
172
  preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
173
-
174
- Apart from this the candidate must reside near or on the job location. If it is not immediately give a fit score below 5.
175
-
176
  The fit score signifies based on following metrics:
177
  1–5 - Poor Fit - Auto-reject
178
  6–7 - Weak Fit - Auto-reject
179
  8.0–8.7 - Moderate Fit - Auto-reject
180
  8.8–10 - STRONG Fit - Include in results
181
-
182
  Each candidate's fit score should be calculated based on a weighted evaluation of their background and must be distinct even if candidates have similar profiles.
183
  """
184
 
@@ -198,7 +816,6 @@ Avoid rounding to whole or one-decimal numbers. Every candidate should have a **
198
  Tech Stack: {Tech_Stack}
199
  Industry: {Industry}
200
 
201
-
202
  Candidate Details:
203
  Full Name: {Full_Name}
204
  LinkedIn URL: {LinkedIn_URL}
@@ -209,8 +826,6 @@ Avoid rounding to whole or one-decimal numbers. Every candidate should have a **
209
  Key Highlights: {Key_Highlights}
210
  Location (from most recent experience): {cand_Location}
211
  Past_Experience: {Experience}
212
-
213
-
214
  Answer in the structured manner as per the schema.
215
  If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
216
  The `fit_score` must be a float with **exactly three decimal digits** (e.g. 8.812, 9.006). Do not round to 1 or 2 decimals.
@@ -223,420 +838,425 @@ Avoid rounding to whole or one-decimal numbers. Every candidate should have a **
223
  return cat_class
224
 
225
  def call_llm(candidate_data, job_data, llm_chain):
226
- """Call the actual LLM to evaluate the candidate"""
227
  try:
228
- # Convert tech stacks to strings for the LLM payload
229
- job_tech_stack = job_data.get("Tech_Stack", set())
230
- candidate_tech_stack = candidate_data.get("Tech Stack", set())
231
-
232
- if isinstance(job_tech_stack, set):
233
- job_tech_stack = ", ".join(sorted(job_tech_stack))
234
-
235
- if isinstance(candidate_tech_stack, set):
236
- candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
237
 
238
- # Prepare payload for LLM
239
  payload = {
240
- "Company": job_data.get("Company", ""),
241
- "Role": job_data.get("Role", ""),
242
- "desc": job_data.get("desc", ""),
243
- "Locations": job_data.get("Locations", ""),
244
- "Tech_Stack": job_tech_stack,
245
- "Industry": job_data.get("Industry", ""),
246
-
247
- "Full_Name": candidate_data.get("Name", ""),
248
- "LinkedIn_URL": candidate_data.get("URL", ""),
249
  "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
250
  "Years_of_Experience": candidate_data.get("Years of Experience", ""),
251
  "Degree_University": candidate_data.get("Degree & Education", ""),
252
- "Key_Tech_Stack": candidate_tech_stack,
253
- "Key_Highlights": candidate_data.get("Key Highlights", ""),
254
- "cand_Location": candidate_data.get("Location", ""),
255
- "Experience": candidate_data.get("Experience", "")
256
  }
257
-
258
- # Convert payload to a string for token calculation
259
  payload_str = json.dumps(payload)
260
-
261
- # Calculate input tokens
262
  input_tokens = calculate_tokens(payload_str, st.session_state.model_name)
263
-
264
- # Call LLM
265
  response = llm_chain.invoke(payload)
266
- print(candidate_data.get("Experience", ""))
267
-
268
- # Convert response to string for token calculation
269
- response_str = f"""
270
- candidate_name: {response.candidate_name}
271
- candidate_url: {response.candidate_url}
272
- candidate_summary: {response.candidate_summary}
273
- candidate_location: {response.candidate_location}
274
- fit_score: {float(f"{response.fit_score:.3f}")}
275
- justification: {response.justification}
276
- """
277
-
278
- # Calculate output tokens
279
  output_tokens = calculate_tokens(response_str, st.session_state.model_name)
280
 
281
- # Update token counts in session state
282
- if 'total_input_tokens' not in st.session_state:
283
- st.session_state.total_input_tokens = 0
284
- if 'total_output_tokens' not in st.session_state:
285
- st.session_state.total_output_tokens = 0
286
-
287
  st.session_state.total_input_tokens += input_tokens
288
  st.session_state.total_output_tokens += output_tokens
289
 
290
- # Return response in expected format
291
  return {
292
- "candidate_name": response.candidate_name,
293
- "candidate_url": response.candidate_url,
294
- "candidate_summary": response.candidate_summary,
295
- "candidate_location": response.candidate_location,
296
- "fit_score": response.fit_score,
297
- "justification": response.justification
298
  }
299
  except Exception as e:
300
- st.error(f"Error calling LLM: {e}")
301
- # Fallback to a default response
302
  return {
303
- "candidate_name": candidate_data.get("Name", "Unknown"),
304
- "candidate_url": candidate_data.get("URL", ""),
305
- "candidate_summary": "Error processing candidate profile",
306
- "candidate_location": candidate_data.get("Location", "Unknown"),
307
- "fit_score": 0.0,
308
- "justification": f"Error in LLM processing: {str(e)}"
309
  }
310
 
311
  def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
312
- """Process candidates for a specific job using the LLM"""
313
- # Reset token counters for this job
314
- st.session_state.total_input_tokens = 0
315
  st.session_state.total_output_tokens = 0
316
-
317
  if llm_chain is None:
318
- with st.spinner("Setting up LLM..."):
319
- llm_chain = setup_llm()
320
 
321
  selected_candidates = []
 
 
 
 
322
 
323
- try:
324
- # Get job-specific data
325
- job_data = {
326
- "Company": job_row["Company"],
327
- "Role": job_row["Role"],
328
- "desc": job_row.get("One liner", ""),
329
- "Locations": job_row.get("Locations", ""),
330
- "Tech_Stack": job_row["Tech Stack"],
331
- "Industry": job_row.get("Industry", "")
332
- }
333
-
334
- # Find matching candidates for this job
335
- with st.spinner("Finding matching candidates based on tech stack..."):
336
- matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
337
-
338
- if not matching_candidates:
339
- st.warning("No candidates with matching tech stack found for this job.")
340
- return []
341
-
342
- st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
343
-
344
- # Create progress elements
345
- candidates_progress = st.progress(0)
346
- candidate_status = st.empty()
347
-
348
- # Process each candidate
349
- for i, candidate_data in enumerate(matching_candidates):
350
- # Update progress
351
- candidates_progress.progress((i + 1) / len(matching_candidates))
352
- candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
353
-
354
- # Process the candidate with the LLM
355
- response = call_llm(candidate_data, job_data, llm_chain)
356
-
357
- response_dict = {
358
- "Name": response["candidate_name"],
359
- "LinkedIn": response["candidate_url"],
360
- "summary": response["candidate_summary"],
361
- "Location": response["candidate_location"],
362
- "Fit Score": float(f"{response['fit_score']:.3f}"),
363
- "justification": response["justification"],
364
- # Add back original candidate data for context
365
- "Educational Background": candidate_data.get("Degree & Education", ""),
366
- "Years of Experience": candidate_data.get("Years of Experience", ""),
367
- "Current Title & Company": candidate_data.get("Current Title & Company", "")
368
- }
369
-
370
- # Add to selected candidates if score is high enough
371
- if response["fit_score"] >= 8.800:
372
- selected_candidates.append(response_dict)
373
- st.markdown(response_dict)
374
- else:
375
- st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
376
 
377
- # Clear progress indicators
378
- candidates_progress.empty()
379
- candidate_status.empty()
 
 
 
 
 
380
 
381
- # Show results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  if selected_candidates:
383
- st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
384
  else:
385
- st.info("No candidates met the minimum fit score threshold for this job.")
386
-
387
- # Token usage is now displayed in display_job_selection when showing results
388
- return selected_candidates
389
-
390
- except Exception as e:
391
- st.error(f"Error processing job: {e}")
392
- return []
393
 
394
  def main():
395
  st.title("👨‍💻 Candidate Matching App")
 
 
 
 
 
 
 
 
396
 
397
- # Initialize session state
398
- if 'processed_jobs' not in st.session_state:
399
- st.session_state.processed_jobs = {}
400
-
401
- st.write("""
402
- This app matches job listings with candidate profiles based on tech stack and other criteria.
403
- Select a job to find matching candidates.
404
- """)
405
-
406
- # API Key input
407
  with st.sidebar:
408
  st.header("API Configuration")
409
- api_key = st.text_input("Enter OpenAI API Key", type="password")
410
  if api_key:
411
  os.environ["OPENAI_API_KEY"] = api_key
412
- st.success("API Key set!")
 
 
 
 
413
  else:
414
  st.warning("Please enter OpenAI API Key to use LLM features")
 
415
 
416
- # Show API key warning if not set
417
- SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-e94255ca76fd.json'
418
- SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
419
- creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
420
- gc = gspread.authorize(creds)
421
- job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
422
- candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
423
-
424
- if not api_key:
 
 
 
 
 
 
425
  st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
 
 
 
 
 
426
 
427
- if api_key:
428
- try:
429
- # Load data from Google Sheets
430
- job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
431
- job_data = job_worksheet.get_all_values()
432
- candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
433
- candidate_data = candidate_worksheet.get_all_values()
434
-
435
- # Convert to DataFrames
436
- jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
437
- jobs_df = jobs_df.drop(["Link"],axis = 1)
438
- candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
439
- candidates_df = candidates_df.fillna("Unknown")
440
-
441
- # Display data preview
442
- with st.expander("Preview uploaded data"):
443
- st.subheader("Jobs Data Preview")
444
- st.dataframe(jobs_df.head(3))
445
-
446
- st.subheader("Candidates Data Preview")
447
- st.dataframe(candidates_df.head(3))
448
-
449
- # Map column names if needed
450
- column_mapping = {
451
- "Full Name": "Full Name",
452
- "LinkedIn URL": "LinkedIn URL",
453
- "Current Title & Company": "Current Title & Company",
454
- "Years of Experience": "Years of Experience",
455
- "Degree & University": "Degree & University",
456
- "Key Tech Stack": "Key Tech Stack",
457
- "Key Highlights": "Key Highlights",
458
- "Location (from most recent experience)": "Location (from most recent experience)"
459
- }
460
-
461
- # Rename columns if they don't match expected
462
- candidates_df = candidates_df.rename(columns={
463
- col: mapping for col, mapping in column_mapping.items()
464
- if col in candidates_df.columns and col != mapping
465
- })
466
 
467
- # Now, instead of processing all jobs upfront, we'll display job selection
468
- # and only process the selected job when the user chooses it
469
- display_job_selection(jobs_df, candidates_df, job_sheet)
470
 
471
- except Exception as e:
472
- st.error(f"Error processing files: {e}")
473
-
474
  st.divider()
475
 
 
 
 
 
 
 
 
476
 
477
- def display_job_selection(jobs_df, candidates_df, sh):
478
- # Initialize session state variables if they don't exist
479
- if 'Selected_Candidates' not in st.session_state:
480
- st.session_state.Selected_Candidates = {}
481
- if 'llm_chain' not in st.session_state:
482
- st.session_state.llm_chain = setup_llm()
483
 
484
- st.subheader("Select a job to view potential matches")
 
485
 
486
- # Create job options
487
- job_options = []
488
- for i, row in jobs_df.iterrows():
489
- job_options.append(f"{row['Role']} at {row['Company']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
- if job_options:
492
- selected_job_index = st.selectbox("Jobs:",
493
- range(len(job_options)),
494
- format_func=lambda x: job_options[x])
495
-
496
- # Display job details
497
- job_row = jobs_df.iloc[selected_job_index]
 
 
 
 
 
 
 
 
498
 
499
- # Parse tech stack for display
500
- job_row_stack = parse_tech_stack(job_row["Tech Stack"])
 
 
 
 
 
 
 
 
 
 
 
 
501
 
502
- col1, col2 = st.columns([2, 1])
 
 
 
 
 
 
 
 
 
 
 
 
503
 
504
- with col1:
505
- st.subheader(f"Job Details: {job_row['Role']}")
506
-
507
- job_details = {
508
- "Company": job_row["Company"],
509
- "Role": job_row["Role"],
510
- "Description": job_row.get("One liner", "N/A"),
511
- "Locations": job_row.get("Locations", "N/A"),
512
- "Industry": job_row.get("Industry", "N/A"),
513
- "Tech Stack": display_tech_stack(job_row_stack)
514
- }
515
-
516
- for key, value in job_details.items():
517
- st.markdown(f"**{key}:** {value}")
518
-
519
- # Create a key for this job in session state
520
- job_key = f"job_{selected_job_index}_processed"
521
-
522
- if job_key not in st.session_state:
523
- st.session_state[job_key] = False
524
-
525
- # Create worksheet name
526
- sheet_name = f"{job_row['Role']} at {job_row['Company']}".strip()[:100]
527
-
528
- # Check if worksheet exists and has data
529
- worksheet_exists = False
530
- existing_candidates = []
531
-
532
- try:
533
- cand_worksheet = sh.worksheet(sheet_name)
534
- worksheet_exists = True
535
- # Get existing data if worksheet exists
536
- existing_data = cand_worksheet.get_all_values()
537
- if len(existing_data) > 1: # Has data beyond header
538
- existing_candidates = existing_data[1:]
539
- st.session_state[job_key] = True
540
- # Don't show the info message about existing data
541
- except gspread.exceptions.WorksheetNotFound:
542
- pass
543
-
544
- # Add a process button for this job
545
- if not st.session_state[job_key]:
546
- if st.button(f"Find Matching Candidates for this Job"):
547
- if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
548
- st.error("Please enter your OpenAI API key in the sidebar before processing")
549
- else:
550
- # Process candidates for this job (only when requested)
551
- with st.spinner("Processing candidates..."):
552
- selected_candidates = process_candidates_for_job(
553
- job_row,
554
- candidates_df,
555
- st.session_state.llm_chain
556
- )
557
- selected_candidates.sort(key=lambda x: x["Fit Score"], reverse=True)
558
 
559
- # Only create worksheet if we have candidates
560
- if selected_candidates:
561
- try:
562
- if not worksheet_exists:
563
- cand_worksheet = sh.add_worksheet(title=sheet_name, rows=10000, cols=50)
564
-
565
- # Prepare data for Google Sheet
566
- headers = list(selected_candidates[0].keys())
567
- rows = [headers] + [list(candidate.values()) for candidate in selected_candidates]
568
-
569
- # Clear existing data if any
570
- cand_worksheet.clear()
571
-
572
- # Write data to the worksheet
573
- cand_worksheet.update('A1', rows)
574
-
575
- st.success(f"Successfully processed {len(selected_candidates)} candidates")
576
- except Exception as e:
577
- st.error(f"Error writing to Google Sheet: {e}")
578
-
579
- # Store the results and set as processed
580
- st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
581
- st.session_state[job_key] = True
582
-
583
- # Force refresh
584
- st.rerun()
585
-
586
- # Display selected candidates if already processed
587
- if st.session_state[job_key]:
588
- if existing_candidates:
589
- # Convert existing worksheet data to our format
590
- headers = existing_data[0]
591
- selected_candidates = []
592
- for row in existing_data[1:]:
593
- candidate = dict(zip(headers, row))
594
- selected_candidates.append(candidate)
595
- st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
596
- elif 'Selected_Candidates' in st.session_state:
597
- selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
598
  else:
599
- selected_candidates = []
600
-
601
- # Display selected candidates
602
- st.subheader("Selected Candidates")
603
-
604
- # Display token usage statistics (only if we processed with LLM)
605
- if not existing_candidates and 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
606
- display_token_usage()
607
-
608
- if len(selected_candidates) > 0:
609
- for i, candidate in enumerate(selected_candidates):
610
- with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate.get('Fit Score', 'N/A')})"):
611
- col1, col2 = st.columns([3, 1])
612
-
613
- with col1:
614
- st.markdown(f"**Summary:** {candidate.get('summary', 'N/A')}")
615
- st.markdown(f"**Current:** {candidate.get('Current Title & Company', 'N/A')}")
616
- st.markdown(f"**Education:** {candidate.get('Educational Background', 'N/A')}")
617
- st.markdown(f"**Experience:** {candidate.get('Years of Experience', 'N/A')}")
618
- st.markdown(f"**Location:** {candidate.get('Location', 'N/A')}")
619
- if 'LinkedIn' in candidate:
620
- st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
621
-
622
- with col2:
623
- if 'Fit Score' in candidate:
624
- st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
625
-
626
- if 'justification' in candidate:
627
- st.markdown("**Justification:**")
628
- st.info(candidate['justification'])
629
- else:
630
- st.info("No candidates found for this job.")
631
 
632
- # Add a reset button to start over
633
- if st.button("Reset and Process Again"):
634
- # Reset this job's processing state
635
- st.session_state[job_key] = False
636
- if 'Selected_Candidates' in st.session_state and selected_job_index in st.session_state.Selected_Candidates:
637
- del st.session_state.Selected_Candidates[selected_job_index]
638
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
 
641
  if __name__ == "__main__":
642
- main()
 
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import json
4
+ # import os
5
+ # from pydantic import BaseModel, Field
6
+ # from typing import List, Set, Dict, Any, Optional
7
+ # import time
8
+ # from langchain_openai import ChatOpenAI
9
+ # from langchain_core.messages import HumanMessage
10
+ # from langchain_core.prompts import ChatPromptTemplate
11
+ # from langchain_core.output_parsers import StrOutputParser
12
+ # from langchain_core.prompts import PromptTemplate
13
+ # import gspread
14
+ # import tempfile
15
+ # from google.oauth2 import service_account
16
+ # import tiktoken
17
+
18
+ # st.set_page_config(
19
+ # page_title="Candidate Matching App",
20
+ # page_icon="👨‍💻🎯",
21
+ # layout="wide"
22
+ # )
23
+ # os.environ["STREAMLIT_HOME"] = tempfile.gettempdir()
24
+ # os.environ["STREAMLIT_DISABLE_TELEMETRY"] = "1"
25
+
26
+ # # Define pydantic model for structured output
27
+ # class Shortlist(BaseModel):
28
+ # fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements upto 3 decimal points.")
29
+ # candidate_name: str = Field(description="The name of the candidate.")
30
+ # candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
31
+ # candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
32
+ # candidate_location: str = Field(description="The location of the candidate.")
33
+ # justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
34
+
35
+ # # Function to calculate tokens
36
+ # def calculate_tokens(text, model="gpt-4o-mini"):
37
+ # """Calculate the number of tokens in a given text for a specific model"""
38
+ # try:
39
+ # # Get the encoding for the model
40
+ # if "gpt-4" in model:
41
+ # encoding = tiktoken.encoding_for_model("gpt-4o-mini")
42
+ # elif "gpt-3.5" in model:
43
+ # encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
44
+ # else:
45
+ # encoding = tiktoken.get_encoding("cl100k_base") # Default for newer models
46
+
47
+ # # Encode the text and return the token count
48
+ # return len(encoding.encode(text))
49
+ # except Exception as e:
50
+ # # If there's an error, make a rough estimate (1 token ≈ 4 chars)
51
+ # return len(text) // 4
52
+
53
+ # # Function to display token usage
54
+ # def display_token_usage():
55
+ # """Display token usage statistics"""
56
+ # if 'total_input_tokens' not in st.session_state:
57
+ # st.session_state.total_input_tokens = 0
58
+ # if 'total_output_tokens' not in st.session_state:
59
+ # st.session_state.total_output_tokens = 0
60
+
61
+ # total_input = st.session_state.total_input_tokens
62
+ # total_output = st.session_state.total_output_tokens
63
+ # total_tokens = total_input + total_output
64
+
65
+ # # Estimate cost based on model
66
+ # if st.session_state.model_name == "gpt-4o-mini":
67
+ # input_cost_per_1k = 0.0003 # $0.0003 per 1K input tokens
68
+ # output_cost_per_1k = 0.0006 # $$0.0006 per 1K output tokens
69
+ # elif "gpt-4" in st.session_state.model_name:
70
+ # input_cost_per_1k = 0.005 # $0.30 per 1K input tokens
71
+ # output_cost_per_1k = 0.60 # $0.60 per 1K output tokens
72
+ # else: # Assume gpt-3.5-turbo pricing
73
+ # input_cost_per_1k = 0.0015 # $0.0015 per 1K input tokens
74
+ # output_cost_per_1k = 0.015 # $0.002 per 1K output tokens
75
+
76
+ # estimated_cost = (total_input / 1000 * input_cost_per_1k) + (total_output / 1000 * output_cost_per_1k)
77
+
78
+ # st.subheader("📊 Token Usage Statistics")
79
+
80
+ # col1, col2, col3 = st.columns(3)
81
+
82
+ # with col1:
83
+ # st.metric("Input Tokens", f"{total_input:,}")
84
+
85
+ # with col2:
86
+ # st.metric("Output Tokens", f"{total_output:,}")
87
+
88
+ # with col3:
89
+ # st.metric("Total Tokens", f"{total_tokens:,}")
90
+
91
+ # st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")
92
+
93
+ # return total_tokens
94
+
95
+ # # Function to parse and normalize tech stacks
96
+ # def parse_tech_stack(stack):
97
+ # if pd.isna(stack) or stack == "" or stack is None:
98
+ # return set()
99
+ # if isinstance(stack, set):
100
+ # return stack
101
+ # try:
102
+ # # Handle potential string representation of sets
103
+ # if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
104
+ # # This could be a string representation of a set
105
+ # items = stack.strip("{}").split(",")
106
+ # return set(item.strip().strip("'\"") for item in items if item.strip())
107
+ # return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
108
+ # except Exception as e:
109
+ # st.error(f"Error parsing tech stack: {e}")
110
+ # return set()
111
+
112
+ # def display_tech_stack(stack_set):
113
+ # if isinstance(stack_set, set):
114
+ # return ", ".join(sorted(stack_set))
115
+ # return str(stack_set)
116
+
117
+ # def get_matching_candidates(job_stack, candidates_df):
118
+ # """Find candidates with matching tech stack for a specific job"""
119
+ # matched = []
120
+ # job_stack_set = parse_tech_stack(job_stack)
121
+
122
+ # for _, candidate in candidates_df.iterrows():
123
+ # candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
124
+ # common = job_stack_set & candidate_stack
125
+ # if len(common) >= 2:
126
+ # matched.append({
127
+ # "Name": candidate["Full Name"],
128
+ # "URL": candidate["LinkedIn URL"],
129
+ # "Degree & Education": candidate["Degree & University"],
130
+ # "Years of Experience": candidate["Years of Experience"],
131
+ # "Current Title & Company": candidate['Current Title & Company'],
132
+ # "Key Highlights": candidate["Key Highlights"],
133
+ # "Location": candidate["Location (from most recent experience)"],
134
+ # "Experience": str(candidate["Experience"]),
135
+ # "Tech Stack": candidate_stack
136
+ # })
137
+ # return matched
138
+
139
+ # def setup_llm():
140
+ # """Set up the LangChain LLM with structured output"""
141
+ # # Define the model to use
142
+ # model_name = "gpt-4o-mini"
143
+
144
+ # # Store model name in session state for token calculation
145
+ # if 'model_name' not in st.session_state:
146
+ # st.session_state.model_name = model_name
147
+
148
+ # # Create LLM instance
149
+ # llm = ChatOpenAI(
150
+ # model=model_name,
151
+ # temperature=0.3,
152
+ # max_tokens=None,
153
+ # timeout=None,
154
+ # max_retries=2,
155
+ # )
156
+
157
+ # # Create structured output
158
+ # sum_llm = llm.with_structured_output(Shortlist)
159
+
160
+ # # Create system prompt
161
+ # system = """You are an expert Tech Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
162
+ # the profile is according to job.
163
+ # Try to ensure following points while estimating the candidate's fit score:
164
+ # For education:
165
+ # Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
166
+ # Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
167
+ # Tier3 - Unknown or unranked institutions - Lower points or reject
168
+
169
+
170
+ # Startup Experience Requirement:
171
+ # Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
172
+ # preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
173
+
174
+ # Apart from this the candidate must reside near or on the job location. If it is not immediately give a fit score below 5.
175
+
176
+ # The fit score signifies based on following metrics:
177
+ # 1–5 - Poor Fit - Auto-reject
178
+ # 6–7 - Weak Fit - Auto-reject
179
+ # 8.0–8.7 - Moderate Fit - Auto-reject
180
+ # 8.8–10 - STRONG Fit - Include in results
181
+
182
+ # Each candidate's fit score should be calculated based on a weighted evaluation of their background and must be distinct even if candidates have similar profiles.
183
+ # """
184
+
185
+ # # Create query prompt
186
+ # query_prompt = ChatPromptTemplate.from_messages([
187
+ # ("system", system),
188
+ # ("human", """
189
+ # You are an expert Recruitor. Your task is to determine if the candidate matches the given job.
190
+ # Provide the score as a `float` rounded to exactly **three decimal places** (e.g., 8.943, 9.211, etc.).
191
+ # Avoid rounding to whole or one-decimal numbers. Every candidate should have a **unique** fit score.
192
+ # For this you will be provided with the follwing inputs of job and candidates:
193
+ # Job Details
194
+ # Company: {Company}
195
+ # Role: {Role}
196
+ # About Company: {desc}
197
+ # Locations: {Locations}
198
+ # Tech Stack: {Tech_Stack}
199
+ # Industry: {Industry}
200
+
201
+
202
+ # Candidate Details:
203
+ # Full Name: {Full_Name}
204
+ # LinkedIn URL: {LinkedIn_URL}
205
+ # Current Title & Company: {Current_Title_Company}
206
+ # Years of Experience: {Years_of_Experience}
207
+ # Degree & University: {Degree_University}
208
+ # Key Tech Stack: {Key_Tech_Stack}
209
+ # Key Highlights: {Key_Highlights}
210
+ # Location (from most recent experience): {cand_Location}
211
+ # Past_Experience: {Experience}
212
+
213
+
214
+ # Answer in the structured manner as per the schema.
215
+ # If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
216
+ # The `fit_score` must be a float with **exactly three decimal digits** (e.g. 8.812, 9.006). Do not round to 1 or 2 decimals.
217
+ # """),
218
+ # ])
219
+
220
+ # # Chain the prompt and LLM
221
+ # cat_class = query_prompt | sum_llm
222
+
223
+ # return cat_class
224
+
225
+ # def call_llm(candidate_data, job_data, llm_chain):
226
+ # """Call the actual LLM to evaluate the candidate"""
227
+ # try:
228
+ # # Convert tech stacks to strings for the LLM payload
229
+ # job_tech_stack = job_data.get("Tech_Stack", set())
230
+ # candidate_tech_stack = candidate_data.get("Tech Stack", set())
231
+
232
+ # if isinstance(job_tech_stack, set):
233
+ # job_tech_stack = ", ".join(sorted(job_tech_stack))
234
+
235
+ # if isinstance(candidate_tech_stack, set):
236
+ # candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
237
+
238
+ # # Prepare payload for LLM
239
+ # payload = {
240
+ # "Company": job_data.get("Company", ""),
241
+ # "Role": job_data.get("Role", ""),
242
+ # "desc": job_data.get("desc", ""),
243
+ # "Locations": job_data.get("Locations", ""),
244
+ # "Tech_Stack": job_tech_stack,
245
+ # "Industry": job_data.get("Industry", ""),
246
+
247
+ # "Full_Name": candidate_data.get("Name", ""),
248
+ # "LinkedIn_URL": candidate_data.get("URL", ""),
249
+ # "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
250
+ # "Years_of_Experience": candidate_data.get("Years of Experience", ""),
251
+ # "Degree_University": candidate_data.get("Degree & Education", ""),
252
+ # "Key_Tech_Stack": candidate_tech_stack,
253
+ # "Key_Highlights": candidate_data.get("Key Highlights", ""),
254
+ # "cand_Location": candidate_data.get("Location", ""),
255
+ # "Experience": candidate_data.get("Experience", "")
256
+ # }
257
+
258
+ # # Convert payload to a string for token calculation
259
+ # payload_str = json.dumps(payload)
260
+
261
+ # # Calculate input tokens
262
+ # input_tokens = calculate_tokens(payload_str, st.session_state.model_name)
263
+
264
+ # # Call LLM
265
+ # response = llm_chain.invoke(payload)
266
+ # print(candidate_data.get("Experience", ""))
267
+
268
+ # # Convert response to string for token calculation
269
+ # response_str = f"""
270
+ # candidate_name: {response.candidate_name}
271
+ # candidate_url: {response.candidate_url}
272
+ # candidate_summary: {response.candidate_summary}
273
+ # candidate_location: {response.candidate_location}
274
+ # fit_score: {float(f"{response.fit_score:.3f}")}
275
+ # justification: {response.justification}
276
+ # """
277
+
278
+ # # Calculate output tokens
279
+ # output_tokens = calculate_tokens(response_str, st.session_state.model_name)
280
+
281
+ # # Update token counts in session state
282
+ # if 'total_input_tokens' not in st.session_state:
283
+ # st.session_state.total_input_tokens = 0
284
+ # if 'total_output_tokens' not in st.session_state:
285
+ # st.session_state.total_output_tokens = 0
286
+
287
+ # st.session_state.total_input_tokens += input_tokens
288
+ # st.session_state.total_output_tokens += output_tokens
289
+
290
+ # # Return response in expected format
291
+ # return {
292
+ # "candidate_name": response.candidate_name,
293
+ # "candidate_url": response.candidate_url,
294
+ # "candidate_summary": response.candidate_summary,
295
+ # "candidate_location": response.candidate_location,
296
+ # "fit_score": response.fit_score,
297
+ # "justification": response.justification
298
+ # }
299
+ # except Exception as e:
300
+ # st.error(f"Error calling LLM: {e}")
301
+ # # Fallback to a default response
302
+ # return {
303
+ # "candidate_name": candidate_data.get("Name", "Unknown"),
304
+ # "candidate_url": candidate_data.get("URL", ""),
305
+ # "candidate_summary": "Error processing candidate profile",
306
+ # "candidate_location": candidate_data.get("Location", "Unknown"),
307
+ # "fit_score": 0.0,
308
+ # "justification": f"Error in LLM processing: {str(e)}"
309
+ # }
310
+
311
+ # def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
312
+ # """Process candidates for a specific job using the LLM"""
313
+ # # Reset token counters for this job
314
+ # st.session_state.total_input_tokens = 0
315
+ # st.session_state.total_output_tokens = 0
316
+
317
+ # if llm_chain is None:
318
+ # with st.spinner("Setting up LLM..."):
319
+ # llm_chain = setup_llm()
320
+
321
+ # selected_candidates = []
322
+
323
+ # try:
324
+ # # Get job-specific data
325
+ # job_data = {
326
+ # "Company": job_row["Company"],
327
+ # "Role": job_row["Role"],
328
+ # "desc": job_row.get("One liner", ""),
329
+ # "Locations": job_row.get("Locations", ""),
330
+ # "Tech_Stack": job_row["Tech Stack"],
331
+ # "Industry": job_row.get("Industry", "")
332
+ # }
333
+
334
+ # # Find matching candidates for this job
335
+ # with st.spinner("Finding matching candidates based on tech stack..."):
336
+ # matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
337
+
338
+ # if not matching_candidates:
339
+ # st.warning("No candidates with matching tech stack found for this job.")
340
+ # return []
341
+
342
+ # st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
343
+
344
+ # # Create progress elements
345
+ # candidates_progress = st.progress(0)
346
+ # candidate_status = st.empty()
347
+
348
+ # # Process each candidate
349
+ # for i, candidate_data in enumerate(matching_candidates):
350
+ # # Update progress
351
+ # candidates_progress.progress((i + 1) / len(matching_candidates))
352
+ # candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
353
+
354
+ # # Process the candidate with the LLM
355
+ # response = call_llm(candidate_data, job_data, llm_chain)
356
+
357
+ # response_dict = {
358
+ # "Name": response["candidate_name"],
359
+ # "LinkedIn": response["candidate_url"],
360
+ # "summary": response["candidate_summary"],
361
+ # "Location": response["candidate_location"],
362
+ # "Fit Score": float(f"{response['fit_score']:.3f}"),
363
+ # "justification": response["justification"],
364
+ # # Add back original candidate data for context
365
+ # "Educational Background": candidate_data.get("Degree & Education", ""),
366
+ # "Years of Experience": candidate_data.get("Years of Experience", ""),
367
+ # "Current Title & Company": candidate_data.get("Current Title & Company", "")
368
+ # }
369
+
370
+ # # Add to selected candidates if score is high enough
371
+ # if response["fit_score"] >= 8.800:
372
+ # selected_candidates.append(response_dict)
373
+ # st.markdown(response_dict)
374
+ # else:
375
+ # st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
376
+
377
+ # # Clear progress indicators
378
+ # candidates_progress.empty()
379
+ # candidate_status.empty()
380
+
381
+ # # Show results
382
+ # if selected_candidates:
383
+ # st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
384
+ # else:
385
+ # st.info("No candidates met the minimum fit score threshold for this job.")
386
+
387
+ # # Token usage is now displayed in display_job_selection when showing results
388
+ # return selected_candidates
389
+
390
+ # except Exception as e:
391
+ # st.error(f"Error processing job: {e}")
392
+ # return []
393
+
394
+ # def main():
395
+ # st.title("👨‍💻 Candidate Matching App")
396
+
397
+ # # Initialize session state
398
+ # if 'processed_jobs' not in st.session_state:
399
+ # st.session_state.processed_jobs = {}
400
+
401
+ # st.write("""
402
+ # This app matches job listings with candidate profiles based on tech stack and other criteria.
403
+ # Select a job to find matching candidates.
404
+ # """)
405
+
406
+ # # API Key input
407
+ # with st.sidebar:
408
+ # st.header("API Configuration")
409
+ # api_key = st.text_input("Enter OpenAI API Key", type="password")
410
+ # if api_key:
411
+ # os.environ["OPENAI_API_KEY"] = api_key
412
+ # st.success("API Key set!")
413
+ # else:
414
+ # st.warning("Please enter OpenAI API Key to use LLM features")
415
+
416
+ # # Show API key warning if not set
417
+ # SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-e94255ca76fd.json'
418
+ # SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
419
+ # creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
420
+ # gc = gspread.authorize(creds)
421
+ # job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
422
+ # candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
423
+
424
+ # if not api_key:
425
+ # st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
426
+
427
+ # if api_key:
428
+ # try:
429
+ # # Load data from Google Sheets
430
+ # job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
431
+ # job_data = job_worksheet.get_all_values()
432
+ # candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
433
+ # candidate_data = candidate_worksheet.get_all_values()
434
+
435
+ # # Convert to DataFrames
436
+ # jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
437
+ # jobs_df = jobs_df.drop(["Link"],axis = 1)
438
+ # candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
439
+ # candidates_df = candidates_df.fillna("Unknown")
440
+
441
+ # # Display data preview
442
+ # with st.expander("Preview uploaded data"):
443
+ # st.subheader("Jobs Data Preview")
444
+ # st.dataframe(jobs_df.head(3))
445
+
446
+ # st.subheader("Candidates Data Preview")
447
+ # st.dataframe(candidates_df.head(3))
448
+
449
+ # # Map column names if needed
450
+ # column_mapping = {
451
+ # "Full Name": "Full Name",
452
+ # "LinkedIn URL": "LinkedIn URL",
453
+ # "Current Title & Company": "Current Title & Company",
454
+ # "Years of Experience": "Years of Experience",
455
+ # "Degree & University": "Degree & University",
456
+ # "Key Tech Stack": "Key Tech Stack",
457
+ # "Key Highlights": "Key Highlights",
458
+ # "Location (from most recent experience)": "Location (from most recent experience)"
459
+ # }
460
+
461
+ # # Rename columns if they don't match expected
462
+ # candidates_df = candidates_df.rename(columns={
463
+ # col: mapping for col, mapping in column_mapping.items()
464
+ # if col in candidates_df.columns and col != mapping
465
+ # })
466
+
467
+ # # Now, instead of processing all jobs upfront, we'll display job selection
468
+ # # and only process the selected job when the user chooses it
469
+ # display_job_selection(jobs_df, candidates_df, job_sheet)
470
+
471
+ # except Exception as e:
472
+ # st.error(f"Error processing files: {e}")
473
+
474
+ # st.divider()
475
+
476
+
477
+ # def display_job_selection(jobs_df, candidates_df, sh):
478
+ # # Initialize session state variables if they don't exist
479
+ # if 'Selected_Candidates' not in st.session_state:
480
+ # st.session_state.Selected_Candidates = {}
481
+ # if 'llm_chain' not in st.session_state:
482
+ # st.session_state.llm_chain = setup_llm()
483
+
484
+ # st.subheader("Select a job to view potential matches")
485
+
486
+ # # Create job options
487
+ # job_options = []
488
+ # for i, row in jobs_df.iterrows():
489
+ # job_options.append(f"{row['Role']} at {row['Company']}")
490
+
491
+ # if job_options:
492
+ # selected_job_index = st.selectbox("Jobs:",
493
+ # range(len(job_options)),
494
+ # format_func=lambda x: job_options[x])
495
+
496
+ # # Display job details
497
+ # job_row = jobs_df.iloc[selected_job_index]
498
+
499
+ # # Parse tech stack for display
500
+ # job_row_stack = parse_tech_stack(job_row["Tech Stack"])
501
+
502
+ # col1, col2 = st.columns([2, 1])
503
+
504
+ # with col1:
505
+ # st.subheader(f"Job Details: {job_row['Role']}")
506
+
507
+ # job_details = {
508
+ # "Company": job_row["Company"],
509
+ # "Role": job_row["Role"],
510
+ # "Description": job_row.get("One liner", "N/A"),
511
+ # "Locations": job_row.get("Locations", "N/A"),
512
+ # "Industry": job_row.get("Industry", "N/A"),
513
+ # "Tech Stack": display_tech_stack(job_row_stack)
514
+ # }
515
+
516
+ # for key, value in job_details.items():
517
+ # st.markdown(f"**{key}:** {value}")
518
+
519
+ # # Create a key for this job in session state
520
+ # job_key = f"job_{selected_job_index}_processed"
521
+
522
+ # if job_key not in st.session_state:
523
+ # st.session_state[job_key] = False
524
+
525
+ # # Create worksheet name
526
+ # sheet_name = f"{job_row['Role']} at {job_row['Company']}".strip()[:100]
527
+
528
+ # # Check if worksheet exists and has data
529
+ # worksheet_exists = False
530
+ # existing_candidates = []
531
+
532
+ # try:
533
+ # cand_worksheet = sh.worksheet(sheet_name)
534
+ # worksheet_exists = True
535
+ # # Get existing data if worksheet exists
536
+ # existing_data = cand_worksheet.get_all_values()
537
+ # if len(existing_data) > 1: # Has data beyond header
538
+ # existing_candidates = existing_data[1:]
539
+ # st.session_state[job_key] = True
540
+ # # Don't show the info message about existing data
541
+ # except gspread.exceptions.WorksheetNotFound:
542
+ # pass
543
+
544
+ # # Add a process button for this job
545
+ # if not st.session_state[job_key]:
546
+ # if st.button(f"Find Matching Candidates for this Job"):
547
+ # if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
548
+ # st.error("Please enter your OpenAI API key in the sidebar before processing")
549
+ # else:
550
+ # # Process candidates for this job (only when requested)
551
+ # with st.spinner("Processing candidates..."):
552
+ # selected_candidates = process_candidates_for_job(
553
+ # job_row,
554
+ # candidates_df,
555
+ # st.session_state.llm_chain
556
+ # )
557
+ # selected_candidates.sort(key=lambda x: x["Fit Score"], reverse=True)
558
+
559
+ # # Only create worksheet if we have candidates
560
+ # if selected_candidates:
561
+ # try:
562
+ # if not worksheet_exists:
563
+ # cand_worksheet = sh.add_worksheet(title=sheet_name, rows=10000, cols=50)
564
+
565
+ # # Prepare data for Google Sheet
566
+ # headers = list(selected_candidates[0].keys())
567
+ # rows = [headers] + [list(candidate.values()) for candidate in selected_candidates]
568
+
569
+ # # Clear existing data if any
570
+ # cand_worksheet.clear()
571
+
572
+ # # Write data to the worksheet
573
+ # cand_worksheet.update('A1', rows)
574
+
575
+ # st.success(f"Successfully processed {len(selected_candidates)} candidates")
576
+ # except Exception as e:
577
+ # st.error(f"Error writing to Google Sheet: {e}")
578
+
579
+ # # Store the results and set as processed
580
+ # st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
581
+ # st.session_state[job_key] = True
582
+
583
+ # # Force refresh
584
+ # st.rerun()
585
+
586
+ # # Display selected candidates if already processed
587
+ # if st.session_state[job_key]:
588
+ # if existing_candidates:
589
+ # # Convert existing worksheet data to our format
590
+ # headers = existing_data[0]
591
+ # selected_candidates = []
592
+ # for row in existing_data[1:]:
593
+ # candidate = dict(zip(headers, row))
594
+ # selected_candidates.append(candidate)
595
+ # st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
596
+ # elif 'Selected_Candidates' in st.session_state:
597
+ # selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
598
+ # else:
599
+ # selected_candidates = []
600
+
601
+ # # Display selected candidates
602
+ # st.subheader("Selected Candidates")
603
+
604
+ # # Display token usage statistics (only if we processed with LLM)
605
+ # if not existing_candidates and 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
606
+ # display_token_usage()
607
+
608
+ # if len(selected_candidates) > 0:
609
+ # for i, candidate in enumerate(selected_candidates):
610
+ # with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate.get('Fit Score', 'N/A')})"):
611
+ # col1, col2 = st.columns([3, 1])
612
+
613
+ # with col1:
614
+ # st.markdown(f"**Summary:** {candidate.get('summary', 'N/A')}")
615
+ # st.markdown(f"**Current:** {candidate.get('Current Title & Company', 'N/A')}")
616
+ # st.markdown(f"**Education:** {candidate.get('Educational Background', 'N/A')}")
617
+ # st.markdown(f"**Experience:** {candidate.get('Years of Experience', 'N/A')}")
618
+ # st.markdown(f"**Location:** {candidate.get('Location', 'N/A')}")
619
+ # if 'LinkedIn' in candidate:
620
+ # st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
621
+
622
+ # with col2:
623
+ # if 'Fit Score' in candidate:
624
+ # st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
625
+
626
+ # if 'justification' in candidate:
627
+ # st.markdown("**Justification:**")
628
+ # st.info(candidate['justification'])
629
+ # else:
630
+ # st.info("No candidates found for this job.")
631
+
632
+ # # Add a reset button to start over
633
+ # if st.button("Reset and Process Again"):
634
+ # # Reset this job's processing state
635
+ # st.session_state[job_key] = False
636
+ # if 'Selected_Candidates' in st.session_state and selected_job_index in st.session_state.Selected_Candidates:
637
+ # del st.session_state.Selected_Candidates[selected_job_index]
638
+ # st.rerun()
639
+
640
+
641
+ # if __name__ == "__main__":
642
+ # main()
643
+
644
+
645
+
646
  import streamlit as st
647
  import pandas as pd
648
  import json
649
  import os
650
  from pydantic import BaseModel, Field
651
+ from typing import List, Set, Dict, Any, Optional # Already have these, but commented for brevity if not all used
652
+ import time # Added for potential small delays if needed
653
  from langchain_openai import ChatOpenAI
654
+ from langchain_core.messages import HumanMessage # Not directly used in provided snippet
655
  from langchain_core.prompts import ChatPromptTemplate
656
+ from langchain_core.output_parsers import StrOutputParser # Not directly used in provided snippet
657
+ from langchain_core.prompts import PromptTemplate # Not directly used in provided snippet
658
  import gspread
659
  import tempfile
660
  from google.oauth2 import service_account
 
667
  )
668
  os.environ["STREAMLIT_HOME"] = tempfile.gettempdir()
669
  os.environ["STREAMLIT_DISABLE_TELEMETRY"] = "1"
 
670
  # Define pydantic model for structured output
671
  class Shortlist(BaseModel):
672
  fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements upto 3 decimal points.")
 
678
 
679
  # Function to calculate tokens
680
  def calculate_tokens(text, model="gpt-4o-mini"):
 
681
  try:
 
682
  if "gpt-4" in model:
683
  encoding = tiktoken.encoding_for_model("gpt-4o-mini")
684
  elif "gpt-3.5" in model:
685
  encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
686
  else:
687
+ encoding = tiktoken.get_encoding("cl100k_base")
 
 
688
  return len(encoding.encode(text))
689
  except Exception as e:
 
690
  return len(text) // 4
691
 
692
  # Function to display token usage
693
  def display_token_usage():
 
694
  if 'total_input_tokens' not in st.session_state:
695
  st.session_state.total_input_tokens = 0
696
  if 'total_output_tokens' not in st.session_state:
 
700
  total_output = st.session_state.total_output_tokens
701
  total_tokens = total_input + total_output
702
 
703
+ model_to_check = st.session_state.get('model_name', "gpt-4o-mini") # Use a default if not set
704
+
705
+ if model_to_check == "gpt-4o-mini":
706
+ input_cost_per_1k = 0.00015 # Adjusted to example rates ($0.15 / 1M tokens)
707
+ output_cost_per_1k = 0.0006 # Adjusted to example rates ($0.60 / 1M tokens)
708
+ elif "gpt-4" in model_to_check: # Fallback for other gpt-4
709
+ input_cost_per_1k = 0.005
710
+ output_cost_per_1k = 0.015 # General gpt-4 pricing can vary
711
  else: # Assume gpt-3.5-turbo pricing
712
+ input_cost_per_1k = 0.0005 # $0.0005 per 1K input tokens
713
+ output_cost_per_1k = 0.0015 # $0.0015 per 1K output tokens
714
 
715
  estimated_cost = (total_input / 1000 * input_cost_per_1k) + (total_output / 1000 * output_cost_per_1k)
716
 
717
+ st.subheader("📊 Token Usage Statistics (for last processed job)")
718
 
719
  col1, col2, col3 = st.columns(3)
720
+ with col1: st.metric("Input Tokens", f"{total_input:,}")
721
+ with col2: st.metric("Output Tokens", f"{total_output:,}")
722
+ with col3: st.metric("Total Tokens", f"{total_tokens:,}")
 
 
 
 
 
 
 
723
  st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")
 
724
  return total_tokens
725
 
726
  # Function to parse and normalize tech stacks
727
  def parse_tech_stack(stack):
728
+ if pd.isna(stack) or stack == "" or stack is None: return set()
729
+ if isinstance(stack, set): return stack
 
 
730
  try:
 
731
  if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
 
732
  items = stack.strip("{}").split(",")
733
  return set(item.strip().strip("'\"") for item in items if item.strip())
734
  return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
 
737
  return set()
738
 
739
  def display_tech_stack(stack_set):
740
+ return ", ".join(sorted(list(stack_set))) if isinstance(stack_set, set) else str(stack_set)
741
+
 
742
 
743
  def get_matching_candidates(job_stack, candidates_df):
 
744
  matched = []
745
  job_stack_set = parse_tech_stack(job_stack)
 
746
  for _, candidate in candidates_df.iterrows():
747
  candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
748
  common = job_stack_set & candidate_stack
749
+ if len(common) >= 2: # Original condition
750
  matched.append({
751
+ "Name": candidate["Full Name"], "URL": candidate["LinkedIn URL"],
 
752
  "Degree & Education": candidate["Degree & University"],
753
  "Years of Experience": candidate["Years of Experience"],
754
  "Current Title & Company": candidate['Current Title & Company'],
755
  "Key Highlights": candidate["Key Highlights"],
756
  "Location": candidate["Location (from most recent experience)"],
757
+ "Experience": str(candidate["Experience"]), "Tech Stack": candidate_stack
 
758
  })
759
  return matched
760
 
 
782
  # Create system prompt
783
  system = """You are an expert Tech Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
784
  the profile is according to job.
785
+ First of all check the location of the candidate, if the location is not in the range of the job location then reject the candidate directly without any further analysis.
786
+ for example if the job location is New York and the candidate is in San Francisco then reject the candidate. Similarly for other states as well.
787
  Try to ensure following points while estimating the candidate's fit score:
788
  For education:
789
  Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
790
  Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
791
  Tier3 - Unknown or unranked institutions - Lower points or reject
 
 
792
  Startup Experience Requirement:
793
  Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
794
  preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
 
 
 
795
  The fit score signifies based on following metrics:
796
  1–5 - Poor Fit - Auto-reject
797
  6–7 - Weak Fit - Auto-reject
798
  8.0–8.7 - Moderate Fit - Auto-reject
799
  8.8–10 - STRONG Fit - Include in results
 
800
  Each candidate's fit score should be calculated based on a weighted evaluation of their background and must be distinct even if candidates have similar profiles.
801
  """
802
 
 
816
  Tech Stack: {Tech_Stack}
817
  Industry: {Industry}
818
 
 
819
  Candidate Details:
820
  Full Name: {Full_Name}
821
  LinkedIn URL: {LinkedIn_URL}
 
826
  Key Highlights: {Key_Highlights}
827
  Location (from most recent experience): {cand_Location}
828
  Past_Experience: {Experience}
 
 
829
  Answer in the structured manner as per the schema.
830
  If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
831
  The `fit_score` must be a float with **exactly three decimal digits** (e.g. 8.812, 9.006). Do not round to 1 or 2 decimals.
 
838
  return cat_class
839
 
840
  def call_llm(candidate_data, job_data, llm_chain):
 
841
  try:
842
+ job_tech_stack = ", ".join(sorted(list(job_data.get("Tech_Stack", set())))) if isinstance(job_data.get("Tech_Stack"), set) else job_data.get("Tech_Stack", "")
843
+ candidate_tech_stack = ", ".join(sorted(list(candidate_data.get("Tech Stack", set())))) if isinstance(candidate_data.get("Tech Stack"), set) else candidate_data.get("Tech Stack", "")
 
 
 
 
 
 
 
844
 
 
845
  payload = {
846
+ "Company": job_data.get("Company", ""), "Role": job_data.get("Role", ""),
847
+ "desc": job_data.get("desc", ""), "Locations": job_data.get("Locations", ""),
848
+ "Tech_Stack": job_tech_stack, "Industry": job_data.get("Industry", ""),
849
+ "Full_Name": candidate_data.get("Name", ""), "LinkedIn_URL": candidate_data.get("URL", ""),
 
 
 
 
 
850
  "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
851
  "Years_of_Experience": candidate_data.get("Years of Experience", ""),
852
  "Degree_University": candidate_data.get("Degree & Education", ""),
853
+ "Key_Tech_Stack": candidate_tech_stack, "Key_Highlights": candidate_data.get("Key Highlights", ""),
854
+ "cand_Location": candidate_data.get("Location", ""), "Experience": candidate_data.get("Experience", "")
 
 
855
  }
 
 
856
  payload_str = json.dumps(payload)
 
 
857
  input_tokens = calculate_tokens(payload_str, st.session_state.model_name)
 
 
858
  response = llm_chain.invoke(payload)
859
+ # print(candidate_data.get("Experience", "")) # Kept for your debugging if needed
860
+
861
+ response_str = f"candidate_name: {response.candidate_name} ... fit_score: {float(f'{response.fit_score:.3f}')} ..." # Truncated
 
 
 
 
 
 
 
 
 
 
862
  output_tokens = calculate_tokens(response_str, st.session_state.model_name)
863
 
864
+ if 'total_input_tokens' not in st.session_state: st.session_state.total_input_tokens = 0
865
+ if 'total_output_tokens' not in st.session_state: st.session_state.total_output_tokens = 0
 
 
 
 
866
  st.session_state.total_input_tokens += input_tokens
867
  st.session_state.total_output_tokens += output_tokens
868
 
 
869
  return {
870
+ "candidate_name": response.candidate_name, "candidate_url": response.candidate_url,
871
+ "candidate_summary": response.candidate_summary, "candidate_location": response.candidate_location,
872
+ "fit_score": response.fit_score, "justification": response.justification
 
 
 
873
  }
874
  except Exception as e:
875
+ st.error(f"Error calling LLM for {candidate_data.get('Name', 'Unknown')}: {e}")
 
876
  return {
877
+ "candidate_name": candidate_data.get("Name", "Unknown"), "candidate_url": candidate_data.get("URL", ""),
878
+ "candidate_summary": "Error processing candidate profile", "candidate_location": candidate_data.get("Location", "Unknown"),
879
+ "fit_score": 0.0, "justification": f"Error in LLM processing: {str(e)}"
 
 
 
880
  }
881
 
882
def process_candidates_for_job(job_row, candidates_df, llm_chain=None, min_fit_score=8.8):
    """Evaluate every tech-stack-matching candidate for one job with the LLM.

    Progress, per-candidate verdicts and a final summary are rendered through
    Streamlit while the loop runs. The per-job token counters in
    ``st.session_state`` are reset to zero before the first LLM call.

    Args:
        job_row: Job record. "Company", "Role" and "Tech Stack" are read by
            key; "One liner", "Locations" and "Industry" are read with
            ``.get`` and default to an empty string.
        candidates_df: Candidate table handed to ``get_matching_candidates``.
        llm_chain: Chain forwarded to ``call_llm``. Built with ``setup_llm()``
            when omitted.
        min_fit_score: Inclusive score a candidate must reach to be selected.
            Defaults to 8.8, the cut-off that used to be hard-coded.

    Returns:
        A list with one dict per selected candidate, in evaluation order.
        Empty when nothing matched the tech stack or nothing reached the
        threshold. If ``st.session_state['stop_processing_flag']`` is set
        during the run, the loop ends early and the candidates selected so
        far are returned.
    """
    # Imported here so the stop-notice delay does not depend on a
    # module-level ``import time`` being present.
    import time

    # Token counters are tracked per job, so start from zero.
    st.session_state.total_input_tokens = 0
    st.session_state.total_output_tokens = 0

    if llm_chain is None:
        with st.spinner("Setting up LLM..."):
            llm_chain = setup_llm()

    job_data = {
        "Company": job_row["Company"],
        "Role": job_row["Role"],
        "desc": job_row.get("One liner", ""),
        "Locations": job_row.get("Locations", ""),
        "Tech_Stack": job_row["Tech Stack"],
        "Industry": job_row.get("Industry", ""),
    }

    with st.spinner("Finding matching candidates based on tech stack..."):
        matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)

    if not matching_candidates:
        st.warning("No candidates with matching tech stack found for this job.")
        return []

    total = len(matching_candidates)
    st.success(f"Found {total} candidates with matching tech stack. Evaluating with LLM...")

    candidates_progress = st.progress(0)
    candidate_status = st.empty()  # Placeholder reused for live status text.

    selected_candidates = []
    for position, candidate_data in enumerate(matching_candidates, start=1):
        # Honour a stop request raised from the UI between two LLM calls.
        if st.session_state.get('stop_processing_flag', False):
            candidate_status.warning("Processing stopped by user.")
            time.sleep(1)  # Give the user a moment to read the notice.
            break

        candidate_status.text(
            f"Evaluating candidate {position}/{total}: {candidate_data.get('Name', 'Unknown')}"
        )
        response = call_llm(candidate_data, job_data, llm_chain)

        # Coerce once so the threshold check and the display agree on the type.
        fit_score = float(response["fit_score"])
        response_dict = {
            "Name": response["candidate_name"],
            "LinkedIn": response["candidate_url"],
            "summary": response["candidate_summary"],
            "Location": response["candidate_location"],
            "Fit Score": round(fit_score, 3),
            "justification": response["justification"],
            "Educational Background": candidate_data.get("Degree & Education", ""),
            "Years of Experience": candidate_data.get("Years of Experience", ""),
            "Current Title & Company": candidate_data.get("Current Title & Company", ""),
        }

        # These live messages are only visible while processing; they are
        # cleared by the next full script rerun.
        if fit_score >= min_fit_score:
            selected_candidates.append(response_dict)
            st.markdown(
                f"**Selected Candidate:** [{response_dict['Name']}]({response_dict['LinkedIn']}) "
                f"(Score: {response_dict['Fit Score']:.3f}, Location: {response_dict['Location']})"
            )
        else:
            st.write(
                f"Rejected candidate: {response_dict['Name']} with score: "
                f"{response_dict['Fit Score']:.3f}, Location: {response_dict['Location']}"
            )
        candidates_progress.progress(position / total)

    candidates_progress.empty()
    candidate_status.empty()

    # The completion summary would be misleading after a user-requested stop.
    if not st.session_state.get('stop_processing_flag', False):
        if selected_candidates:
            st.success(
                f"✅ LLM evaluation complete. Found {len(selected_candidates)} "
                "suitable candidates for this job!"
            )
        else:
            st.info("LLM evaluation complete. No candidates met the minimum fit score threshold for this job.")

    return selected_candidates
950
+
 
 
 
 
951
 
952
def main():
    """Streamlit entry point for the candidate matching app.

    Initialises the session-state slots used by the app, reads the OpenAI API
    key from the sidebar, connects to the two Google Sheets with a service
    account, loads the job and candidate worksheets into DataFrames and hands
    them to ``display_job_selection``. Stops the script run when the sheets
    cannot be opened or no API key is available.
    """
    st.title("👨‍💻 Candidate Matching App")

    # Create each session-state slot once; later reruns keep existing values.
    for state_key, initial_value in (
        ("processed_jobs", {}),
        ("Selected_Candidates", {}),
        ("llm_chain", None),
        ("stop_processing_flag", False),
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = initial_value

    st.write("This app matches job listings with candidate profiles...")

    with st.sidebar:
        st.header("API Configuration")
        api_key = st.text_input("Enter OpenAI API Key", type="password", key="api_key_input")
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
            # Build the chain once per session, as soon as a key is available.
            if st.session_state.llm_chain is None:
                with st.spinner("Setting up LLM..."):
                    st.session_state.llm_chain = setup_llm()
            st.success("API Key set")
        else:
            st.warning("Please enter OpenAI API Key to use LLM features")
            # Only drop the chain when no key is available at all. Clearing it
            # while OPENAI_API_KEY is still in the environment made the
            # fallback below rebuild it and rerun on every script run.
            if not os.environ.get("OPENAI_API_KEY"):
                st.session_state.llm_chain = None

    # Defined outside the try so the error message can always reference it.
    service_account_file = 'src/synapse-recruitment-e94255ca76fd.json'
    scopes = ['https://www.googleapis.com/auth/spreadsheets']
    try:
        creds = service_account.Credentials.from_service_account_file(service_account_file, scopes=scopes)
        gc = gspread.authorize(creds)
        job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
        candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
    except Exception as e:
        st.error(
            f"Failed to connect to Google Sheets. Ensure '{service_account_file}' "
            f"is valid and has permissions. Error: {e}"
        )
        st.stop()

    if not os.environ.get("OPENAI_API_KEY"):
        st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
        st.stop()

    # A key exists in the environment but no chain was built in the sidebar.
    # The chain is usable as soon as it is stored, so no st.rerun() is issued
    # here; the previous unconditional rerun could loop forever.
    if st.session_state.llm_chain is None:
        with st.spinner("Setting up LLM..."):
            st.session_state.llm_chain = setup_llm()

    try:
        job_rows = job_sheet.worksheet('paraform_jobs_formatted').get_all_values()
        candidate_rows = candidates_sheet.worksheet('transformed_candidates_updated').get_all_values()

        # The first row of each worksheet holds the column names.
        jobs_df = pd.DataFrame(job_rows[1:], columns=job_rows[0]).drop(["Link"], axis=1, errors='ignore')
        candidates_df = pd.DataFrame(candidate_rows[1:], columns=candidate_rows[0]).fillna("Unknown")
        candidates_df.drop_duplicates(subset=['LinkedIn URL'], keep='first', inplace=True)

        with st.expander("Preview uploaded data"):
            st.subheader("Jobs Data Preview")
            st.dataframe(jobs_df.head(3))
            st.subheader("Candidates Data Preview")
            st.dataframe(candidates_df.head(3))

        # The jobs spreadsheet is also where per-job result worksheets live.
        display_job_selection(jobs_df, candidates_df, job_sheet)

    except Exception as e:
        st.error(f"Error processing files or data: {e}")

    st.divider()
1021
 
1022
def _fit_score_or_none(value):
    """Return ``value`` converted to float, or None when it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _read_sheet_rows(sh, sheet_name):
    """Return ``(worksheet_exists, rows)`` for the results worksheet of a job.

    ``rows`` is the raw list of lists with the header first. It is empty when
    the worksheet is missing or holds nothing beyond a header row.
    """
    try:
        rows = sh.worksheet(sheet_name).get_all_values()
    except gspread.exceptions.WorksheetNotFound:
        return False, []
    return True, (rows if len(rows) > 1 else [])


def _candidates_from_sheet_rows(rows):
    """Turn raw worksheet rows into candidate dicts sorted by fit score, best first."""
    headers = rows[0]
    candidates = []
    # Data starts on sheet row 2 because row 1 is the header.
    for sheet_row_number, row in enumerate(rows[1:], start=2):
        candidate = {
            header: (row[col] if col < len(row) else None)
            for col, header in enumerate(headers)
        }
        raw_score = candidate.get("Fit Score")
        score = _fit_score_or_none(raw_score)
        if score is None:
            if isinstance(raw_score, str):
                st.warning(
                    f"Could not convert Fit Score '{raw_score}' to float "
                    f"for candidate in sheet row {sheet_row_number}."
                )
            # Missing or unparseable scores sort last instead of breaking the sort.
            score = 0.0
        candidate["Fit Score"] = score
        candidates.append(candidate)
    candidates.sort(key=lambda cand: cand["Fit Score"], reverse=True)
    return candidates


def _save_candidates_to_sheet(sh, sheet_name, candidates, worksheet_exists):
    """Overwrite the job's results worksheet with ``candidates``, creating it if needed."""
    if worksheet_exists:
        worksheet = sh.worksheet(sheet_name)
    else:
        worksheet = sh.add_worksheet(title=sheet_name, rows=max(100, len(candidates) + 10), cols=20)

    headers = list(candidates[0].keys())
    # Every cell is written as a string.
    rows = [headers] + [[str(cand.get(h, "")) for h in headers] for cand in candidates]
    worksheet.clear()
    # Keyword arguments keep this call valid across gspread versions, whose
    # positional order for update() differs.
    worksheet.update(range_name='A1', values=rows)


def _render_candidate(candidate, position, job_index):
    """Render one candidate as an expander with details and a copy-to-clipboard button."""
    score_display = candidate.get('Fit Score', 'N/A')
    if isinstance(score_display, (float, int)):
        score_display = f"{score_display:.3f}"
    # A non-numeric score (for example 'N/A') is shown unchanged.

    with st.expander(f"{position + 1}. {candidate.get('Name', 'N/A')} (Score: {score_display})"):
        text_to_copy = "\n".join([
            f"Candidate: {candidate.get('Name', 'N/A')} (Score: {score_display})",
            f"Summary: {candidate.get('summary', 'N/A')}",
            f"Current: {candidate.get('Current Title & Company', 'N/A')}",
            f"Education: {candidate.get('Educational Background', 'N/A')}",
            f"Experience: {candidate.get('Years of Experience', 'N/A')}",
            f"Location: {candidate.get('Location', 'N/A')}",
            f"LinkedIn: {candidate.get('LinkedIn', 'N/A')}",
            f"Justification: {candidate.get('justification', 'N/A')}",
        ]) + "\n"
        # json.dumps yields a valid JS string literal. "</" is escaped so
        # candidate text can never close the surrounding <script> element.
        js_text_to_copy = json.dumps(text_to_copy).replace("</", "<\\/")
        button_unique_id = f"copy_btn_job{job_index}_cand{position}"

        copy_button_html = f"""
        <script>
        function copyToClipboard_{button_unique_id}() {{
            const textToCopy = {js_text_to_copy};
            navigator.clipboard.writeText(textToCopy).then(function() {{
                const btn = document.getElementById('{button_unique_id}');
                if (btn) {{ // Check if button exists
                    const originalText = btn.innerText;
                    btn.innerText = 'Copied!';
                    setTimeout(function() {{ btn.innerText = originalText; }}, 1500);
                }}
            }}, function(err) {{
                console.error('Could not copy text: ', err);
                alert('Failed to copy text. Please use Ctrl+C or your browser\\'s copy function.');
            }});
        }}
        </script>
        <button id="{button_unique_id}" onclick="copyToClipboard_{button_unique_id}()">📋 Copy Details</button>
        """

        details_col, copy_col = st.columns([0.82, 0.18])
        with copy_col:
            st.components.v1.html(copy_button_html, height=40)

        with details_col:
            st.markdown(f"**Summary:** {candidate.get('summary', 'N/A')}")
            st.markdown(f"**Current:** {candidate.get('Current Title & Company', 'N/A')}")
            st.markdown(f"**Education:** {candidate.get('Educational Background', 'N/A')}")
            st.markdown(f"**Experience:** {candidate.get('Years of Experience', 'N/A')}")
            st.markdown(f"**Location:** {candidate.get('Location', 'N/A')}")
            if candidate.get('LinkedIn'):
                st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
            else:
                st.markdown("**LinkedIn Profile:** N/A")

            if candidate.get('justification'):
                st.markdown("**Justification:**")
                st.info(candidate['justification'])


def display_job_selection(jobs_df, candidates_df, sh):
    """Let the user pick a job, run candidate matching for it and show the results.

    Args:
        jobs_df: DataFrame of jobs; "Role", "Company" and "Tech Stack" are
            read by key, "One liner", "Locations" and "Industry" via ``.get``.
        candidates_df: Candidate table forwarded to ``process_candidates_for_job``.
        sh: gspread spreadsheet that holds one results worksheet per job,
            named "<Role> at <Company>" (trimmed to 100 characters).

    Per-job progress is tracked in ``st.session_state`` under keys derived
    from the selected job's index. Results come from the current session when
    the job was processed in it, otherwise from the job's worksheet if that
    already contains data rows.
    """
    st.subheader("Select a job to view potential matches")
    job_options = [f"{row['Role']} at {row['Company']}" for _, row in jobs_df.iterrows()]

    if not job_options:
        st.warning("No jobs found to display.")
        return

    selected_job_index = st.selectbox(
        "Jobs:", range(len(job_options)), format_func=lambda x: job_options[x], key="job_selectbox"
    )

    job_row = jobs_df.iloc[selected_job_index]
    job_row_stack = parse_tech_stack(job_row["Tech Stack"])

    col_job_details_display, _ = st.columns([2, 1])
    with col_job_details_display:
        st.subheader(f"Job Details: {job_row['Role']}")
        job_details_dict = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "Description": job_row.get("One liner", "N/A"),
            "Locations": job_row.get("Locations", "N/A"),
            "Industry": job_row.get("Industry", "N/A"),
            "Tech Stack": display_tech_stack(job_row_stack),
        }
        for key, value in job_details_dict.items():
            st.markdown(f"**{key}:** {value}")

    # Session-state keys that track this particular job.
    job_processed_key = f"job_{selected_job_index}_processed_successfully"
    job_is_processing_key = f"job_{selected_job_index}_is_currently_processing"
    st.session_state.setdefault(job_processed_key, False)
    st.session_state.setdefault(job_is_processing_key, False)

    sheet_name = f"{job_row['Role']} at {job_row['Company']}".strip()[:100]
    worksheet_exists, existing_candidates_from_sheet = _read_sheet_rows(sh, sheet_name)

    # --- Processing control area ---
    # Shown until the job is processed in this session, and also whenever the
    # sheet already has data so that it can be re-processed and overwritten.
    if not st.session_state.get(job_processed_key, False) or existing_candidates_from_sheet:
        if (
            existing_candidates_from_sheet
            and not st.session_state.get(job_is_processing_key, False)
            and not st.session_state.get(job_processed_key, False)
        ):
            st.info(f"Processing ('{sheet_name}')")

        col_find, col_stop = st.columns(2)
        with col_find:
            if st.button(
                "Find Matching Candidates for this Job",
                key=f"find_btn_{selected_job_index}",
                disabled=st.session_state.get(job_is_processing_key, False),
            ):
                if not os.environ.get("OPENAI_API_KEY") or st.session_state.llm_chain is None:
                    st.error("OpenAI API key not set or LLM not initialized. Please check sidebar.")
                else:
                    # Arm the processing run and rerun so the STOP button is
                    # rendered before the long-running loop starts.
                    st.session_state[job_is_processing_key] = True
                    st.session_state.stop_processing_flag = False
                    st.session_state.Selected_Candidates[selected_job_index] = []
                    st.session_state[job_processed_key] = False
                    st.rerun()

        with col_stop:
            # STOP is only offered while a run is in progress.
            if st.session_state.get(job_is_processing_key, False):
                if st.button("STOP Processing", key=f"stop_btn_{selected_job_index}"):
                    st.session_state.stop_processing_flag = True
                    st.warning("Stop request sent. Processing will halt shortly.")

    # --- Actual processing ---
    if st.session_state.get(job_is_processing_key, False):
        with st.spinner(f"Processing candidates for {job_row['Role']} at {job_row['Company']}..."):
            processed_candidates_list = process_candidates_for_job(
                job_row, candidates_df, st.session_state.llm_chain
            )

        st.session_state[job_is_processing_key] = False

        if st.session_state.get('stop_processing_flag', False):
            st.info("Processing was stopped by user. Results (if any) were not saved. You can try processing again.")
            st.session_state.Selected_Candidates[selected_job_index] = []  # Drop partial results.
            st.session_state[job_processed_key] = False
        elif processed_candidates_list:
            # Scores must be floats for a reliable sort; anything else becomes 0.0.
            for cand in processed_candidates_list:
                score = _fit_score_or_none(cand.get("Fit Score"))
                cand["Fit Score"] = 0.0 if score is None else score
            processed_candidates_list.sort(key=lambda cand: cand["Fit Score"], reverse=True)

            st.session_state.Selected_Candidates[selected_job_index] = processed_candidates_list
            st.session_state[job_processed_key] = True

            try:
                _save_candidates_to_sheet(sh, sheet_name, processed_candidates_list, worksheet_exists)
                st.success(f"Results saved to Google Sheet: '{sheet_name}'")
            except Exception as e:
                st.error(f"Error writing to Google Sheet '{sheet_name}': {e}")
        else:
            st.info("No suitable candidates found after processing.")
            st.session_state.Selected_Candidates[selected_job_index] = []
            st.session_state[job_processed_key] = True  # Processed, just without results.

        st.session_state.pop('stop_processing_flag', None)
        st.rerun()  # Redraw the page from the updated state.

    # --- Results area ---
    should_display_results_area = False
    final_candidates_to_display = []

    if st.session_state.get(job_is_processing_key, False):
        pass  # Nothing to show while a run is in progress.
    elif st.session_state.get(job_processed_key, False):
        should_display_results_area = True
        final_candidates_to_display = st.session_state.Selected_Candidates.get(selected_job_index, [])
    elif existing_candidates_from_sheet:
        # Not processed in this session: fall back to what the sheet holds.
        should_display_results_area = True
        final_candidates_to_display = _candidates_from_sheet_rows(existing_candidates_from_sheet)
        st.info(f"Displaying: '{sheet_name}'.")

    if not should_display_results_area:
        return

    st.subheader("Selected Candidates")

    # Token usage is only meaningful right after a run in this session.
    if st.session_state.get(job_processed_key, False) and (
        st.session_state.get('total_input_tokens', 0) > 0
        or st.session_state.get('total_output_tokens', 0) > 0
    ):
        display_token_usage()

    if final_candidates_to_display:
        for i, candidate in enumerate(final_candidates_to_display):
            _render_candidate(candidate, i, selected_job_index)
    elif st.session_state.get(job_processed_key, False):
        st.info("No candidates met the criteria for this job after processing.")

    if st.button("Reset and Process Again", key=f"reset_btn_{selected_job_index}"):
        st.session_state[job_processed_key] = False
        st.session_state.pop(job_is_processing_key, None)
        st.session_state.Selected_Candidates.pop(selected_job_index, None)
        try:
            sh.worksheet(sheet_name).clear()
            st.info(f"Cleared Google Sheet '{sheet_name}' as part of reset.")
        except gspread.exceptions.WorksheetNotFound:
            pass  # No worksheet yet, so there is nothing to clear.
        except Exception as e:
            # Clearing is best-effort: report the failure but still reset.
            st.warning(f"Could not clear Google Sheet '{sheet_name}': {e}")
        st.rerun()
1259
 
1260
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
1262
+