# NOTE: non-code Hugging Face Spaces page-header residue removed ("Spaces: Running Running").
import streamlit as st | |
import pandas as pd | |
import json | |
import os | |
from pydantic import BaseModel, Field | |
from typing import List, Set, Dict, Any, Optional | |
import time | |
from langchain_openai import ChatOpenAI | |
from langchain_core.messages import HumanMessage | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.prompts import PromptTemplate | |
import gspread | |
from google.oauth2 import service_account | |
# Page chrome — must be the first Streamlit call in the script.
st.set_page_config(
    page_title="Candidate Matching App",
    page_icon="π¨βπ»π―",  # NOTE(review): looks mojibake-garbled — presumably originally emoji; confirm source encoding
    layout="wide"
)
# Define pydantic model for structured output | |
# Define pydantic model for structured output
class Shortlist(BaseModel):
    """Schema the LLM must fill when evaluating one candidate against one job.

    Used via ``llm.with_structured_output(Shortlist)`` in ``setup_llm``; the
    resulting instance is unpacked into a plain dict by ``call_llm``.
    """

    # Score 0-10; downstream code keeps only candidates with >= 8.8.
    fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
    candidate_name: str = Field(description="The name of the candidate.")
    candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
    candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
    candidate_location: str = Field(description="The location of the candidate.")
    justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
# Function to parse and normalize tech stacks | |
# Function to parse and normalize tech stacks
def parse_tech_stack(stack):
    """Normalize a tech-stack cell into a set of lowercase skill tokens.

    Accepts NaN/None/"" (returns an empty set), an existing set (returned
    unchanged), a string that looks like a set literal ("{'Python', 'Java'}"),
    or a plain comma-separated string.

    Returns:
        set[str]: lowercased, whitespace-stripped skill names.
    """
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        # Handle a string representation of a set, e.g. "{'Python', 'Java'}".
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            items = stack.strip("{}").split(",")
            # BUGFIX: lowercase here as well — the original only lowercased the
            # comma-separated branch, making set-overlap matching case-sensitive
            # for set-literal inputs.
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()
def display_tech_stack(stack_set):
    """Render a tech stack as a human-readable, comma-separated string.

    Sets are sorted for stable display; any other value is shown via str().
    """
    if not isinstance(stack_set, set):
        return str(stack_set)
    return ", ".join(sorted(stack_set))
def get_matching_candidates(job_stack, candidates_df):
    """Find candidates with matching tech stack for a specific job.

    A candidate qualifies when their normalized stack shares at least two
    skills with the job's normalized stack.
    """
    job_skills = parse_tech_stack(job_stack)
    results = []
    for _, row in candidates_df.iterrows():
        cand_skills = parse_tech_stack(row['Key Tech Stack'])
        # Guard: require an overlap of two or more skills.
        overlap = job_skills.intersection(cand_skills)
        if len(overlap) < 2:
            continue
        results.append({
            "Name": row["Full Name"],
            "URL": row["LinkedIn URL"],
            "Degree & Education": row["Degree & University"],
            "Years of Experience": row["Years of Experience"],
            "Current Title & Company": row['Current Title & Company'],
            "Key Highlights": row["Key Highlights"],
            "Location": row["Location (from most recent experience)"],
            "Experience": str(row["Experience"]),
            "Tech Stack": cand_skills,
        })
    return results
def setup_llm():
    """Set up the LangChain LLM with structured output.

    Returns a runnable chain (prompt | structured-output LLM) that maps the
    payload dict built in ``call_llm`` to a ``Shortlist`` instance. Relies on
    OPENAI_API_KEY being present in the environment.
    """
    # Create LLM instance (temperature 0 for deterministic scoring).
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    # Create structured output: replies are coerced into the Shortlist schema.
    sum_llm = llm.with_structured_output(Shortlist)
    # Create system prompt: the scoring rubric (education tiers, startup
    # experience, fit-score bands — only >= 8.8 passes downstream filtering).
    system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
    the profile is according to job.
    Try to ensure following points while estimating the candidate's fit score:
    For education:
    Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
    Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
    Tier3 - Unknown or unranked institutions - Lower points or reject
    Startup Experience Requirement:
    Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
    preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
    The fit score signifies based on following metrics:
    1β5 - Poor Fit - Auto-reject
    6β7 - Weak Fit - Auto-reject
    8.0β8.7 - Moderate Fit - Auto-reject
    8.8β10 - STRONG Fit - Include in results
    """
    # Create query prompt — the {placeholders} must match the payload keys
    # assembled in call_llm.
    query_prompt = ChatPromptTemplate.from_messages([
        ("system", system),
        ("human", """
    You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
    For this you will be provided with the follwing inputs of job and candidates:
    Job Details
    Company: {Company}
    Role: {Role}
    About Company: {desc}
    Locations: {Locations}
    Tech Stack: {Tech_Stack}
    Industry: {Industry}
    Candidate Details:
    Full Name: {Full_Name}
    LinkedIn URL: {LinkedIn_URL}
    Current Title & Company: {Current_Title_Company}
    Years of Experience: {Years_of_Experience}
    Degree & University: {Degree_University}
    Key Tech Stack: {Key_Tech_Stack}
    Key Highlights: {Key_Highlights}
    Location (from most recent experience): {cand_Location}
    Past_Experience: {Experience}
    Answer in the structured manner as per the schema.
    If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
    """),
    ])
    # Chain the prompt and LLM
    cat_class = query_prompt | sum_llm
    return cat_class
def call_llm(candidate_data, job_data, llm_chain):
    """Call the actual LLM to evaluate the candidate.

    Args:
        candidate_data: dict produced by get_matching_candidates
            ("Name", "URL", "Tech Stack", ...).
        job_data: dict with keys Company/Role/desc/Locations/Tech_Stack/Industry.
        llm_chain: runnable whose .invoke(payload) returns a Shortlist-like
            object (see setup_llm).

    Returns:
        dict with candidate_name/candidate_url/candidate_summary/
        candidate_location/fit_score/justification; on any failure, a
        fallback dict with fit_score 0.0 so the caller's loop keeps going.
    """
    try:
        # Convert tech-stack sets to strings for the LLM payload (sorted for
        # deterministic prompts).
        job_tech_stack = job_data.get("Tech_Stack", set())
        candidate_tech_stack = candidate_data.get("Tech Stack", set())
        if isinstance(job_tech_stack, set):
            job_tech_stack = ", ".join(sorted(job_tech_stack))
        if isinstance(candidate_tech_stack, set):
            candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
        # Keys must match the {placeholders} in the ChatPromptTemplate.
        payload = {
            "Company": job_data.get("Company", ""),
            "Role": job_data.get("Role", ""),
            "desc": job_data.get("desc", ""),
            "Locations": job_data.get("Locations", ""),
            "Tech_Stack": job_tech_stack,
            "Industry": job_data.get("Industry", ""),
            "Full_Name": candidate_data.get("Name", ""),
            "LinkedIn_URL": candidate_data.get("URL", ""),
            "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
            "Years_of_Experience": candidate_data.get("Years of Experience", ""),
            "Degree_University": candidate_data.get("Degree & Education", ""),
            "Key_Tech_Stack": candidate_tech_stack,
            "Key_Highlights": candidate_data.get("Key Highlights", ""),
            "cand_Location": candidate_data.get("Location", ""),
            "Experience": candidate_data.get("Experience", "")
        }
        # Call LLM. (BUGFIX: removed leftover debug print of the candidate's
        # Experience field that was spamming stdout on every evaluation.)
        response = llm_chain.invoke(payload)
        # Return response in the flat dict format the caller expects.
        return {
            "candidate_name": response.candidate_name,
            "candidate_url": response.candidate_url,
            "candidate_summary": response.candidate_summary,
            "candidate_location": response.candidate_location,
            "fit_score": response.fit_score,
            "justification": response.justification
        }
    except Exception as e:
        st.error(f"Error calling LLM: {e}")
        # Fallback to a default response (score 0.0 auto-rejects downstream).
        return {
            "candidate_name": candidate_data.get("Name", "Unknown"),
            "candidate_url": candidate_data.get("URL", ""),
            "candidate_summary": "Error processing candidate profile",
            "candidate_location": candidate_data.get("Location", "Unknown"),
            "fit_score": 0.0,
            "justification": f"Error in LLM processing: {str(e)}"
        }
    
def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
    """Process candidates for a specific job using the LLM.

    Filters candidates by tech-stack overlap (get_matching_candidates), then
    scores each survivor with the LLM chain, keeping only those with a fit
    score >= 8.8. Renders progress and results into the Streamlit page as a
    side effect.

    Args:
        job_row: a jobs_df row (pandas Series) with at least "Company",
            "Role" and "Tech Stack".
        candidates_df: DataFrame of candidate profiles.
        llm_chain: optional pre-built chain; built via setup_llm() when None.

    Returns:
        list[dict]: shortlisted candidates ([] on error or no matches).
    """
    if llm_chain is None:
        with st.spinner("Setting up LLM..."):
            llm_chain = setup_llm()
    selected_candidates = []
    try:
        # Get job-specific data in the shape call_llm expects.
        job_data = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "desc": job_row.get("One liner", ""),
            "Locations": job_row.get("Locations", ""),
            "Tech_Stack": job_row["Tech Stack"],
            "Industry": job_row.get("Industry", "")
        }
        # Find matching candidates for this job (cheap set-overlap pre-filter
        # so we don't pay for an LLM call per candidate).
        with st.spinner("Finding matching candidates based on tech stack..."):
            matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
        if not matching_candidates:
            st.warning("No candidates with matching tech stack found for this job.")
            return []
        st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
        # Create progress elements
        candidates_progress = st.progress(0)
        candidate_status = st.empty()
        # Process each candidate sequentially through the LLM.
        for i, candidate_data in enumerate(matching_candidates):
            # Update progress
            candidates_progress.progress((i + 1) / len(matching_candidates))
            candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
            # Process the candidate with the LLM
            response = call_llm(candidate_data, job_data, llm_chain)
            response_dict = {
                "Name": response["candidate_name"],
                "LinkedIn": response["candidate_url"],
                "summary": response["candidate_summary"],
                "Location": response["candidate_location"],
                "Fit Score": response["fit_score"],
                "justification": response["justification"],
                # Add back original candidate data for context
                "Educational Background": candidate_data.get("Degree & Education", ""),
                "Years of Experience": candidate_data.get("Years of Experience", ""),
                "Current Title & Company": candidate_data.get("Current Title & Company", "")
            }
            # Add to selected candidates if score is high enough (8.8
            # threshold matches the rubric in the system prompt).
            if response["fit_score"] >= 8.8:
                selected_candidates.append(response_dict)
                # NOTE(review): passing a dict to st.markdown renders its
                # repr, not formatted markdown — confirm this is intended.
                st.markdown(response_dict)
            else:
                st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
        # Clear progress indicators
        candidates_progress.empty()
        candidate_status.empty()
        # Show results
        if selected_candidates:
            st.success(f"β Found {len(selected_candidates)} suitable candidates for this job!")
        else:
            st.info("No candidates met the minimum fit score threshold for this job.")
        return selected_candidates
    except Exception as e:
        st.error(f"Error processing job: {e}")
        return []
def main():
    """Streamlit entry point: collect credentials, load sheets, select a job.

    Reads the OpenAI key from a sidebar input and Google service-account JSON
    from the GCP_SERVICE_ACCOUNT environment variable, loads jobs/candidates
    from two Google Sheets, then defers per-job matching to
    display_job_selection.
    """
    st.title("π¨βπ» Candidate Matching App")
    # Initialize session state
    if 'processed_jobs' not in st.session_state:
        st.session_state.processed_jobs = {}
    st.write("""
    This app matches job listings with candidate profiles based on tech stack and other criteria.
    Select a job to find matching candidates.
    """)
    # API Key input
    with st.sidebar:
        st.header("API Configuration")
        api_key = st.text_input("Enter OpenAI API Key", type="password")
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
            st.success("API Key set!")
        else:
            st.warning("Please enter OpenAI API Key to use LLM features")
    # Google Sheets access requires service-account JSON in the environment.
    secret_content = os.getenv("GCP_SERVICE_ACCOUNT")
    if not secret_content:
        # BUGFIX: fail fast with a clear message instead of letting
        # json.loads(None) raise an opaque TypeError.
        st.error("GCP_SERVICE_ACCOUNT environment variable is not set; cannot access Google Sheets.")
        st.stop()
    secret_content = json.loads(secret_content)
    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
    creds = service_account.Credentials.from_service_account_info(secret_content, scopes=SCOPES)
    gc = gspread.authorize(creds)
    job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
    candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
    # Show API key warning if not set
    if not api_key:
        st.warning("β οΈ You need to provide an OpenAI API key in the sidebar to use this app.")
    if api_key:
        try:
            # Load data from Google Sheets
            job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
            job_data = job_worksheet.get_all_values()
            candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
            candidate_data = candidate_worksheet.get_all_values()
            # Convert to DataFrames; first sheet row is the header.
            jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
            candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
            candidates_df = candidates_df.fillna("Unknown")
            # Display data preview
            with st.expander("Preview uploaded data"):
                st.subheader("Jobs Data Preview")
                st.dataframe(jobs_df.head(3))
                st.subheader("Candidates Data Preview")
                st.dataframe(candidates_df.head(3))
            # Map column names if needed. NOTE(review): this is currently an
            # identity mapping, so the rename below is a no-op; kept as a hook
            # for future sheet-schema drift.
            column_mapping = {
                "Full Name": "Full Name",
                "LinkedIn URL": "LinkedIn URL",
                "Current Title & Company": "Current Title & Company",
                "Years of Experience": "Years of Experience",
                "Degree & University": "Degree & University",
                "Key Tech Stack": "Key Tech Stack",
                "Key Highlights": "Key Highlights",
                "Location (from most recent experience)": "Location (from most recent experience)"
            }
            # Rename columns if they don't match expected
            candidates_df = candidates_df.rename(columns={
                col: mapping for col, mapping in column_mapping.items()
                if col in candidates_df.columns and col != mapping
            })
            # Jobs are processed lazily: only the job the user picks is
            # evaluated (see display_job_selection).
            display_job_selection(jobs_df, candidates_df)
        except Exception as e:
            st.error(f"Error processing files: {e}")
    st.divider()
def display_job_selection(jobs_df, candidates_df):
    """Render job picker, run matching on demand, and show cached results.

    Per-job processed state lives in st.session_state under
    "job_{index}_processed"; shortlists live in
    st.session_state.Selected_Candidates keyed by job index, so results
    survive Streamlit reruns.
    """
    # Store the LLM chain as a session state to avoid recreating it
    if 'llm_chain' not in st.session_state:
        st.session_state.llm_chain = None
    st.subheader("Select a job to view potential matches")
    # Create job options - but don't compute matches yet
    job_options = []
    for i, row in jobs_df.iterrows():
        job_options.append(f"{row['Role']} at {row['Company']}")
    if job_options:
        selected_job_index = st.selectbox("Jobs:",
                                          range(len(job_options)),
                                          format_func=lambda x: job_options[x])
        # Display job details
        job_row = jobs_df.iloc[selected_job_index]
        # Parse tech stack for display
        job_row_stack = parse_tech_stack(job_row["Tech Stack"])
        col1, col2 = st.columns([2, 1])
        with col1:
            st.subheader(f"Job Details: {job_row['Role']}")
            job_details = {
                "Company": job_row["Company"],
                "Role": job_row["Role"],
                "Description": job_row.get("One liner", "N/A"),
                "Locations": job_row.get("Locations", "N/A"),
                "Industry": job_row.get("Industry", "N/A"),
                "Tech Stack": display_tech_stack(job_row_stack)
            }
            for key, value in job_details.items():
                st.markdown(f"**{key}:** {value}")
        # Create a key for this job in session state
        job_key = f"job_{selected_job_index}_processed"
        if job_key not in st.session_state:
            st.session_state[job_key] = False
        # Add a process button for this job
        if not st.session_state[job_key]:
            if st.button(f"Find Matching Candidates for this Job"):
                if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
                    st.error("Please enter your OpenAI API key in the sidebar before processing")
                else:
                    # Process candidates for this job (only when requested).
                    # NOTE(review): if llm_chain is still None here,
                    # process_candidates_for_job builds its own chain, and the
                    # one cached below is a second instance — consider caching
                    # before processing instead.
                    selected_candidates = process_candidates_for_job(
                        job_row,
                        candidates_df,
                        st.session_state.llm_chain
                    )
                    # Store the results and set as processed
                    if 'Selected_Candidates' not in st.session_state:
                        st.session_state.Selected_Candidates = {}
                    st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
                    st.session_state[job_key] = True
                    # Store the LLM chain for reuse
                    if st.session_state.llm_chain is None:
                        st.session_state.llm_chain = setup_llm()
                    # Force refresh so the results branch below renders.
                    st.rerun()
        # Display selected candidates if already processed
        if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
            selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
            # Display selected candidates
            st.subheader("Selected Candidates")
            if len(selected_candidates) > 0:
                for i, candidate in enumerate(selected_candidates):
                    with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
                        col1, col2 = st.columns([3, 1])
                        with col1:
                            st.markdown(f"**Summary:** {candidate['summary']}")
                            st.markdown(f"**Current:** {candidate['Current Title & Company']}")
                            st.markdown(f"**Education:** {candidate['Educational Background']}")
                            st.markdown(f"**Experience:** {candidate['Years of Experience']}")
                            st.markdown(f"**Location:** {candidate['Location']}")
                            st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
                        with col2:
                            st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
                            st.markdown("**Justification:**")
                            st.info(candidate['justification'])
            else:
                st.info("No candidates met the minimum score threshold (8.8) for this job.")
            # We don't show tech-matched candidates here since they are generated
            # during the LLM matching process now
            # Add a reset button to start over
            if st.button("Reset and Process Again"):
                st.session_state[job_key] = False
                st.rerun()
# Standard script entry-point guard.
if __name__ == "__main__":
    main()