import streamlit as st
import pandas as pd
import json
import os
from pydantic import BaseModel, Field
from typing import List, Set, Dict, Any, Optional
import time
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
import gspread
from google.oauth2 import service_account

st.set_page_config(
    page_title="Candidate Matching App",
    page_icon="👨‍💻🎯",
    layout="wide"
)


# Define pydantic model for structured output
class Shortlist(BaseModel):
    fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
    candidate_name: str = Field(description="The name of the candidate.")
    candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
    candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
    candidate_location: str = Field(description="The location of the candidate.")
    justification: str = Field(description="Justification for the shortlisted candidate with the fit score")


# Function to parse and normalize tech stacks
def parse_tech_stack(stack):
    """Parse a raw tech-stack cell into a normalized set of lowercase skills.

    Accepts NaN / None / "" (returns an empty set), an already-parsed set
    (returned unchanged), a string representation of a set such as
    "{'Python', 'SQL'}", or a plain comma-separated string.

    Every parsed item is stripped and lowercased so that sets produced from
    different input formats intersect correctly in the matching step.
    """
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        # Handle potential string representation of sets
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            # This could be a string representation of a set.
            # BUG FIX: lowercase items in this branch too — previously only the
            # comma-split branch lowercased, so stacks parsed from set-literal
            # strings ("Python") could never intersect with stacks parsed from
            # plain strings ("python"), silently producing zero matches.
            items = stack.strip("{}").split(",")
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()


def display_tech_stack(stack_set):
    """Render a tech-stack set as a sorted comma-separated string for the UI."""
    if isinstance(stack_set, set):
        return ", ".join(sorted(stack_set))
    return str(stack_set)
def get_matching_candidates(job_stack, candidates_df):
    """Find candidates with matching tech stack for a specific job.

    A candidate qualifies when their stack shares at least two skills with
    the job's stack. Returns a list of dicts carrying the candidate fields
    the LLM evaluation step needs.
    """
    matched = []
    job_stack_set = parse_tech_stack(job_stack)
    for _, candidate in candidates_df.iterrows():
        candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
        common = job_stack_set & candidate_stack
        if len(common) >= 2:  # require at least 2 overlapping skills
            matched.append({
                "Name": candidate["Full Name"],
                "URL": candidate["LinkedIn URL"],
                "Degree & Education": candidate["Degree & University"],
                "Years of Experience": candidate["Years of Experience"],
                "Current Title & Company": candidate['Current Title & Company'],
                "Key Highlights": candidate["Key Highlights"],
                "Location": candidate["Location (from most recent experience)"],
                "Experience": str(candidate["Experience"]),
                "Tech Stack": candidate_stack
            })
    return matched


def setup_llm():
    """Set up the LangChain LLM with structured output.

    Returns a runnable chain (prompt | structured LLM) that accepts the
    job/candidate payload dict built in call_llm and returns a Shortlist.
    """
    # Create LLM instance
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )

    # Create structured output
    sum_llm = llm.with_structured_output(Shortlist)

    # Create system prompt
    system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the the profile is according to job.

    Try to ensure following points while estimating the candidate's fit score:
    For education:
    Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
    Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
    Tier3 - Unknown or unranked institutions - Lower points or reject

    Startup Experience Requirement:
    Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
    preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.

    The fit score signifies based on following metrics:
    1–5 - Poor Fit - Auto-reject
    6–7 - Weak Fit - Auto-reject
    8.0–8.7 - Moderate Fit - Auto-reject
    8.8–10 - STRONG Fit - Include in results
    """

    # Create query prompt
    query_prompt = ChatPromptTemplate.from_messages([
        ("system", system),
        ("human", """
    You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not. For this you will be provided with the follwing inputs of job and candidates:
    Job Details
    Company: {Company}
    Role: {Role}
    About Company: {desc}
    Locations: {Locations}
    Tech Stack: {Tech_Stack}
    Industry: {Industry}

    Candidate Details:
    Full Name: {Full_Name}
    LinkedIn URL: {LinkedIn_URL}
    Current Title & Company: {Current_Title_Company}
    Years of Experience: {Years_of_Experience}
    Degree & University: {Degree_University}
    Key Tech Stack: {Key_Tech_Stack}
    Key Highlights: {Key_Highlights}
    Location (from most recent experience): {cand_Location}
    Past_Experience: {Experience}

    Answer in the structured manner as per the schema.
    If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
    """),
    ])

    # Chain the prompt and LLM
    cat_class = query_prompt | sum_llm
    return cat_class


def call_llm(candidate_data, job_data, llm_chain):
    """Call the actual LLM to evaluate the candidate.

    Builds the payload expected by the prompt template, invokes the chain,
    and returns a plain dict. On any failure, reports the error in the UI
    and returns a fallback response with fit_score 0.0 so the caller's
    loop can continue.
    """
    try:
        # Convert tech stacks to strings for the LLM payload
        job_tech_stack = job_data.get("Tech_Stack", set())
        candidate_tech_stack = candidate_data.get("Tech Stack", set())
        if isinstance(job_tech_stack, set):
            job_tech_stack = ", ".join(sorted(job_tech_stack))
        if isinstance(candidate_tech_stack, set):
            candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))

        # Prepare payload for LLM — keys must match the prompt placeholders
        payload = {
            "Company": job_data.get("Company", ""),
            "Role": job_data.get("Role", ""),
            "desc": job_data.get("desc", ""),
            "Locations": job_data.get("Locations", ""),
            "Tech_Stack": job_tech_stack,
            "Industry": job_data.get("Industry", ""),
            "Full_Name": candidate_data.get("Name", ""),
            "LinkedIn_URL": candidate_data.get("URL", ""),
            "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
            "Years_of_Experience": candidate_data.get("Years of Experience", ""),
            "Degree_University": candidate_data.get("Degree & Education", ""),
            "Key_Tech_Stack": candidate_tech_stack,
            "Key_Highlights": candidate_data.get("Key Highlights", ""),
            "cand_Location": candidate_data.get("Location", ""),
            "Experience": candidate_data.get("Experience", "")
        }

        # Call LLM (BUG FIX: removed leftover debug print of the candidate's
        # experience, which spammed stdout on every evaluation)
        response = llm_chain.invoke(payload)

        # Return response in expected format
        return {
            "candidate_name": response.candidate_name,
            "candidate_url": response.candidate_url,
            "candidate_summary": response.candidate_summary,
            "candidate_location": response.candidate_location,
            "fit_score": response.fit_score,
            "justification": response.justification
        }
    except Exception as e:
        st.error(f"Error calling LLM: {e}")
        # Fallback to a default response
        return {
            "candidate_name": candidate_data.get("Name", "Unknown"),
            "candidate_url": candidate_data.get("URL", ""),
            "candidate_summary": "Error processing candidate profile",
            "candidate_location": candidate_data.get("Location", "Unknown"),
            "fit_score": 0.0,
            "justification": f"Error in LLM processing: {str(e)}"
        }


def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
    """Process candidates for a specific job using the LLM.

    Pre-filters by tech-stack overlap, then scores each remaining candidate
    with the LLM and keeps those scoring >= 8.8. Returns the list of
    selected candidate dicts (possibly empty).
    """
    if llm_chain is None:
        with st.spinner("Setting up LLM..."):
            llm_chain = setup_llm()

    selected_candidates = []
    try:
        # Get job-specific data
        job_data = {
            "Company": job_row["Company"],
            "Role": job_row["Role"],
            "desc": job_row.get("One liner", ""),
            "Locations": job_row.get("Locations", ""),
            "Tech_Stack": job_row["Tech Stack"],
            "Industry": job_row.get("Industry", "")
        }

        # Find matching candidates for this job
        with st.spinner("Finding matching candidates based on tech stack..."):
            matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)

        if not matching_candidates:
            st.warning("No candidates with matching tech stack found for this job.")
            return []

        st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")

        # Create progress elements
        candidates_progress = st.progress(0)
        candidate_status = st.empty()

        # Process each candidate
        for i, candidate_data in enumerate(matching_candidates):
            # Update progress
            candidates_progress.progress((i + 1) / len(matching_candidates))
            candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")

            # Process the candidate with the LLM
            response = call_llm(candidate_data, job_data, llm_chain)

            response_dict = {
                "Name": response["candidate_name"],
                "LinkedIn": response["candidate_url"],
                "summary": response["candidate_summary"],
                "Location": response["candidate_location"],
                "Fit Score": response["fit_score"],
                "justification": response["justification"],
                # Add back original candidate data for context
                "Educational Background": candidate_data.get("Degree & Education", ""),
                "Years of Experience": candidate_data.get("Years of Experience", ""),
                "Current Title & Company": candidate_data.get("Current Title & Company", "")
            }

            # Add to selected candidates if score is high enough
            if response["fit_score"] >= 8.8:
                selected_candidates.append(response_dict)
                # BUG FIX: st.markdown expects markdown text, not a dict;
                # st.write renders the dict as a proper structure.
                st.write(response_dict)
            else:
                st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")

        # Clear progress indicators
        candidates_progress.empty()
        candidate_status.empty()

        # Show results
        if selected_candidates:
            st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
        else:
            st.info("No candidates met the minimum fit score threshold for this job.")

        return selected_candidates
    except Exception as e:
        st.error(f"Error processing job: {e}")
        return []


def main():
    """Streamlit entry point: sidebar config, sheet loading, job selection."""
    st.title("👨‍💻 Candidate Matching App")

    # Initialize session state
    if 'processed_jobs' not in st.session_state:
        st.session_state.processed_jobs = {}

    st.write("""
    This app matches job listings with candidate profiles based on tech stack and other criteria.
    Select a job to find matching candidates.
    """)

    # API Key input
    with st.sidebar:
        st.header("API Configuration")
        api_key = st.text_input("Enter OpenAI API Key", type="password")
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
            st.success("API Key set!")
        else:
            st.warning("Please enter OpenAI API Key to use LLM features")

    # Google Sheets access via a service account stored in an env var.
    # BUG FIX: guard against the env var being unset — previously this fell
    # straight into json.loads(None) and crashed with an opaque TypeError.
    secret_content = os.getenv("GCP_SERVICE_ACCOUNT")
    if not secret_content:
        st.error("GCP_SERVICE_ACCOUNT environment variable is not set; cannot access Google Sheets.")
        st.stop()
    secret_content = json.loads(secret_content)

    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
    creds = service_account.Credentials.from_service_account_info(secret_content, scopes=SCOPES)
    gc = gspread.authorize(creds)
    job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
    candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')

    if not api_key:
        st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")

    if api_key:
        try:
            # Load data from Google Sheets
            job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
            job_data = job_worksheet.get_all_values()
            candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
            candidate_data = candidate_worksheet.get_all_values()

            # Convert to DataFrames (first row is the header)
            jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
            candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
            candidates_df = candidates_df.fillna("Unknown")

            # Display data preview
            with st.expander("Preview uploaded data"):
                st.subheader("Jobs Data Preview")
                st.dataframe(jobs_df.head(3))
                st.subheader("Candidates Data Preview")
                st.dataframe(candidates_df.head(3))

            # Map column names if needed — currently an identity mapping kept
            # as a hook for future sheets whose headers differ.
            column_mapping = {
                "Full Name": "Full Name",
                "LinkedIn URL": "LinkedIn URL",
                "Current Title & Company": "Current Title & Company",
                "Years of Experience": "Years of Experience",
                "Degree & University": "Degree & University",
                "Key Tech Stack": "Key Tech Stack",
                "Key Highlights": "Key Highlights",
                "Location (from most recent experience)": "Location (from most recent experience)"
            }

            # Rename columns if they don't match expected
            candidates_df = candidates_df.rename(columns={
                col: mapping for col, mapping in column_mapping.items()
                if col in candidates_df.columns and col != mapping
            })

            # Now, instead of processing all jobs upfront, we'll display job selection
            # and only process the selected job when the user chooses it
            display_job_selection(jobs_df, candidates_df)
        except Exception as e:
            st.error(f"Error processing files: {e}")

    st.divider()


def display_job_selection(jobs_df, candidates_df):
    """Show the job picker, job details, and (on demand) matched candidates."""
    # Store the LLM chain as a session state to avoid recreating it
    if 'llm_chain' not in st.session_state:
        st.session_state.llm_chain = None

    st.subheader("Select a job to view potential matches")

    # Create job options - but don't compute matches yet
    job_options = []
    for i, row in jobs_df.iterrows():
        job_options.append(f"{row['Role']} at {row['Company']}")

    if job_options:
        selected_job_index = st.selectbox("Jobs:", range(len(job_options)),
                                          format_func=lambda x: job_options[x])

        # Display job details
        job_row = jobs_df.iloc[selected_job_index]

        # Parse tech stack for display
        job_row_stack = parse_tech_stack(job_row["Tech Stack"])

        col1, col2 = st.columns([2, 1])
        with col1:
            st.subheader(f"Job Details: {job_row['Role']}")
            job_details = {
                "Company": job_row["Company"],
                "Role": job_row["Role"],
                "Description": job_row.get("One liner", "N/A"),
                "Locations": job_row.get("Locations", "N/A"),
                "Industry": job_row.get("Industry", "N/A"),
                "Tech Stack": display_tech_stack(job_row_stack)
            }
            for key, value in job_details.items():
                st.markdown(f"**{key}:** {value}")

        # Create a key for this job in session state
        job_key = f"job_{selected_job_index}_processed"
        if job_key not in st.session_state:
            st.session_state[job_key] = False

        # Add a process button for this job
        if not st.session_state[job_key]:
            if st.button("Find Matching Candidates for this Job"):
                if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
                    st.error("Please enter your OpenAI API key in the sidebar before processing")
                else:
                    # BUG FIX: build and cache the chain *before* processing.
                    # Previously a None chain was passed in (forcing
                    # process_candidates_for_job to build a throwaway chain)
                    # and a second chain was built afterwards just for caching.
                    if st.session_state.llm_chain is None:
                        st.session_state.llm_chain = setup_llm()

                    # Process candidates for this job (only when requested)
                    selected_candidates = process_candidates_for_job(
                        job_row, candidates_df, st.session_state.llm_chain
                    )

                    # Store the results and set as processed
                    if 'Selected_Candidates' not in st.session_state:
                        st.session_state.Selected_Candidates = {}
                    st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
                    st.session_state[job_key] = True

                    # Force refresh
                    st.rerun()

        # Display selected candidates if already processed
        if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
            selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])

            # Display selected candidates
            st.subheader("Selected Candidates")
            if len(selected_candidates) > 0:
                for i, candidate in enumerate(selected_candidates):
                    with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
                        col1, col2 = st.columns([3, 1])
                        with col1:
                            st.markdown(f"**Summary:** {candidate['summary']}")
                            st.markdown(f"**Current:** {candidate['Current Title & Company']}")
                            st.markdown(f"**Education:** {candidate['Educational Background']}")
                            st.markdown(f"**Experience:** {candidate['Years of Experience']}")
                            st.markdown(f"**Location:** {candidate['Location']}")
                            st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
                        with col2:
                            st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
                            st.markdown("**Justification:**")
                            st.info(candidate['justification'])
            else:
                st.info("No candidates met the minimum score threshold (8.8) for this job.")

            # We don't show tech-matched candidates here since they are generated
            # during the LLM matching process now

            # Add a reset button to start over
            if st.button("Reset and Process Again"):
                st.session_state[job_key] = False
                st.rerun()


if __name__ == "__main__":
    main()