Spaces:

ak0601
/

lightweight-job

Running

App Files Files Community

ak0601 committed on May 16

Commit

e465a59

verified ·

1 Parent(s): 0cf6316

Update src/app_job_copy_1.py

Browse files

Files changed (1) hide show

src/app_job_copy_1.py +1035 -415

src/app_job_copy_1.py CHANGED Viewed

@@ -1,15 +1,660 @@
 import streamlit as st
 import pandas as pd
 import json
 import os
 from pydantic import BaseModel, Field
-from typing import List, Set, Dict, Any, Optional
-import time
 from langchain_openai import ChatOpenAI
-from langchain_core.messages import HumanMessage
 from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import PromptTemplate
 import gspread
 import tempfile
 from google.oauth2 import service_account
@@ -22,7 +667,6 @@ st.set_page_config(
 )
 os.environ["STREAMLIT_HOME"] = tempfile.gettempdir()
 os.environ["STREAMLIT_DISABLE_TELEMETRY"] = "1"
 # Define pydantic model for structured output
 class Shortlist(BaseModel):
     fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements upto 3 decimal points.")
@@ -34,25 +678,19 @@ class Shortlist(BaseModel):
 # Function to calculate tokens
 def calculate_tokens(text, model="gpt-4o-mini"):
-    """Calculate the number of tokens in a given text for a specific model"""
     try:
-        # Get the encoding for the model
         if "gpt-4" in model:
             encoding = tiktoken.encoding_for_model("gpt-4o-mini")
         elif "gpt-3.5" in model:
             encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
         else:
-            encoding = tiktoken.get_encoding("cl100k_base")  # Default for newer models
-        # Encode the text and return the token count
         return len(encoding.encode(text))
     except Exception as e:
-        # If there's an error, make a rough estimate (1 token ≈ 4 chars)
         return len(text) // 4
 # Function to display token usage
 def display_token_usage():
-    """Display token usage statistics"""
     if 'total_input_tokens' not in st.session_state:
         st.session_state.total_input_tokens = 0
     if 'total_output_tokens' not in st.session_state:
@@ -62,46 +700,35 @@ def display_token_usage():
     total_output = st.session_state.total_output_tokens
     total_tokens = total_input + total_output
-    # Estimate cost based on model
-    if st.session_state.model_name == "gpt-4o-mini":
-        input_cost_per_1k = 0.0003  # $0.0003 per 1K input tokens
-        output_cost_per_1k = 0.0006  # $$0.0006 per 1K output tokens
-    elif "gpt-4" in st.session_state.model_name:
-        input_cost_per_1k = 0.005 # $0.30 per 1K input tokens
-        output_cost_per_1k = 0.60  # $0.60 per 1K output tokens
     else:  # Assume gpt-3.5-turbo pricing
-        input_cost_per_1k = 0.0015  # $0.0015 per 1K input tokens
-        output_cost_per_1k = 0.015   # $0.002 per 1K output tokens
     estimated_cost = (total_input / 1000 * input_cost_per_1k) + (total_output / 1000 * output_cost_per_1k)
-    st.subheader("📊 Token Usage Statistics")
     col1, col2, col3 = st.columns(3)
-    with col1:
-        st.metric("Input Tokens", f"{total_input:,}")
-    with col2:
-        st.metric("Output Tokens", f"{total_output:,}")
-    with col3:
-        st.metric("Total Tokens", f"{total_tokens:,}")
     st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")
     return total_tokens
 # Function to parse and normalize tech stacks
 def parse_tech_stack(stack):
-    if pd.isna(stack) or stack == "" or stack is None:
-        return set()
-    if isinstance(stack, set):
-        return stack
     try:
-        # Handle potential string representation of sets
         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
-            # This could be a string representation of a set
             items = stack.strip("{}").split(",")
             return set(item.strip().strip("'\"") for item in items if item.strip())
         return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
@@ -110,29 +737,24 @@ def parse_tech_stack(stack):
         return set()
 def display_tech_stack(stack_set):
-    if isinstance(stack_set, set):
-        return ", ".join(sorted(stack_set))
-    return str(stack_set)
 def get_matching_candidates(job_stack, candidates_df):
-    """Find candidates with matching tech stack for a specific job"""
     matched = []
     job_stack_set = parse_tech_stack(job_stack)
     for _, candidate in candidates_df.iterrows():
         candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
         common = job_stack_set & candidate_stack
-        if len(common) >= 2:
             matched.append({
-                "Name": candidate["Full Name"],
-                "URL": candidate["LinkedIn URL"],
                 "Degree & Education": candidate["Degree & University"],
                 "Years of Experience": candidate["Years of Experience"],
                 "Current Title & Company": candidate['Current Title & Company'],
                 "Key Highlights": candidate["Key Highlights"],
                 "Location": candidate["Location (from most recent experience)"],
-                "Experience": str(candidate["Experience"]),
-                "Tech Stack": candidate_stack
             })
     return matched
@@ -160,25 +782,21 @@ def setup_llm():
     # Create system prompt
     system = """You are an expert Tech Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
     the profile is according to job.
 Try to ensure following points while estimating the candidate's fit score:
 For education:
 Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
 Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
 Tier3 - Unknown or unranked institutions - Lower points or reject
 Startup Experience Requirement:
 Candidates must have worked  as a direct employee at a VC-backed startup (Seed to series C/D)
 preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
-Apart from this the candidate must reside near or on the job location. If it is not immediately give a fit score below 5.
     The fit score signifies based on following metrics:
     1–5 - Poor Fit - Auto-reject
     6–7 - Weak Fit - Auto-reject
     8.0–8.7 - Moderate Fit - Auto-reject
     8.8–10 - STRONG Fit - Include in results
     Each candidate's fit score should be calculated based on a weighted evaluation of their background and must be distinct even if candidates have similar profiles.
     """
@@ -198,7 +816,6 @@ Avoid rounding to whole or one-decimal numbers. Every candidate should have a **
     Tech Stack: {Tech_Stack}
     Industry: {Industry}
     Candidate Details:
     Full Name: {Full_Name}
     LinkedIn URL: {LinkedIn_URL}
@@ -209,8 +826,6 @@ Avoid rounding to whole or one-decimal numbers. Every candidate should have a **
     Key Highlights: {Key_Highlights}
     Location (from most recent experience): {cand_Location}
     Past_Experience: {Experience}
     Answer in the structured manner as per the schema.
     If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
     The `fit_score` must be a float with **exactly three decimal digits** (e.g. 8.812, 9.006). Do not round to 1 or 2 decimals.
@@ -223,420 +838,425 @@ Avoid rounding to whole or one-decimal numbers. Every candidate should have a **
     return cat_class
 def call_llm(candidate_data, job_data, llm_chain):
-    """Call the actual LLM to evaluate the candidate"""
     try:
-        # Convert tech stacks to strings for the LLM payload
-        job_tech_stack = job_data.get("Tech_Stack", set())
-        candidate_tech_stack = candidate_data.get("Tech Stack", set())
-        if isinstance(job_tech_stack, set):
-            job_tech_stack = ", ".join(sorted(job_tech_stack))
-        if isinstance(candidate_tech_stack, set):
-            candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
-        # Prepare payload for LLM
         payload = {
-            "Company": job_data.get("Company", ""),
-            "Role": job_data.get("Role", ""),
-            "desc": job_data.get("desc", ""),
-            "Locations": job_data.get("Locations", ""),
-            "Tech_Stack": job_tech_stack,
-            "Industry": job_data.get("Industry", ""),
-            "Full_Name": candidate_data.get("Name", ""),
-            "LinkedIn_URL": candidate_data.get("URL", ""),
             "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
             "Years_of_Experience": candidate_data.get("Years of Experience", ""),
             "Degree_University": candidate_data.get("Degree & Education", ""),
-            "Key_Tech_Stack": candidate_tech_stack,
-            "Key_Highlights": candidate_data.get("Key Highlights", ""),
-            "cand_Location": candidate_data.get("Location", ""),
-            "Experience": candidate_data.get("Experience", "")
         }
-        # Convert payload to a string for token calculation
         payload_str = json.dumps(payload)
-        # Calculate input tokens
         input_tokens = calculate_tokens(payload_str, st.session_state.model_name)
-        # Call LLM
         response = llm_chain.invoke(payload)
-        print(candidate_data.get("Experience", ""))
-        # Convert response to string for token calculation
-        response_str = f"""
-        candidate_name: {response.candidate_name}
-        candidate_url: {response.candidate_url}
-        candidate_summary: {response.candidate_summary}
-        candidate_location: {response.candidate_location}
-        fit_score: {float(f"{response.fit_score:.3f}")}
-        justification: {response.justification}
-        """
-        # Calculate output tokens
         output_tokens = calculate_tokens(response_str, st.session_state.model_name)
-        # Update token counts in session state
-        if 'total_input_tokens' not in st.session_state:
-            st.session_state.total_input_tokens = 0
-        if 'total_output_tokens' not in st.session_state:
-            st.session_state.total_output_tokens = 0
         st.session_state.total_input_tokens += input_tokens
         st.session_state.total_output_tokens += output_tokens
-        # Return response in expected format
         return {
-            "candidate_name": response.candidate_name,
-            "candidate_url": response.candidate_url,
-            "candidate_summary": response.candidate_summary,
-            "candidate_location": response.candidate_location,
-            "fit_score": response.fit_score,
-            "justification": response.justification
         }
     except Exception as e:
-        st.error(f"Error calling LLM: {e}")
-        # Fallback to a default response
         return {
-            "candidate_name": candidate_data.get("Name", "Unknown"),
-            "candidate_url": candidate_data.get("URL", ""),
-            "candidate_summary": "Error processing candidate profile",
-            "candidate_location": candidate_data.get("Location", "Unknown"),
-            "fit_score": 0.0,
-            "justification": f"Error in LLM processing: {str(e)}"
         }
 def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
-    """Process candidates for a specific job using the LLM"""
-    # Reset token counters for this job
-    st.session_state.total_input_tokens = 0
     st.session_state.total_output_tokens = 0
     if llm_chain is None:
-        with st.spinner("Setting up LLM..."):
-            llm_chain = setup_llm()
     selected_candidates = []
-    try:
-        # Get job-specific data
-        job_data = {
-            "Company": job_row["Company"],
-            "Role": job_row["Role"],
-            "desc": job_row.get("One liner", ""),
-            "Locations": job_row.get("Locations", ""),
-            "Tech_Stack": job_row["Tech Stack"],
-            "Industry": job_row.get("Industry", "")
-        }
-        # Find matching candidates for this job
-        with st.spinner("Finding matching candidates based on tech stack..."):
-            matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
-        if not matching_candidates:
-            st.warning("No candidates with matching tech stack found for this job.")
-            return []
-        st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
-        # Create progress elements
-        candidates_progress = st.progress(0)
-        candidate_status = st.empty()
-        # Process each candidate
-        for i, candidate_data in enumerate(matching_candidates):
-            # Update progress
-            candidates_progress.progress((i + 1) / len(matching_candidates))
-            candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
-            # Process the candidate with the LLM
-            response = call_llm(candidate_data, job_data, llm_chain)
-            response_dict = {
-                "Name": response["candidate_name"],
-                "LinkedIn": response["candidate_url"],
-                "summary": response["candidate_summary"],
-                "Location": response["candidate_location"],
-                "Fit Score": float(f"{response['fit_score']:.3f}"),
-                "justification": response["justification"],
-                # Add back original candidate data for context
-                "Educational Background": candidate_data.get("Degree & Education", ""),
-                "Years of Experience": candidate_data.get("Years of Experience", ""),
-                "Current Title & Company": candidate_data.get("Current Title & Company", "")
-            }
-            # Add to selected candidates if score is high enough
-            if response["fit_score"] >= 8.800:
-                selected_candidates.append(response_dict)
-                st.markdown(response_dict)
-            else:
-                st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
-        # Clear progress indicators
-        candidates_progress.empty()
-        candidate_status.empty()
-        # Show results
         if selected_candidates:
-            st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
         else:
-            st.info("No candidates met the minimum fit score threshold for this job.")
-        # Token usage is now displayed in display_job_selection when showing results
-        return selected_candidates
-    except Exception as e:
-        st.error(f"Error processing job: {e}")
-        return []
 def main():
     st.title("👨‍💻 Candidate Matching App")
-    # Initialize session state
-    if 'processed_jobs' not in st.session_state:
-        st.session_state.processed_jobs = {}
-    st.write("""
-    This app matches job listings with candidate profiles based on tech stack and other criteria.
-    Select a job to find matching candidates.
-    """)
-    # API Key input
     with st.sidebar:
         st.header("API Configuration")
-        api_key = st.text_input("Enter OpenAI API Key", type="password")
         if api_key:
             os.environ["OPENAI_API_KEY"] = api_key
-            st.success("API Key set!")
         else:
             st.warning("Please enter OpenAI API Key to use LLM features")
-    # Show API key warning if not set
-    SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-e94255ca76fd.json'
-    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
-    creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
-    gc = gspread.authorize(creds)
-    job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
-    candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
-    if not api_key:
         st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
-    if api_key:
-        try:
-            # Load data from Google Sheets
-            job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
-            job_data = job_worksheet.get_all_values()
-            candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
-            candidate_data = candidate_worksheet.get_all_values()
-            # Convert to DataFrames
-            jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
-            jobs_df = jobs_df.drop(["Link"],axis = 1)
-            candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
-            candidates_df = candidates_df.fillna("Unknown")
-            # Display data preview
-            with st.expander("Preview uploaded data"):
-                st.subheader("Jobs Data Preview")
-                st.dataframe(jobs_df.head(3))
-                st.subheader("Candidates Data Preview")
-                st.dataframe(candidates_df.head(3))
-            # Map column names if needed
-            column_mapping = {
-                "Full Name": "Full Name",
-                "LinkedIn URL": "LinkedIn URL",
-                "Current Title & Company": "Current Title & Company",
-                "Years of Experience": "Years of Experience",
-                "Degree & University": "Degree & University",
-                "Key Tech Stack": "Key Tech Stack",
-                "Key Highlights": "Key Highlights",
-                "Location (from most recent experience)": "Location (from most recent experience)"
-            }
-            # Rename columns if they don't match expected
-            candidates_df = candidates_df.rename(columns={
-                col: mapping for col, mapping in column_mapping.items()
-                if col in candidates_df.columns and col != mapping
-            })
-            # Now, instead of processing all jobs upfront, we'll display job selection
-            # and only process the selected job when the user chooses it
-            display_job_selection(jobs_df, candidates_df, job_sheet)
-        except Exception as e:
-            st.error(f"Error processing files: {e}")
     st.divider()
-def display_job_selection(jobs_df, candidates_df, sh):
-    # Initialize session state variables if they don't exist
-    if 'Selected_Candidates' not in st.session_state:
-        st.session_state.Selected_Candidates = {}
-    if 'llm_chain' not in st.session_state:
-        st.session_state.llm_chain = setup_llm()
-    st.subheader("Select a job to view potential matches")
-    # Create job options
-    job_options = []
-    for i, row in jobs_df.iterrows():
-        job_options.append(f"{row['Role']} at {row['Company']}")
-    if job_options:
-        selected_job_index = st.selectbox("Jobs:",
-                                      range(len(job_options)),
-                                      format_func=lambda x: job_options[x])
-        # Display job details
-        job_row = jobs_df.iloc[selected_job_index]
-        # Parse tech stack for display
-        job_row_stack = parse_tech_stack(job_row["Tech Stack"])
-        col1, col2 = st.columns([2, 1])
-        with col1:
-            st.subheader(f"Job Details: {job_row['Role']}")
-            job_details = {
-                "Company": job_row["Company"],
-                "Role": job_row["Role"],
-                "Description": job_row.get("One liner", "N/A"),
-                "Locations": job_row.get("Locations", "N/A"),
-                "Industry": job_row.get("Industry", "N/A"),
-                "Tech Stack": display_tech_stack(job_row_stack)
-            }
-            for key, value in job_details.items():
-                st.markdown(f"**{key}:** {value}")
-        # Create a key for this job in session state
-        job_key = f"job_{selected_job_index}_processed"
-        if job_key not in st.session_state:
-            st.session_state[job_key] = False
-        # Create worksheet name
-        sheet_name = f"{job_row['Role']} at {job_row['Company']}".strip()[:100]
-        # Check if worksheet exists and has data
-        worksheet_exists = False
-        existing_candidates = []
-        try:
-            cand_worksheet = sh.worksheet(sheet_name)
-            worksheet_exists = True
-            # Get existing data if worksheet exists
-            existing_data = cand_worksheet.get_all_values()
-            if len(existing_data) > 1:  # Has data beyond header
-                existing_candidates = existing_data[1:]
-                st.session_state[job_key] = True
-                # Don't show the info message about existing data
-        except gspread.exceptions.WorksheetNotFound:
-            pass
-        # Add a process button for this job
-        if not st.session_state[job_key]:
-            if st.button(f"Find Matching Candidates for this Job"):
-                if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
-                    st.error("Please enter your OpenAI API key in the sidebar before processing")
-                else:
-                    # Process candidates for this job (only when requested)
-                    with st.spinner("Processing candidates..."):
-                        selected_candidates = process_candidates_for_job(
-                            job_row,
-                            candidates_df,
-                            st.session_state.llm_chain
-                        )
-                        selected_candidates.sort(key=lambda x: x["Fit Score"], reverse=True)
-                        # Only create worksheet if we have candidates
-                        if selected_candidates:
-                            try:
-                                if not worksheet_exists:
-                                    cand_worksheet = sh.add_worksheet(title=sheet_name, rows=10000, cols=50)
-                                # Prepare data for Google Sheet
-                                headers = list(selected_candidates[0].keys())
-                                rows = [headers] + [list(candidate.values()) for candidate in selected_candidates]
-                                # Clear existing data if any
-                                cand_worksheet.clear()
-                                # Write data to the worksheet
-                                cand_worksheet.update('A1', rows)
-                                st.success(f"Successfully processed {len(selected_candidates)} candidates")
-                            except Exception as e:
-                                st.error(f"Error writing to Google Sheet: {e}")
-                        # Store the results and set as processed
-                        st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
-                        st.session_state[job_key] = True
-                        # Force refresh
-                        st.rerun()
-        # Display selected candidates if already processed
-        if st.session_state[job_key]:
-            if existing_candidates:
-                # Convert existing worksheet data to our format
-                headers = existing_data[0]
-                selected_candidates = []
-                for row in existing_data[1:]:
-                    candidate = dict(zip(headers, row))
-                    selected_candidates.append(candidate)
-                st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
-            elif 'Selected_Candidates' in st.session_state:
-                selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
             else:
-                selected_candidates = []
-            # Display selected candidates
-            st.subheader("Selected Candidates")
-            # Display token usage statistics (only if we processed with LLM)
-            if not existing_candidates and 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
-                display_token_usage()
-            if len(selected_candidates) > 0:
-                for i, candidate in enumerate(selected_candidates):
-                    with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate.get('Fit Score', 'N/A')})"):
-                        col1, col2 = st.columns([3, 1])
-                        with col1:
-                            st.markdown(f"**Summary:** {candidate.get('summary', 'N/A')}")
-                            st.markdown(f"**Current:** {candidate.get('Current Title & Company', 'N/A')}")
-                            st.markdown(f"**Education:** {candidate.get('Educational Background', 'N/A')}")
-                            st.markdown(f"**Experience:** {candidate.get('Years of Experience', 'N/A')}")
-                            st.markdown(f"**Location:** {candidate.get('Location', 'N/A')}")
-                            if 'LinkedIn' in candidate:
-                                st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
-                        with col2:
-                            if 'Fit Score' in candidate:
-                                st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
-                        if 'justification' in candidate:
-                            st.markdown("**Justification:**")
-                            st.info(candidate['justification'])
-            else:
-                st.info("No candidates found for this job.")
-            # Add a reset button to start over
-            if st.button("Reset and Process Again"):
-                # Reset this job's processing state
-                st.session_state[job_key] = False
-                if 'Selected_Candidates' in st.session_state and selected_job_index in st.session_state.Selected_Candidates:
-                    del st.session_state.Selected_Candidates[selected_job_index]
-                st.rerun()
 if __name__ == "__main__":
-    main()

+# import streamlit as st
+# import pandas as pd
+# import json
+# import os
+# from pydantic import BaseModel, Field
+# from typing import List, Set, Dict, Any, Optional
+# import time
+# from langchain_openai import ChatOpenAI
+# from langchain_core.messages import HumanMessage
+# from langchain_core.prompts import ChatPromptTemplate
+# from langchain_core.output_parsers import StrOutputParser
+# from langchain_core.prompts import PromptTemplate
+# import gspread
+# import tempfile
+# from google.oauth2 import service_account
+# import tiktoken
+# st.set_page_config(
+#     page_title="Candidate Matching App",
+#     page_icon="👨‍💻🎯",
+#     layout="wide"
+# )
+# os.environ["STREAMLIT_HOME"] = tempfile.gettempdir()
+# os.environ["STREAMLIT_DISABLE_TELEMETRY"] = "1"
+# # Define pydantic model for structured output
+# class Shortlist(BaseModel):
+#     fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements upto 3 decimal points.")
+#     candidate_name: str = Field(description="The name of the candidate.")
+#     candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
+#     candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
+#     candidate_location: str = Field(description="The location of the candidate.")
+#     justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
+# # Function to calculate tokens
+# def calculate_tokens(text, model="gpt-4o-mini"):
+#     """Calculate the number of tokens in a given text for a specific model"""
+#     try:
+#         # Get the encoding for the model
+#         if "gpt-4" in model:
+#             encoding = tiktoken.encoding_for_model("gpt-4o-mini")
+#         elif "gpt-3.5" in model:
+#             encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
+#         else:
+#             encoding = tiktoken.get_encoding("cl100k_base")  # Default for newer models
+#         # Encode the text and return the token count
+#         return len(encoding.encode(text))
+#     except Exception as e:
+#         # If there's an error, make a rough estimate (1 token ≈ 4 chars)
+#         return len(text) // 4
+# # Function to display token usage
+# def display_token_usage():
+#     """Display token usage statistics"""
+#     if 'total_input_tokens' not in st.session_state:
+#         st.session_state.total_input_tokens = 0
+#     if 'total_output_tokens' not in st.session_state:
+#         st.session_state.total_output_tokens = 0
+#     total_input = st.session_state.total_input_tokens
+#     total_output = st.session_state.total_output_tokens
+#     total_tokens = total_input + total_output
+#     # Estimate cost based on model
+#     if st.session_state.model_name == "gpt-4o-mini":
+#         input_cost_per_1k = 0.0003  # $0.0003 per 1K input tokens
+#         output_cost_per_1k = 0.0006  # $0.0006 per 1K output tokens
+#     elif "gpt-4" in st.session_state.model_name:
+#         input_cost_per_1k = 0.005 # $0.30 per 1K input tokens
+#         output_cost_per_1k = 0.60  # $0.60 per 1K output tokens
+#     else:  # Assume gpt-3.5-turbo pricing
+#         input_cost_per_1k = 0.0015  # $0.0015 per 1K input tokens
+#         output_cost_per_1k = 0.015   # $0.002 per 1K output tokens
+#     estimated_cost = (total_input / 1000 * input_cost_per_1k) + (total_output / 1000 * output_cost_per_1k)
+#     st.subheader("📊 Token Usage Statistics")
+#     col1, col2, col3 = st.columns(3)
+#     with col1:
+#         st.metric("Input Tokens", f"{total_input:,}")
+#     with col2:
+#         st.metric("Output Tokens", f"{total_output:,}")
+#     with col3:
+#         st.metric("Total Tokens", f"{total_tokens:,}")
+#     st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")
+#     return total_tokens
+# # Function to parse and normalize tech stacks
+# def parse_tech_stack(stack):
+#     if pd.isna(stack) or stack == "" or stack is None:
+#         return set()
+#     if isinstance(stack, set):
+#         return stack
+#     try:
+#         # Handle potential string representation of sets
+#         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
+#             # This could be a string representation of a set
+#             items = stack.strip("{}").split(",")
+#             return set(item.strip().strip("'\"") for item in items if item.strip())
+#         return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
+#     except Exception as e:
+#         st.error(f"Error parsing tech stack: {e}")
+#         return set()
+# def display_tech_stack(stack_set):
+#     if isinstance(stack_set, set):
+#         return ", ".join(sorted(stack_set))
+#     return str(stack_set)
+# def get_matching_candidates(job_stack, candidates_df):
+#     """Find candidates with matching tech stack for a specific job"""
+#     matched = []
+#     job_stack_set = parse_tech_stack(job_stack)
+#     for _, candidate in candidates_df.iterrows():
+#         candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
+#         common = job_stack_set & candidate_stack
+#         if len(common) >= 2:
+#             matched.append({
+#                 "Name": candidate["Full Name"],
+#                 "URL": candidate["LinkedIn URL"],
+#                 "Degree & Education": candidate["Degree & University"],
+#                 "Years of Experience": candidate["Years of Experience"],
+#                 "Current Title & Company": candidate['Current Title & Company'],
+#                 "Key Highlights": candidate["Key Highlights"],
+#                 "Location": candidate["Location (from most recent experience)"],
+#                 "Experience": str(candidate["Experience"]),
+#                 "Tech Stack": candidate_stack
+#             })
+#     return matched
+# def setup_llm():
+#     """Set up the LangChain LLM with structured output"""
+#     # Define the model to use
+#     model_name = "gpt-4o-mini"
+#     # Store model name in session state for token calculation
+#     if 'model_name' not in st.session_state:
+#         st.session_state.model_name = model_name
+#     # Create LLM instance
+#     llm = ChatOpenAI(
+#         model=model_name,
+#         temperature=0.3,
+#         max_tokens=None,
+#         timeout=None,
+#         max_retries=2,
+#     )
+#     # Create structured output
+#     sum_llm = llm.with_structured_output(Shortlist)
+#     # Create system prompt
+#     system = """You are an expert Tech Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
+#     the profile is according to job.
+# Try to ensure following points while estimating the candidate's fit score:
+# For education:
+# Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
+# Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
+# Tier3 - Unknown or unranked institutions - Lower points or reject
+# Startup Experience Requirement:
+# Candidates must have worked  as a direct employee at a VC-backed startup (Seed to series C/D)
+# preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
+# Apart from this the candidate must reside near or on the job location. If it is not immediately give a fit score below 5.
+#     The fit score signifies based on following metrics:
+#     1–5 - Poor Fit - Auto-reject
+#     6–7 - Weak Fit - Auto-reject
+#     8.0–8.7 - Moderate Fit - Auto-reject
+#     8.8–10 - STRONG Fit - Include in results
+#     Each candidate's fit score should be calculated based on a weighted evaluation of their background and must be distinct even if candidates have similar profiles.
+#     """
+#     # Create query prompt
+#     query_prompt = ChatPromptTemplate.from_messages([
+#         ("system", system),
+#         ("human", """
+#     You are an expert Recruitor. Your task is to determine if the candidate matches the given job.
+# Provide the score as a `float` rounded to exactly **three decimal places** (e.g., 8.943, 9.211, etc.).
+# Avoid rounding to whole or one-decimal numbers. Every candidate should have a **unique** fit score.
+#           For this you will be provided with the follwing inputs of job and candidates:
+#     Job Details
+#     Company: {Company}
+#     Role: {Role}
+#     About Company: {desc}
+#     Locations: {Locations}
+#     Tech Stack: {Tech_Stack}
+#     Industry: {Industry}
+#     Candidate Details:
+#     Full Name: {Full_Name}
+#     LinkedIn URL: {LinkedIn_URL}
+#     Current Title & Company: {Current_Title_Company}
+#     Years of Experience: {Years_of_Experience}
+#     Degree & University: {Degree_University}
+#     Key Tech Stack: {Key_Tech_Stack}
+#     Key Highlights: {Key_Highlights}
+#     Location (from most recent experience): {cand_Location}
+#     Past_Experience: {Experience}
+#     Answer in the structured manner as per the schema.
+#     If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
+#     The `fit_score` must be a float with **exactly three decimal digits** (e.g. 8.812, 9.006). Do not round to 1 or 2 decimals.
+#     """),
+#     ])
+#     # Chain the prompt and LLM
+#     cat_class = query_prompt | sum_llm
+#     return cat_class
+# def call_llm(candidate_data, job_data, llm_chain):
+#     """Call the actual LLM to evaluate the candidate"""
+#     try:
+#         # Convert tech stacks to strings for the LLM payload
+#         job_tech_stack = job_data.get("Tech_Stack", set())
+#         candidate_tech_stack = candidate_data.get("Tech Stack", set())
+#         if isinstance(job_tech_stack, set):
+#             job_tech_stack = ", ".join(sorted(job_tech_stack))
+#         if isinstance(candidate_tech_stack, set):
+#             candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
+#         # Prepare payload for LLM
+#         payload = {
+#             "Company": job_data.get("Company", ""),
+#             "Role": job_data.get("Role", ""),
+#             "desc": job_data.get("desc", ""),
+#             "Locations": job_data.get("Locations", ""),
+#             "Tech_Stack": job_tech_stack,
+#             "Industry": job_data.get("Industry", ""),
+#             "Full_Name": candidate_data.get("Name", ""),
+#             "LinkedIn_URL": candidate_data.get("URL", ""),
+#             "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
+#             "Years_of_Experience": candidate_data.get("Years of Experience", ""),
+#             "Degree_University": candidate_data.get("Degree & Education", ""),
+#             "Key_Tech_Stack": candidate_tech_stack,
+#             "Key_Highlights": candidate_data.get("Key Highlights", ""),
+#             "cand_Location": candidate_data.get("Location", ""),
+#             "Experience": candidate_data.get("Experience", "")
+#         }
+#         # Convert payload to a string for token calculation
+#         payload_str = json.dumps(payload)
+#         # Calculate input tokens
+#         input_tokens = calculate_tokens(payload_str, st.session_state.model_name)
+#         # Call LLM
+#         response = llm_chain.invoke(payload)
+#         print(candidate_data.get("Experience", ""))
+#         # Convert response to string for token calculation
+#         response_str = f"""
+#         candidate_name: {response.candidate_name}
+#         candidate_url: {response.candidate_url}
+#         candidate_summary: {response.candidate_summary}
+#         candidate_location: {response.candidate_location}
+#         fit_score: {float(f"{response.fit_score:.3f}")}
+#         justification: {response.justification}
+#         """
+#         # Calculate output tokens
+#         output_tokens = calculate_tokens(response_str, st.session_state.model_name)
+#         # Update token counts in session state
+#         if 'total_input_tokens' not in st.session_state:
+#             st.session_state.total_input_tokens = 0
+#         if 'total_output_tokens' not in st.session_state:
+#             st.session_state.total_output_tokens = 0
+#         st.session_state.total_input_tokens += input_tokens
+#         st.session_state.total_output_tokens += output_tokens
+#         # Return response in expected format
+#         return {
+#             "candidate_name": response.candidate_name,
+#             "candidate_url": response.candidate_url,
+#             "candidate_summary": response.candidate_summary,
+#             "candidate_location": response.candidate_location,
+#             "fit_score": response.fit_score,
+#             "justification": response.justification
+#         }
+#     except Exception as e:
+#         st.error(f"Error calling LLM: {e}")
+#         # Fallback to a default response
+#         return {
+#             "candidate_name": candidate_data.get("Name", "Unknown"),
+#             "candidate_url": candidate_data.get("URL", ""),
+#             "candidate_summary": "Error processing candidate profile",
+#             "candidate_location": candidate_data.get("Location", "Unknown"),
+#             "fit_score": 0.0,
+#             "justification": f"Error in LLM processing: {str(e)}"
+#         }
+# def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
+#     """Process candidates for a specific job using the LLM"""
+#     # Reset token counters for this job
+#     st.session_state.total_input_tokens = 0
+#     st.session_state.total_output_tokens = 0
+#     if llm_chain is None:
+#         with st.spinner("Setting up LLM..."):
+#             llm_chain = setup_llm()
+#     selected_candidates = []
+#     try:
+#         # Get job-specific data
+#         job_data = {
+#             "Company": job_row["Company"],
+#             "Role": job_row["Role"],
+#             "desc": job_row.get("One liner", ""),
+#             "Locations": job_row.get("Locations", ""),
+#             "Tech_Stack": job_row["Tech Stack"],
+#             "Industry": job_row.get("Industry", "")
+#         }
+#         # Find matching candidates for this job
+#         with st.spinner("Finding matching candidates based on tech stack..."):
+#             matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
+#         if not matching_candidates:
+#             st.warning("No candidates with matching tech stack found for this job.")
+#             return []
+#         st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
+#         # Create progress elements
+#         candidates_progress = st.progress(0)
+#         candidate_status = st.empty()
+#         # Process each candidate
+#         for i, candidate_data in enumerate(matching_candidates):
+#             # Update progress
+#             candidates_progress.progress((i + 1) / len(matching_candidates))
+#             candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
+#             # Process the candidate with the LLM
+#             response = call_llm(candidate_data, job_data, llm_chain)
+#             response_dict = {
+#                 "Name": response["candidate_name"],
+#                 "LinkedIn": response["candidate_url"],
+#                 "summary": response["candidate_summary"],
+#                 "Location": response["candidate_location"],
+#                 "Fit Score": float(f"{response['fit_score']:.3f}"),
+#                 "justification": response["justification"],
+#                 # Add back original candidate data for context
+#                 "Educational Background": candidate_data.get("Degree & Education", ""),
+#                 "Years of Experience": candidate_data.get("Years of Experience", ""),
+#                 "Current Title & Company": candidate_data.get("Current Title & Company", "")
+#             }
+#             # Add to selected candidates if score is high enough
+#             if response["fit_score"] >= 8.800:
+#                 selected_candidates.append(response_dict)
+#                 st.markdown(response_dict)
+#             else:
+#                 st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
+#         # Clear progress indicators
+#         candidates_progress.empty()
+#         candidate_status.empty()
+#         # Show results
+#         if selected_candidates:
+#             st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
+#         else:
+#             st.info("No candidates met the minimum fit score threshold for this job.")
+#         # Token usage is now displayed in display_job_selection when showing results
+#         return selected_candidates
+#     except Exception as e:
+#         st.error(f"Error processing job: {e}")
+#         return []
+# def main():
+#     st.title("👨‍💻 Candidate Matching App")
+#     # Initialize session state
+#     if 'processed_jobs' not in st.session_state:
+#         st.session_state.processed_jobs = {}
+#     st.write("""
+#     This app matches job listings with candidate profiles based on tech stack and other criteria.
+#     Select a job to find matching candidates.
+#     """)
+#     # API Key input
+#     with st.sidebar:
+#         st.header("API Configuration")
+#         api_key = st.text_input("Enter OpenAI API Key", type="password")
+#         if api_key:
+#             os.environ["OPENAI_API_KEY"] = api_key
+#             st.success("API Key set!")
+#         else:
+#             st.warning("Please enter OpenAI API Key to use LLM features")
+#     # Show API key warning if not set
+#     SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-e94255ca76fd.json'
+#     SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
+#     creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
+#     gc = gspread.authorize(creds)
+#     job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
+#     candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
+#     if not api_key:
+#         st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
+#     if api_key:
+#         try:
+#             # Load data from Google Sheets
+#             job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
+#             job_data = job_worksheet.get_all_values()
+#             candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
+#             candidate_data = candidate_worksheet.get_all_values()
+#             # Convert to DataFrames
+#             jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
+#             jobs_df = jobs_df.drop(["Link"],axis = 1)
+#             candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
+#             candidates_df = candidates_df.fillna("Unknown")
+#             # Display data preview
+#             with st.expander("Preview uploaded data"):
+#                 st.subheader("Jobs Data Preview")
+#                 st.dataframe(jobs_df.head(3))
+#                 st.subheader("Candidates Data Preview")
+#                 st.dataframe(candidates_df.head(3))
+#             # Map column names if needed
+#             column_mapping = {
+#                 "Full Name": "Full Name",
+#                 "LinkedIn URL": "LinkedIn URL",
+#                 "Current Title & Company": "Current Title & Company",
+#                 "Years of Experience": "Years of Experience",
+#                 "Degree & University": "Degree & University",
+#                 "Key Tech Stack": "Key Tech Stack",
+#                 "Key Highlights": "Key Highlights",
+#                 "Location (from most recent experience)": "Location (from most recent experience)"
+#             }
+#             # Rename columns if they don't match expected
+#             candidates_df = candidates_df.rename(columns={
+#                 col: mapping for col, mapping in column_mapping.items()
+#                 if col in candidates_df.columns and col != mapping
+#             })
+#             # Now, instead of processing all jobs upfront, we'll display job selection
+#             # and only process the selected job when the user chooses it
+#             display_job_selection(jobs_df, candidates_df, job_sheet)
+#         except Exception as e:
+#             st.error(f"Error processing files: {e}")
+#     st.divider()
+# def display_job_selection(jobs_df, candidates_df, sh):
+#     # Initialize session state variables if they don't exist
+#     if 'Selected_Candidates' not in st.session_state:
+#         st.session_state.Selected_Candidates = {}
+#     if 'llm_chain' not in st.session_state:
+#         st.session_state.llm_chain = setup_llm()
+#     st.subheader("Select a job to view potential matches")
+#     # Create job options
+#     job_options = []
+#     for i, row in jobs_df.iterrows():
+#         job_options.append(f"{row['Role']} at {row['Company']}")
+#     if job_options:
+#         selected_job_index = st.selectbox("Jobs:",
+#                                       range(len(job_options)),
+#                                       format_func=lambda x: job_options[x])
+#         # Display job details
+#         job_row = jobs_df.iloc[selected_job_index]
+#         # Parse tech stack for display
+#         job_row_stack = parse_tech_stack(job_row["Tech Stack"])
+#         col1, col2 = st.columns([2, 1])
+#         with col1:
+#             st.subheader(f"Job Details: {job_row['Role']}")
+#             job_details = {
+#                 "Company": job_row["Company"],
+#                 "Role": job_row["Role"],
+#                 "Description": job_row.get("One liner", "N/A"),
+#                 "Locations": job_row.get("Locations", "N/A"),
+#                 "Industry": job_row.get("Industry", "N/A"),
+#                 "Tech Stack": display_tech_stack(job_row_stack)
+#             }
+#             for key, value in job_details.items():
+#                 st.markdown(f"**{key}:** {value}")
+#         # Create a key for this job in session state
+#         job_key = f"job_{selected_job_index}_processed"
+#         if job_key not in st.session_state:
+#             st.session_state[job_key] = False
+#         # Create worksheet name
+#         sheet_name = f"{job_row['Role']} at {job_row['Company']}".strip()[:100]
+#         # Check if worksheet exists and has data
+#         worksheet_exists = False
+#         existing_candidates = []
+#         try:
+#             cand_worksheet = sh.worksheet(sheet_name)
+#             worksheet_exists = True
+#             # Get existing data if worksheet exists
+#             existing_data = cand_worksheet.get_all_values()
+#             if len(existing_data) > 1:  # Has data beyond header
+#                 existing_candidates = existing_data[1:]
+#                 st.session_state[job_key] = True
+#                 # Don't show the info message about existing data
+#         except gspread.exceptions.WorksheetNotFound:
+#             pass
+#         # Add a process button for this job
+#         if not st.session_state[job_key]:
+#             if st.button(f"Find Matching Candidates for this Job"):
+#                 if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
+#                     st.error("Please enter your OpenAI API key in the sidebar before processing")
+#                 else:
+#                     # Process candidates for this job (only when requested)
+#                     with st.spinner("Processing candidates..."):
+#                         selected_candidates = process_candidates_for_job(
+#                             job_row,
+#                             candidates_df,
+#                             st.session_state.llm_chain
+#                         )
+#                         selected_candidates.sort(key=lambda x: x["Fit Score"], reverse=True)
+#                         # Only create worksheet if we have candidates
+#                         if selected_candidates:
+#                             try:
+#                                 if not worksheet_exists:
+#                                     cand_worksheet = sh.add_worksheet(title=sheet_name, rows=10000, cols=50)
+#                                 # Prepare data for Google Sheet
+#                                 headers = list(selected_candidates[0].keys())
+#                                 rows = [headers] + [list(candidate.values()) for candidate in selected_candidates]
+#                                 # Clear existing data if any
+#                                 cand_worksheet.clear()
+#                                 # Write data to the worksheet
+#                                 cand_worksheet.update('A1', rows)
+#                                 st.success(f"Successfully processed {len(selected_candidates)} candidates")
+#                             except Exception as e:
+#                                 st.error(f"Error writing to Google Sheet: {e}")
+#                         # Store the results and set as processed
+#                         st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
+#                         st.session_state[job_key] = True
+#                         # Force refresh
+#                         st.rerun()
+#         # Display selected candidates if already processed
+#         if st.session_state[job_key]:
+#             if existing_candidates:
+#                 # Convert existing worksheet data to our format
+#                 headers = existing_data[0]
+#                 selected_candidates = []
+#                 for row in existing_data[1:]:
+#                     candidate = dict(zip(headers, row))
+#                     selected_candidates.append(candidate)
+#                 st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
+#             elif 'Selected_Candidates' in st.session_state:
+#                 selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
+#             else:
+#                 selected_candidates = []
+#             # Display selected candidates
+#             st.subheader("Selected Candidates")
+#             # Display token usage statistics (only if we processed with LLM)
+#             if not existing_candidates and 'total_input_tokens' in st.session_state and 'total_output_tokens' in st.session_state:
+#                 display_token_usage()
+#             if len(selected_candidates) > 0:
+#                 for i, candidate in enumerate(selected_candidates):
+#                     with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate.get('Fit Score', 'N/A')})"):
+#                         col1, col2 = st.columns([3, 1])
+#                         with col1:
+#                             st.markdown(f"**Summary:** {candidate.get('summary', 'N/A')}")
+#                             st.markdown(f"**Current:** {candidate.get('Current Title & Company', 'N/A')}")
+#                             st.markdown(f"**Education:** {candidate.get('Educational Background', 'N/A')}")
+#                             st.markdown(f"**Experience:** {candidate.get('Years of Experience', 'N/A')}")
+#                             st.markdown(f"**Location:** {candidate.get('Location', 'N/A')}")
+#                             if 'LinkedIn' in candidate:
+#                                 st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
+#                         with col2:
+#                             if 'Fit Score' in candidate:
+#                                 st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
+#                         if 'justification' in candidate:
+#                             st.markdown("**Justification:**")
+#                             st.info(candidate['justification'])
+#             else:
+#                 st.info("No candidates found for this job.")
+#             # Add a reset button to start over
+#             if st.button("Reset and Process Again"):
+#                 # Reset this job's processing state
+#                 st.session_state[job_key] = False
+#                 if 'Selected_Candidates' in st.session_state and selected_job_index in st.session_state.Selected_Candidates:
+#                     del st.session_state.Selected_Candidates[selected_job_index]
+#                 st.rerun()
+# if __name__ == "__main__":
+#     main()
 import streamlit as st
 import pandas as pd
 import json
 import os
 from pydantic import BaseModel, Field
+from typing import List, Set, Dict, Any, Optional # Already have these, but commented for brevity if not all used
+import time # Added for potential small delays if needed
 from langchain_openai import ChatOpenAI
+from langchain_core.messages import HumanMessage # Not directly used in provided snippet
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser # Not directly used in provided snippet
+from langchain_core.prompts import PromptTemplate # Not directly used in provided snippet
 import gspread
 import tempfile
 from google.oauth2 import service_account
 )
 os.environ["STREAMLIT_HOME"] = tempfile.gettempdir()
 os.environ["STREAMLIT_DISABLE_TELEMETRY"] = "1"
 # Define pydantic model for structured output
 class Shortlist(BaseModel):
     fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements upto 3 decimal points.")
 # Function to calculate tokens
 def calculate_tokens(text, model="gpt-4o-mini"):
     """Return the number of tokens ``text`` occupies for ``model``.

     The encoding is looked up from the model name itself, so e.g. "gpt-4"
     and "gpt-4o-mini" each get their own tokenizer instead of sharing one.
     Model names tiktoken does not recognise fall back to ``cl100k_base``.

     Args:
         text: Text to measure. ``None`` and empty text count as 0 tokens;
             non-string values are converted with ``str()`` first.
         model: OpenAI model name used to select the tokenizer.

     Returns:
         The token count as an int. If tokenization fails for any reason
         (including tiktoken not being available), a rough estimate of one
         token per four characters is returned instead.
     """
     if text is None:
         return 0
     text = str(text)
     if not text:
         return 0
     try:
         try:
             encoding = tiktoken.encoding_for_model(model)
         except KeyError:
             # tiktoken raises KeyError for model names it cannot map.
             encoding = tiktoken.get_encoding("cl100k_base")
         return len(encoding.encode(text))
     except Exception:
         # Deliberate best-effort: token counts only feed the cost display,
         # so never let a tokenizer problem abort candidate processing.
         return len(text) // 4
 # Function to display token usage
 def display_token_usage():
     if 'total_input_tokens' not in st.session_state:
         st.session_state.total_input_tokens = 0
     if 'total_output_tokens' not in st.session_state:
     total_output = st.session_state.total_output_tokens
     total_tokens = total_input + total_output
+    model_to_check = st.session_state.get('model_name', "gpt-4o-mini") # Use a default if not set
+    if model_to_check == "gpt-4o-mini":
+        input_cost_per_1k = 0.00015 # Adjusted to example rates ($0.15 / 1M tokens)
+        output_cost_per_1k = 0.0006  # Adjusted to example rates ($0.60 / 1M tokens)
+    elif "gpt-4" in model_to_check: # Fallback for other gpt-4
+        input_cost_per_1k = 0.005
+        output_cost_per_1k = 0.015 # General gpt-4 pricing can vary
     else:  # Assume gpt-3.5-turbo pricing
+        input_cost_per_1k = 0.0005  # $0.0005 per 1K input tokens
+        output_cost_per_1k = 0.0015   # $0.0015 per 1K output tokens
     estimated_cost = (total_input / 1000 * input_cost_per_1k) + (total_output / 1000 * output_cost_per_1k)
+    st.subheader("📊 Token Usage Statistics (for last processed job)")
     col1, col2, col3 = st.columns(3)
+    with col1: st.metric("Input Tokens", f"{total_input:,}")
+    with col2: st.metric("Output Tokens", f"{total_output:,}")
+    with col3: st.metric("Total Tokens", f"{total_tokens:,}")
     st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")
     return total_tokens
 # Function to parse and normalize tech stacks
 def parse_tech_stack(stack):
     """Normalize a tech-stack value into a set of lower-cased names.

     Accepted inputs:
       * ``None``, NaN/NA scalars and empty strings -> empty set.
       * A ``set`` -> returned unchanged (same object).
       * A list, tuple or frozenset -> each item stripped and lower-cased.
       * A string such as ``"Python, Go"`` or the textual form of a set
         such as ``"{'Python', 'Go'}"`` -> split on commas; for the brace
         form surrounding quotes are removed from each item.

     Empty items (e.g. from a trailing comma) are dropped so that an empty
     string can never count as a technology shared by a job and a candidate.

     Returns:
         A set of strings. On an unexpected error the problem is reported
         with ``st.error`` and an empty set is returned.
     """
     if stack is None:
         return set()
     if isinstance(stack, set):
         return stack
     try:
         if isinstance(stack, (list, tuple, frozenset)):
             items = [str(item) for item in stack]
         else:
             # Only scalars are passed to pd.isna; on a sequence it returns
             # an array whose truth value is ambiguous.
             if not isinstance(stack, str) and pd.isna(stack):
                 return set()
             text = str(stack)
             if text.startswith("{") and text.endswith("}"):
                 # Textual form of a set: drop the braces and item quotes.
                 items = [
                     item.strip().strip("'\"")
                     for item in text.strip("{}").split(",")
                 ]
             else:
                 items = text.split(",")
         normalized = (item.strip().lower() for item in items)
         return {item for item in normalized if item}
     except Exception as e:
         st.error(f"Error parsing tech stack: {e}")
         return set()
 def display_tech_stack(stack_set):
     """Render a tech stack for display.

     Args:
         stack_set: Usually a set of technology names.

     Returns:
         For a set: its members converted to strings, sorted and joined
         with ", " (an empty set gives ""). Any other value is returned
         as ``str(stack_set)``.
     """
     if not isinstance(stack_set, set):
         return str(stack_set)
     # str() each member so a stray non-string entry cannot break join().
     return ", ".join(sorted(str(item) for item in stack_set))
 def get_matching_candidates(job_stack, candidates_df, min_common=2):
     """Find candidates whose tech stack overlaps the job's tech stack.

     Both stacks are normalized with ``parse_tech_stack`` and compared by
     set intersection.

     Args:
         job_stack: The job's tech stack in any form ``parse_tech_stack``
             accepts.
         candidates_df: DataFrame of candidates. The columns read are
             "Key Tech Stack", "Full Name", "LinkedIn URL",
             "Degree & University", "Years of Experience",
             "Current Title & Company", "Key Highlights",
             "Location (from most recent experience)" and "Experience".
         min_common: Minimum number of shared technologies required for a
             candidate to be kept. Defaults to 2.

     Returns:
         A list of dicts, one per matching candidate, in DataFrame order.
         "Tech Stack" holds the candidate's parsed stack (a set) and
         "Experience" is converted to ``str``.
     """
     job_stack_set = parse_tech_stack(job_stack)
     # A job stack smaller than the threshold can never yield a match, so
     # skip scanning the candidates altogether.
     if len(job_stack_set) < min_common:
         return []
     matched = []
     for _, candidate in candidates_df.iterrows():
         candidate_stack = parse_tech_stack(candidate["Key Tech Stack"])
         if len(job_stack_set & candidate_stack) < min_common:
             continue
         matched.append({
             "Name": candidate["Full Name"],
             "URL": candidate["LinkedIn URL"],
             "Degree & Education": candidate["Degree & University"],
             "Years of Experience": candidate["Years of Experience"],
             "Current Title & Company": candidate["Current Title & Company"],
             "Key Highlights": candidate["Key Highlights"],
             "Location": candidate["Location (from most recent experience)"],
             "Experience": str(candidate["Experience"]),
             "Tech Stack": candidate_stack,
         })
     return matched
     # Create system prompt
     system = """You are an expert Tech Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
     the profile is according to job.
+    First of all check the location of the candidate, if the location is not in the range of the job location then reject the candidate directly without any further analysis.
+    for example if the job location is New York and the candidate is in San Francisco then reject the candidate. Similarly for other states as well.
 Try to ensure following points while estimating the candidate's fit score:
 For education:
 Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
 Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
 Tier3 - Unknown or unranked institutions - Lower points or reject
 Startup Experience Requirement:
 Candidates must have worked  as a direct employee at a VC-backed startup (Seed to series C/D)
 preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
     The fit score signifies based on following metrics:
     1–5 - Poor Fit - Auto-reject
     6–7 - Weak Fit - Auto-reject
     8.0–8.7 - Moderate Fit - Auto-reject
     8.8–10 - STRONG Fit - Include in results
     Each candidate's fit score should be calculated based on a weighted evaluation of their background and must be distinct even if candidates have similar profiles.
     """
     Tech Stack: {Tech_Stack}
     Industry: {Industry}
     Candidate Details:
     Full Name: {Full_Name}
     LinkedIn URL: {LinkedIn_URL}
     Key Highlights: {Key_Highlights}
     Location (from most recent experience): {cand_Location}
     Past_Experience: {Experience}
     Answer in the structured manner as per the schema.
     If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
     The `fit_score` must be a float with **exactly three decimal digits** (e.g. 8.812, 9.006). Do not round to 1 or 2 decimals.
     return cat_class
 def call_llm(candidate_data, job_data, llm_chain):
     """Evaluate one candidate against one job with the LLM chain.

     Builds the prompt payload from ``job_data`` and ``candidate_data``,
     invokes ``llm_chain`` and adds the estimated input/output token counts
     to ``st.session_state.total_input_tokens`` and
     ``st.session_state.total_output_tokens``.

     Args:
         candidate_data: Dict describing the candidate (keys such as
             "Name", "URL", "Tech Stack", "Location", "Experience").
         job_data: Dict describing the job (keys such as "Company",
             "Role", "desc", "Locations", "Tech_Stack", "Industry").
         llm_chain: Object whose ``invoke(payload)`` returns a response
             exposing ``candidate_name``, ``candidate_url``,
             ``candidate_summary``, ``candidate_location``, ``fit_score``
             and ``justification`` attributes.

     Returns:
         A dict with the keys "candidate_name", "candidate_url",
         "candidate_summary", "candidate_location", "fit_score" and
         "justification". If anything fails, the error is shown with
         ``st.error`` and a fallback dict with ``fit_score`` 0.0 is
         returned instead of raising.
     """
     def _stack_text(value):
         # Sets become a sorted, comma-separated string; other values pass through.
         return ", ".join(sorted(value)) if isinstance(value, set) else value

     try:
         # Same default as display_token_usage, so a missing session entry
         # does not turn into a bogus "LLM error" for the candidate.
         model_name = st.session_state.get("model_name", "gpt-4o-mini")
         payload = {
             "Company": job_data.get("Company", ""),
             "Role": job_data.get("Role", ""),
             "desc": job_data.get("desc", ""),
             "Locations": job_data.get("Locations", ""),
             "Tech_Stack": _stack_text(job_data.get("Tech_Stack", "")),
             "Industry": job_data.get("Industry", ""),
             "Full_Name": candidate_data.get("Name", ""),
             "LinkedIn_URL": candidate_data.get("URL", ""),
             "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
             "Years_of_Experience": candidate_data.get("Years of Experience", ""),
             "Degree_University": candidate_data.get("Degree & Education", ""),
             "Key_Tech_Stack": _stack_text(candidate_data.get("Tech Stack", "")),
             "Key_Highlights": candidate_data.get("Key Highlights", ""),
             "cand_Location": candidate_data.get("Location", ""),
             "Experience": candidate_data.get("Experience", ""),
         }
         # The JSON text is only used to estimate input tokens; default=str
         # keeps an unusual value type from aborting the evaluation.
         input_tokens = calculate_tokens(json.dumps(payload, default=str), model_name)
         response = llm_chain.invoke(payload)
         # Count output tokens over every returned field, not a shortened
         # placeholder, so the usage statistics reflect the real response.
         response_str = "\n".join([
             f"candidate_name: {response.candidate_name}",
             f"candidate_url: {response.candidate_url}",
             f"candidate_summary: {response.candidate_summary}",
             f"candidate_location: {response.candidate_location}",
             f"fit_score: {float(f'{response.fit_score:.3f}')}",
             f"justification: {response.justification}",
         ])
         output_tokens = calculate_tokens(response_str, model_name)
         if 'total_input_tokens' not in st.session_state:
             st.session_state.total_input_tokens = 0
         if 'total_output_tokens' not in st.session_state:
             st.session_state.total_output_tokens = 0
         st.session_state.total_input_tokens += input_tokens
         st.session_state.total_output_tokens += output_tokens
         return {
             "candidate_name": response.candidate_name,
             "candidate_url": response.candidate_url,
             "candidate_summary": response.candidate_summary,
             "candidate_location": response.candidate_location,
             "fit_score": response.fit_score,
             "justification": response.justification,
         }
     except Exception as e:
         st.error(f"Error calling LLM for {candidate_data.get('Name', 'Unknown')}: {e}")
         # A zero score keeps the failed candidate below any selection threshold.
         return {
             "candidate_name": candidate_data.get("Name", "Unknown"),
             "candidate_url": candidate_data.get("URL", ""),
             "candidate_summary": "Error processing candidate profile",
             "candidate_location": candidate_data.get("Location", "Unknown"),
             "fit_score": 0.0,
             "justification": f"Error in LLM processing: {str(e)}",
         }
 def process_candidates_for_job(job_row, candidates_df, llm_chain=None, *, min_fit_score=8.8):
     """Evaluate every tech-stack-matching candidate for one job with the LLM.

     Args:
         job_row: Row-like object for the job. "Company", "Role" and
             "Tech Stack" are read by index; "One liner", "Locations" and
             "Industry" are read with ``.get``.
         candidates_df: Candidate table handed to ``get_matching_candidates``.
         llm_chain: Chain passed on to ``call_llm``. When ``None`` a new one is
             created with ``setup_llm()``.
         min_fit_score: Minimum (3-decimal rounded) fit score a candidate needs
             to be selected. Defaults to the previous hard-coded 8.8.

     Returns:
         A list of dicts, one per selected candidate, in evaluation order. The
         list is empty when no candidate matches the tech stack or none reaches
         ``min_fit_score``. When the user stops processing, the candidates
         selected so far are returned.

     Side effects:
         Resets the session token counters, and renders progress, per-candidate
         results and a final status message through Streamlit.
     """
     # Imported here because the loop below needs it and the file-level import
     # list is not guaranteed to provide it.
     import time

     st.session_state.total_input_tokens = 0  # Reset for this job
     st.session_state.total_output_tokens = 0

     if llm_chain is None:
         with st.spinner("Setting up LLM..."):
             llm_chain = setup_llm()

     job_data = {
         "Company": job_row["Company"],
         "Role": job_row["Role"],
         "desc": job_row.get("One liner", ""),
         "Locations": job_row.get("Locations", ""),
         "Tech_Stack": job_row["Tech Stack"],
         "Industry": job_row.get("Industry", ""),
     }

     with st.spinner("Finding matching candidates based on tech stack..."):
         matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)

     if not matching_candidates:
         st.warning("No candidates with matching tech stack found for this job.")
         return []

     total = len(matching_candidates)
     st.success(f"Found {total} candidates with matching tech stack. Evaluating with LLM...")

     selected_candidates = []
     candidates_progress = st.progress(0)
     candidate_status = st.empty()  # Placeholder for live status updates

     for position, candidate_data in enumerate(matching_candidates, start=1):
         if st.session_state.get('stop_processing_flag', False):
             candidate_status.warning("Processing stopped by user.")
             time.sleep(1)  # Give the user a moment to read the message
             break

         candidate_status.text(
             f"Evaluating candidate {position}/{total}: {candidate_data.get('Name', 'Unknown')}"
         )
         response = call_llm(candidate_data, job_data, llm_chain)

         # Round once and use the same value for both the decision and the
         # display, so a candidate is never "rejected with score 8.800".
         fit_score = float(f"{response['fit_score']:.3f}")
         response_dict = {
             "Name": response["candidate_name"],
             "LinkedIn": response["candidate_url"],
             "summary": response["candidate_summary"],
             "Location": response["candidate_location"],
             "Fit Score": fit_score,
             "justification": response["justification"],
             "Educational Background": candidate_data.get("Degree & Education", ""),
             "Years of Experience": candidate_data.get("Years of Experience", ""),
             "Current Title & Company": candidate_data.get("Current Title & Company", ""),
         }

         # These live messages are only visible while processing runs; they are
         # cleared by the next full script rerun.
         if fit_score >= min_fit_score:
             selected_candidates.append(response_dict)
             st.markdown(
                 f"**Selected Candidate:** [{response_dict['Name']}]({response_dict['LinkedIn']}) "
                 f"(Score: {fit_score:.3f}, Location: {response_dict['Location']})"
             )
         else:
             st.write(
                 f"Rejected candidate: {response_dict['Name']} with score: "
                 f"{fit_score:.3f}, Location: {response_dict['Location']}"
             )

         candidates_progress.progress(position / total)

     candidates_progress.empty()
     candidate_status.empty()

     # The summary is only meaningful for a run that was not interrupted.
     if not st.session_state.get('stop_processing_flag', False):
         if selected_candidates:
             st.success(
                 f"✅ LLM evaluation complete. Found {len(selected_candidates)} "
                 "suitable candidates for this job!"
             )
         else:
             st.info("LLM evaluation complete. No candidates met the minimum fit score threshold for this job.")

     return selected_candidates
 def main():
     """Render the Candidate Matching app.

     Steps, in order:
       1. Initialise the session-state entries the app relies on.
       2. Read the OpenAI API key from the sidebar and build the LLM chain.
       3. Connect to the job and candidate Google Sheets.
       4. Load both worksheets into DataFrames, show a preview, and hand over
          to ``display_job_selection``.

     The script run is halted with ``st.stop()`` when the Google Sheets
     connection fails or no API key is available.
     """
     st.title("👨‍💻 Candidate Matching App")

     session_defaults = {
         'processed_jobs': {},  # May not be used with the current logic
         'Selected_Candidates': {},
         'llm_chain': None,
         'stop_processing_flag': False,
     }
     for state_key, default_value in session_defaults.items():
         if state_key not in st.session_state:
             st.session_state[state_key] = default_value

     st.write("This app matches job listings with candidate profiles...")

     with st.sidebar:
         st.header("API Configuration")
         api_key = st.text_input("Enter OpenAI API Key", type="password", key="api_key_input")
         if api_key:
             os.environ["OPENAI_API_KEY"] = api_key
             # Build the chain once; later runs reuse the stored instance.
             if st.session_state.llm_chain is None:
                 with st.spinner("Setting up LLM..."):
                     st.session_state.llm_chain = setup_llm()
             st.success("API Key set")
         else:
             st.warning("Please enter OpenAI API Key to use LLM features")
             # Remove the key from the environment too. Otherwise a cleared
             # key keeps being used, and the chain is discarded here and
             # rebuilt below on every single run.
             os.environ.pop("OPENAI_API_KEY", None)
             st.session_state.llm_chain = None

     SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-e94255ca76fd.json'  # Ensure this path is correct
     SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
     try:
         creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
         gc = gspread.authorize(creds)
         job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
         candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
     except Exception as e:
         st.error(f"Failed to connect to Google Sheets. Ensure '{SERVICE_ACCOUNT_FILE}' is valid and has permissions. Error: {e}")
         st.stop()

     if not os.environ.get("OPENAI_API_KEY"):
         st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
         st.stop()

     # Defensive fallback: a key is present but no chain is stored yet. No rerun
     # is needed because everything below reads the chain from session state
     # after this point; rerunning here could loop if the chain stays None.
     if st.session_state.llm_chain is None:
         with st.spinner("Setting up LLM..."):
             st.session_state.llm_chain = setup_llm()

     try:
         job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
         job_data = job_worksheet.get_all_values()
         candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
         candidate_data = candidate_worksheet.get_all_values()

         # The first row of each worksheet holds the column names.
         jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0]).drop(["Link"], axis=1, errors='ignore')
         candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0]).fillna("Unknown")
         candidates_df.drop_duplicates(subset=['LinkedIn URL'], keep='first', inplace=True)

         with st.expander("Preview uploaded data"):
             st.subheader("Jobs Data Preview")
             st.dataframe(jobs_df.head(3))
             st.subheader("Candidates Data Preview")
             st.dataframe(candidates_df.head(3))

         # The job spreadsheet doubles as the place where results are stored.
         display_job_selection(jobs_df, candidates_df, job_sheet)
     except Exception as e:
         # Top-level boundary: report the problem instead of crashing the app.
         st.error(f"Error processing files or data: {e}")

     st.divider()
+def display_job_selection(jobs_df, candidates_df, sh): # 'sh' is the Google Sheets client
+    st.subheader("Select a job to view potential matches")
+    job_options = [f"{row['Role']} at {row['Company']}" for _, row in jobs_df.iterrows()]
+    if not job_options:
+        st.warning("No jobs found to display.")
+        return
+    selected_job_index = st.selectbox("Jobs:", range(len(job_options)), format_func=lambda x: job_options[x], key="job_selectbox")
+    job_row = jobs_df.iloc[selected_job_index]
+    job_row_stack = parse_tech_stack(job_row["Tech Stack"]) # Assuming parse_tech_stack is defined
+    col_job_details_display, _ = st.columns([2,1])
+    with col_job_details_display:
+        st.subheader(f"Job Details: {job_row['Role']}")
+        job_details_dict = {
+            "Company": job_row["Company"], "Role": job_row["Role"], "Description": job_row.get("One liner", "N/A"),
+            "Locations": job_row.get("Locations", "N/A"), "Industry": job_row.get("Industry", "N/A"),
+            "Tech Stack": display_tech_stack(job_row_stack) # Assuming display_tech_stack is defined
+        }
+        for key, value in job_details_dict.items(): st.markdown(f"**{key}:** {value}")
+    # State keys for the selected job
+    job_processed_key = f"job_{selected_job_index}_processed_successfully"
+    job_is_processing_key = f"job_{selected_job_index}_is_currently_processing"
+    # Initialize states if they don't exist for this job
+    if job_processed_key not in st.session_state: st.session_state[job_processed_key] = False
+    if job_is_processing_key not in st.session_state: st.session_state[job_is_processing_key] = False
+    sheet_name = f"{job_row['Role']} at {job_row['Company']}".strip()[:100]
+    worksheet_exists = False
+    existing_candidates_from_sheet = [] # This will store raw data from sheet
+    try:
+        cand_worksheet = sh.worksheet(sheet_name)
+        worksheet_exists = True
+        existing_data = cand_worksheet.get_all_values() # Get all values as list of lists
+        if len(existing_data) > 1: # Has data beyond header
+            existing_candidates_from_sheet = existing_data # Store raw data
+    except gspread.exceptions.WorksheetNotFound:
+        pass
+    # --- Processing Control Area ---
+    # Show controls if not successfully processed in this session OR if sheet exists (allow re-process/overwrite)
+    if not st.session_state.get(job_processed_key, False) or existing_candidates_from_sheet:
+        if existing_candidates_from_sheet and not st.session_state.get(job_is_processing_key, False) and not st.session_state.get(job_processed_key, False):
+             st.info(f"Processing ('{sheet_name}')")
+        col_find, col_stop = st.columns(2)
+        with col_find:
+            if st.button(f"Find Matching Candidates for this Job", key=f"find_btn_{selected_job_index}", disabled=st.session_state.get(job_is_processing_key, False)):
+                if not os.environ.get("OPENAI_API_KEY") or st.session_state.llm_chain is None: # Assuming llm_chain is in session_state
+                    st.error("OpenAI API key not set or LLM not initialized. Please check sidebar.")
+                else:
+                    st.session_state[job_is_processing_key] = True
+                    st.session_state.stop_processing_flag = False # Reset for new run, assuming stop_processing_flag is used
+                    st.session_state.Selected_Candidates[selected_job_index] = [] # Clear previous run for this job
+                    st.session_state[job_processed_key] = False # Mark as not successfully processed yet for this attempt
+                    st.rerun()
+        with col_stop:
+            if st.session_state.get(job_is_processing_key, False): # Show STOP only if "Find" was clicked and currently processing
+                if st.button("STOP Processing", key=f"stop_btn_{selected_job_index}"):
+                    st.session_state.stop_processing_flag = True # Assuming stop_processing_flag is used
+                    st.warning("Stop request sent. Processing will halt shortly.")
+    # --- Actual Processing Logic ---
+    if st.session_state.get(job_is_processing_key, False):
+        with st.spinner(f"Processing candidates for {job_row['Role']} at {job_row['Company']}..."):
+            # Assuming process_candidates_for_job is defined and handles stop_processing_flag
+            processed_candidates_list = process_candidates_for_job(
+                job_row, candidates_df, st.session_state.llm_chain # Assuming llm_chain from session_state
+            )
+        st.session_state[job_is_processing_key] = False # Mark as no longer actively processing
+        if not st.session_state.get('stop_processing_flag', False): # If processing was NOT stopped
+            if processed_candidates_list:
+                # Ensure Fit Score is float for reliable sorting
+                for cand in processed_candidates_list:
+                    if 'Fit Score' in cand and isinstance(cand['Fit Score'], str):
+                        try: cand['Fit Score'] = float(cand['Fit Score'])
+                        except ValueError: cand['Fit Score'] = 0.0 # Default if conversion fails
+                    elif 'Fit Score' not in cand:
+                        cand['Fit Score'] = 0.0
+                processed_candidates_list.sort(key=lambda x: x.get("Fit Score", 0.0), reverse=True)
+                st.session_state.Selected_Candidates[selected_job_index] = processed_candidates_list
+                st.session_state[job_processed_key] = True # Mark as successfully processed
+                # Save to Google Sheet
+                try:
+                    target_worksheet = None
+                    if not worksheet_exists:
+                        target_worksheet = sh.add_worksheet(title=sheet_name, rows=max(100, len(processed_candidates_list) + 10), cols=20)
+                    else:
+                        target_worksheet = sh.worksheet(sheet_name)
+                    headers = list(processed_candidates_list[0].keys())
+                    # Ensure all values are converted to strings for gspread
+                    rows_to_write = [headers] + [[str(candidate.get(h, "")) for h in headers] for candidate in processed_candidates_list]
+                    target_worksheet.clear()
+                    target_worksheet.update('A1', rows_to_write)
+                    st.success(f"Results saved to Google Sheet: '{sheet_name}'")
+                except Exception as e:
+                    st.error(f"Error writing to Google Sheet '{sheet_name}': {e}")
             else:
+                st.info("No suitable candidates found after processing.")
+                st.session_state.Selected_Candidates[selected_job_index] = []
+                st.session_state[job_processed_key] = True # Mark as processed, even if no results
+        else: # If processing WAS stopped
+            st.info("Processing was stopped by user. Results (if any) were not saved. You can try processing again.")
+            st.session_state.Selected_Candidates[selected_job_index] = [] # Clear any partial results
+            st.session_state[job_processed_key] = False # Not successfully processed
+        st.session_state.pop('stop_processing_flag', None) # Clean up flag
+        st.rerun() # Rerun to update UI based on new state
+    # --- Display Results Area ---
+    should_display_results_area = False
+    final_candidates_to_display = [] # Initialize to ensure it's always defined
+    if st.session_state.get(job_is_processing_key, False):
+        should_display_results_area = False # Not if actively processing
+    elif st.session_state.get(job_processed_key, False): # If successfully processed in this session
+        should_display_results_area = True
+        final_candidates_to_display = st.session_state.Selected_Candidates.get(selected_job_index, [])
+    elif existing_candidates_from_sheet: # If not processed in this session, but sheet has data
+        should_display_results_area = True
+        headers = existing_candidates_from_sheet[0]
+        parsed_sheet_candidates = []
+        for row_idx, row_data in enumerate(existing_candidates_from_sheet[1:]): # Skip header row
+            candidate_dict = {}
+            for col_idx, header_name in enumerate(headers):
+                candidate_dict[header_name] = row_data[col_idx] if col_idx < len(row_data) else None
+            # Convert Fit Score from string to float for consistent handling
+            if 'Fit Score' in candidate_dict and isinstance(candidate_dict['Fit Score'], str):
+                try:
+                    candidate_dict['Fit Score'] = float(candidate_dict['Fit Score'])
+                except ValueError:
+                    st.warning(f"Could not convert Fit Score '{candidate_dict['Fit Score']}' to float for candidate in sheet row {row_idx+2}.")
+                    candidate_dict['Fit Score'] = 0.0 # Default if conversion fails
+            elif 'Fit Score' not in candidate_dict:
+                 candidate_dict['Fit Score'] = 0.0
+            parsed_sheet_candidates.append(candidate_dict)
+        final_candidates_to_display = sorted(parsed_sheet_candidates, key=lambda x: x.get("Fit Score", 0.0), reverse=True)
+        if not st.session_state.get(job_processed_key, False): # Inform if loading from sheet and not explicitly processed
+             st.info(f"Displaying: '{sheet_name}'.")
+    if should_display_results_area:
+        st.subheader("Selected Candidates")
+        # Display token usage if it was just processed (job_processed_key is True and tokens exist)
+        if st.session_state.get(job_processed_key, False) and \
+           (st.session_state.get('total_input_tokens', 0) > 0 or st.session_state.get('total_output_tokens', 0) > 0):
+            display_token_usage() # Assuming display_token_usage is defined
+        if final_candidates_to_display:
+            for i, candidate in enumerate(final_candidates_to_display):
+                score_display = candidate.get('Fit Score', 'N/A')
+                if isinstance(score_display, (float, int)):
+                    score_display = f"{score_display:.3f}"
+                # If score_display is still a string (e.g. 'N/A' or failed float conversion), it will be displayed as is.
+                expander_title = f"{i+1}. {candidate.get('Name', 'N/A')} (Score: {score_display})"
+                with st.expander(expander_title):
+                    text_to_copy = f"""Candidate: {candidate.get('Name', 'N/A')} (Score: {score_display})
+Summary: {candidate.get('summary', 'N/A')}
+Current: {candidate.get('Current Title & Company', 'N/A')}
+Education: {candidate.get('Educational Background', 'N/A')}
+Experience: {candidate.get('Years of Experience', 'N/A')}
+Location: {candidate.get('Location', 'N/A')}
+LinkedIn: {candidate.get('LinkedIn', 'N/A')}
+Justification: {candidate.get('justification', 'N/A')}
+"""
+                    js_text_to_copy = json.dumps(text_to_copy)
+                    button_unique_id = f"copy_btn_job{selected_job_index}_cand{i}"
+                    copy_button_html = f"""
+                    <script>
+                        function copyToClipboard_{button_unique_id}() {{
+                            const textToCopy = {js_text_to_copy};
+                            navigator.clipboard.writeText(textToCopy).then(function() {{
+                                const btn = document.getElementById('{button_unique_id}');
+                                if (btn) {{ // Check if button exists
+                                    const originalText = btn.innerText;
+                                    btn.innerText = 'Copied!';
+                                    setTimeout(function() {{ btn.innerText = originalText; }}, 1500);
+                                }}
+                            }}, function(err) {{
+                                console.error('Could not copy text: ', err);
+                                alert('Failed to copy text. Please use Ctrl+C or your browser\\'s copy function.');
+                            }});
+                        }}
+                    </script>
+                    <button id="{button_unique_id}" onclick="copyToClipboard_{button_unique_id}()">📋 Copy Details</button>
+                    """
+                    expander_cols = st.columns([0.82, 0.18])
+                    with expander_cols[1]:
+                        st.components.v1.html(copy_button_html, height=40)
+                    with expander_cols[0]:
+                        st.markdown(f"**Summary:** {candidate.get('summary', 'N/A')}")
+                        st.markdown(f"**Current:** {candidate.get('Current Title & Company', 'N/A')}")
+                        st.markdown(f"**Education:** {candidate.get('Educational Background', 'N/A')}")
+                        st.markdown(f"**Experience:** {candidate.get('Years of Experience', 'N/A')}")
+                        st.markdown(f"**Location:** {candidate.get('Location', 'N/A')}")
+                        if 'LinkedIn' in candidate and candidate.get('LinkedIn'):
+                            st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
+                        else:
+                            st.markdown("**LinkedIn Profile:** N/A")
+                    if 'justification' in candidate and candidate.get('justification'):
+                        st.markdown("**Justification:**")
+                        st.info(candidate['justification'])
+        elif st.session_state.get(job_processed_key, False): # Processed but no candidates
+            st.info("No candidates met the criteria for this job after processing.")
+        # This "Reset" button is now governed by should_display_results_area
+        if st.button("Reset and Process Again", key=f"reset_btn_{selected_job_index}"):
+            st.session_state[job_processed_key] = False
+            st.session_state.pop(job_is_processing_key, None)
+            if selected_job_index in st.session_state.Selected_Candidates:
+                del st.session_state.Selected_Candidates[selected_job_index]
+            try:
+                sh.worksheet(sheet_name).clear()
+                st.info(f"Cleared Google Sheet '{sheet_name}' as part of reset.")
+            except: pass # Ignore if sheet not found or error
+            st.rerun()
 # Script entry point: build the app only when this file is executed directly,
 # not when it is imported as a module.
 if __name__ == "__main__":
+    main()