ak0601 committed
Commit 766675c · verified
1 Parent(s): 0cf3e7f

Update src/app_job_copy_1.py

Files changed (1)
  1. src/app_job_copy_1.py +0 -476
src/app_job_copy_1.py CHANGED
@@ -1,479 +1,3 @@
- # import streamlit as st
- # import pandas as pd
- # import json
- # import os
- # from pydantic import BaseModel, Field
- # from typing import List, Set, Dict, Any, Optional
- # import time
- # from langchain_openai import ChatOpenAI
- # from langchain_core.messages import HumanMessage
- # from langchain_core.prompts import ChatPromptTemplate
- # from langchain_core.output_parsers import StrOutputParser
- # from langchain_core.prompts import PromptTemplate
- # import gspread
- # from google.oauth2 import service_account
-
- # st.set_page_config(
- #     page_title="Candidate Matching App",
- #     page_icon="👨‍💻🎯",
- #     layout="wide"
- # )
-
- # # Define pydantic model for structured output
- # class Shortlist(BaseModel):
- #     fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
- #     candidate_name: str = Field(description="The name of the candidate.")
- #     candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
- #     candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
- #     candidate_location: str = Field(description="The location of the candidate.")
- #     justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
-
- # # Function to parse and normalize tech stacks
- # def parse_tech_stack(stack):
- #     if pd.isna(stack) or stack == "" or stack is None:
- #         return set()
- #     if isinstance(stack, set):
- #         return stack
- #     try:
- #         # Handle potential string representation of sets
- #         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
- #             # This could be a string representation of a set
- #             items = stack.strip("{}").split(",")
- #             return set(item.strip().strip("'\"") for item in items if item.strip())
- #         return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
- #     except Exception as e:
- #         st.error(f"Error parsing tech stack: {e}")
- #         return set()
-
- # def display_tech_stack(stack_set):
- #     if isinstance(stack_set, set):
- #         return ", ".join(sorted(stack_set))
- #     return str(stack_set)
-
- # def get_matching_candidates(job_stack, candidates_df):
- #     """Find candidates with matching tech stack for a specific job"""
- #     matched = []
- #     job_stack_set = parse_tech_stack(job_stack)
-
- #     for _, candidate in candidates_df.iterrows():
- #         candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
- #         common = job_stack_set & candidate_stack
- #         if len(common) >= 2:
- #             matched.append({
- #                 "Name": candidate["Full Name"],
- #                 "URL": candidate["LinkedIn URL"],
- #                 "Degree & Education": candidate["Degree & University"],
- #                 "Years of Experience": candidate["Years of Experience"],
- #                 "Current Title & Company": candidate['Current Title & Company'],
- #                 "Key Highlights": candidate["Key Highlights"],
- #                 "Location": candidate["Location (from most recent experience)"],
- #                 "Experience": str(candidate["Experience"]),
- #                 "Tech Stack": candidate_stack
- #             })
- #     return matched
-
- # def setup_llm():
- #     """Set up the LangChain LLM with structured output"""
- #     # Create LLM instance
- #     llm = ChatOpenAI(
- #         model="gpt-4o-mini",
- #         temperature=0,
- #         max_tokens=None,
- #         timeout=None,
- #         max_retries=2,
- #     )
-
- #     # Create structured output
- #     sum_llm = llm.with_structured_output(Shortlist)
-
- #     # Create system prompt
- #     system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
- #     the profile is according to job.
- #     Try to ensure following points while estimating the candidate's fit score:
- #     For education:
- #     Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
- #     Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
- #     Tier3 - Unknown or unranked institutions - Lower points or reject
-
- #     Startup Experience Requirement:
- #     Candidates must have worked as a direct employee at a VC-backed startup (Seed to series C/D)
- #     preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
-
- #     The fit score signifies based on following metrics:
- #     1–5 - Poor Fit - Auto-reject
- #     6–7 - Weak Fit - Auto-reject
- #     8.0–8.7 - Moderate Fit - Auto-reject
- #     8.8–10 - STRONG Fit - Include in results
- #     """
-
- #     # Create query prompt
- #     query_prompt = ChatPromptTemplate.from_messages([
- #         ("system", system),
- #         ("human", """
- #     You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
- #     For this you will be provided with the follwing inputs of job and candidates:
- #     Job Details
- #     Company: {Company}
- #     Role: {Role}
- #     About Company: {desc}
- #     Locations: {Locations}
- #     Tech Stack: {Tech_Stack}
- #     Industry: {Industry}
-
-
- #     Candidate Details:
- #     Full Name: {Full_Name}
- #     LinkedIn URL: {LinkedIn_URL}
- #     Current Title & Company: {Current_Title_Company}
- #     Years of Experience: {Years_of_Experience}
- #     Degree & University: {Degree_University}
- #     Key Tech Stack: {Key_Tech_Stack}
- #     Key Highlights: {Key_Highlights}
- #     Location (from most recent experience): {cand_Location}
- #     Past_Experience: {Experience}
-
-
- #     Answer in the structured manner as per the schema.
- #     If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
- #     """),
- #     ])
-
- #     # Chain the prompt and LLM
- #     cat_class = query_prompt | sum_llm
-
- #     return cat_class
-
- # def call_llm(candidate_data, job_data, llm_chain):
- #     """Call the actual LLM to evaluate the candidate"""
- #     try:
- #         # Convert tech stacks to strings for the LLM payload
- #         job_tech_stack = job_data.get("Tech_Stack", set())
- #         candidate_tech_stack = candidate_data.get("Tech Stack", set())
-
- #         if isinstance(job_tech_stack, set):
- #             job_tech_stack = ", ".join(sorted(job_tech_stack))
-
- #         if isinstance(candidate_tech_stack, set):
- #             candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
-
- #         # Prepare payload for LLM
- #         payload = {
- #             "Company": job_data.get("Company", ""),
- #             "Role": job_data.get("Role", ""),
- #             "desc": job_data.get("desc", ""),
- #             "Locations": job_data.get("Locations", ""),
- #             "Tech_Stack": job_tech_stack,
- #             "Industry": job_data.get("Industry", ""),
-
- #             "Full_Name": candidate_data.get("Name", ""),
- #             "LinkedIn_URL": candidate_data.get("URL", ""),
- #             "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
- #             "Years_of_Experience": candidate_data.get("Years of Experience", ""),
- #             "Degree_University": candidate_data.get("Degree & Education", ""),
- #             "Key_Tech_Stack": candidate_tech_stack,
- #             "Key_Highlights": candidate_data.get("Key Highlights", ""),
- #             "cand_Location": candidate_data.get("Location", ""),
- #             "Experience": candidate_data.get("Experience", "")
- #         }
-
- #         # Call LLM
- #         response = llm_chain.invoke(payload)
- #         print(candidate_data.get("Experience", ""))
-
- #         # Return response in expected format
- #         return {
- #             "candidate_name": response.candidate_name,
- #             "candidate_url": response.candidate_url,
- #             "candidate_summary": response.candidate_summary,
- #             "candidate_location": response.candidate_location,
- #             "fit_score": response.fit_score,
- #             "justification": response.justification
- #         }
- #     except Exception as e:
- #         st.error(f"Error calling LLM: {e}")
- #         # Fallback to a default response
- #         return {
- #             "candidate_name": candidate_data.get("Name", "Unknown"),
- #             "candidate_url": candidate_data.get("URL", ""),
- #             "candidate_summary": "Error processing candidate profile",
- #             "candidate_location": candidate_data.get("Location", "Unknown"),
- #             "fit_score": 0.0,
- #             "justification": f"Error in LLM processing: {str(e)}"
- #         }
-
- # def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
- #     """Process candidates for a specific job using the LLM"""
- #     if llm_chain is None:
- #         with st.spinner("Setting up LLM..."):
- #             llm_chain = setup_llm()
-
- #     selected_candidates = []
-
- #     try:
- #         # Get job-specific data
- #         job_data = {
- #             "Company": job_row["Company"],
- #             "Role": job_row["Role"],
- #             "desc": job_row.get("One liner", ""),
- #             "Locations": job_row.get("Locations", ""),
- #             "Tech_Stack": job_row["Tech Stack"],
- #             "Industry": job_row.get("Industry", "")
- #         }
-
- #         # Find matching candidates for this job
- #         with st.spinner("Finding matching candidates based on tech stack..."):
- #             matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
-
- #         if not matching_candidates:
- #             st.warning("No candidates with matching tech stack found for this job.")
- #             return []
-
- #         st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
-
- #         # Create progress elements
- #         candidates_progress = st.progress(0)
- #         candidate_status = st.empty()
-
- #         # Process each candidate
- #         for i, candidate_data in enumerate(matching_candidates):
- #             # Update progress
- #             candidates_progress.progress((i + 1) / len(matching_candidates))
- #             candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
-
- #             # Process the candidate with the LLM
- #             response = call_llm(candidate_data, job_data, llm_chain)
-
- #             response_dict = {
- #                 "Name": response["candidate_name"],
- #                 "LinkedIn": response["candidate_url"],
- #                 "summary": response["candidate_summary"],
- #                 "Location": response["candidate_location"],
- #                 "Fit Score": response["fit_score"],
- #                 "justification": response["justification"],
- #                 # Add back original candidate data for context
- #                 "Educational Background": candidate_data.get("Degree & Education", ""),
- #                 "Years of Experience": candidate_data.get("Years of Experience", ""),
- #                 "Current Title & Company": candidate_data.get("Current Title & Company", "")
- #             }
-
- #             # Add to selected candidates if score is high enough
- #             if response["fit_score"] >= 8.8:
- #                 selected_candidates.append(response_dict)
- #                 st.markdown(response_dict)
- #             else:
- #                 st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
-
- #         # Clear progress indicators
- #         candidates_progress.empty()
- #         candidate_status.empty()
-
- #         # Show results
- #         if selected_candidates:
- #             st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
- #         else:
- #             st.info("No candidates met the minimum fit score threshold for this job.")
-
- #         return selected_candidates
-
- #     except Exception as e:
- #         st.error(f"Error processing job: {e}")
- #         return []
-
- # def main():
- #     st.title("👨‍💻 Candidate Matching App")
-
- #     # Initialize session state
- #     if 'processed_jobs' not in st.session_state:
- #         st.session_state.processed_jobs = {}
-
- #     st.write("""
- #     This app matches job listings with candidate profiles based on tech stack and other criteria.
- #     Select a job to find matching candidates.
- #     """)
-
- #     # API Key input
- #     with st.sidebar:
- #         st.header("API Configuration")
- #         api_key = st.text_input("Enter OpenAI API Key", type="password")
- #         if api_key:
- #             os.environ["OPENAI_API_KEY"] = api_key
- #             st.success("API Key set!")
- #         else:
- #             st.warning("Please enter OpenAI API Key to use LLM features")
-
- #     # Show API key warning if not set
- #     SERVICE_ACCOUNT_FILE = 'synapse-recruitment-e94255ca76fd.json'
- #     SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
- #     creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
- #     gc = gspread.authorize(creds)
- #     job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
- #     candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
-
- #     if not api_key:
- #         st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
-
- #     if api_key:
- #         try:
- #             # Load data from Google Sheets
- #             job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
- #             job_data = job_worksheet.get_all_values()
- #             candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
- #             candidate_data = candidate_worksheet.get_all_values()
-
- #             # Convert to DataFrames
- #             jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
- #             candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
- #             candidates_df = candidates_df.fillna("Unknown")
-
- #             # Display data preview
- #             with st.expander("Preview uploaded data"):
- #                 st.subheader("Jobs Data Preview")
- #                 st.dataframe(jobs_df.head(3))
-
- #                 st.subheader("Candidates Data Preview")
- #                 st.dataframe(candidates_df.head(3))
-
- #             # Map column names if needed
- #             column_mapping = {
- #                 "Full Name": "Full Name",
- #                 "LinkedIn URL": "LinkedIn URL",
- #                 "Current Title & Company": "Current Title & Company",
- #                 "Years of Experience": "Years of Experience",
- #                 "Degree & University": "Degree & University",
- #                 "Key Tech Stack": "Key Tech Stack",
- #                 "Key Highlights": "Key Highlights",
- #                 "Location (from most recent experience)": "Location (from most recent experience)"
- #             }
-
- #             # Rename columns if they don't match expected
- #             candidates_df = candidates_df.rename(columns={
- #                 col: mapping for col, mapping in column_mapping.items()
- #                 if col in candidates_df.columns and col != mapping
- #             })
-
- #             # Now, instead of processing all jobs upfront, we'll display job selection
- #             # and only process the selected job when the user chooses it
- #             display_job_selection(jobs_df, candidates_df)
-
- #         except Exception as e:
- #             st.error(f"Error processing files: {e}")
-
- #     st.divider()
-
-
- # def display_job_selection(jobs_df, candidates_df):
- #     # Store the LLM chain as a session state to avoid recreating it
- #     if 'llm_chain' not in st.session_state:
- #         st.session_state.llm_chain = None
-
- #     st.subheader("Select a job to view potential matches")
-
- #     # Create job options - but don't compute matches yet
- #     job_options = []
- #     for i, row in jobs_df.iterrows():
- #         job_options.append(f"{row['Role']} at {row['Company']}")
-
- #     if job_options:
- #         selected_job_index = st.selectbox("Jobs:",
- #                                           range(len(job_options)),
- #                                           format_func=lambda x: job_options[x])
-
- #         # Display job details
- #         job_row = jobs_df.iloc[selected_job_index]
-
- #         # Parse tech stack for display
- #         job_row_stack = parse_tech_stack(job_row["Tech Stack"])
-
- #         col1, col2 = st.columns([2, 1])
-
- #         with col1:
- #             st.subheader(f"Job Details: {job_row['Role']}")
-
- #             job_details = {
- #                 "Company": job_row["Company"],
- #                 "Role": job_row["Role"],
- #                 "Description": job_row.get("One liner", "N/A"),
- #                 "Locations": job_row.get("Locations", "N/A"),
- #                 "Industry": job_row.get("Industry", "N/A"),
- #                 "Tech Stack": display_tech_stack(job_row_stack)
- #             }
-
- #             for key, value in job_details.items():
- #                 st.markdown(f"**{key}:** {value}")
-
- #         # Create a key for this job in session state
- #         job_key = f"job_{selected_job_index}_processed"
-
- #         if job_key not in st.session_state:
- #             st.session_state[job_key] = False
-
- #         # Add a process button for this job
- #         if not st.session_state[job_key]:
- #             if st.button(f"Find Matching Candidates for this Job"):
- #                 if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
- #                     st.error("Please enter your OpenAI API key in the sidebar before processing")
- #                 else:
- #                     # Process candidates for this job (only when requested)
- #                     selected_candidates = process_candidates_for_job(
- #                         job_row,
- #                         candidates_df,
- #                         st.session_state.llm_chain
- #                     )
-
- #                     # Store the results and set as processed
- #                     if 'Selected_Candidates' not in st.session_state:
- #                         st.session_state.Selected_Candidates = {}
- #                     st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
- #                     st.session_state[job_key] = True
-
- #                     # Store the LLM chain for reuse
- #                     if st.session_state.llm_chain is None:
- #                         st.session_state.llm_chain = setup_llm()
-
- #                     # Force refresh
- #                     st.rerun()
-
- #         # Display selected candidates if already processed
- #         if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
- #             selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
-
- #             # Display selected candidates
- #             st.subheader("Selected Candidates")
-
- #             if len(selected_candidates) > 0:
- #                 for i, candidate in enumerate(selected_candidates):
- #                     with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
- #                         col1, col2 = st.columns([3, 1])
-
- #                         with col1:
- #                             st.markdown(f"**Summary:** {candidate['summary']}")
- #                             st.markdown(f"**Current:** {candidate['Current Title & Company']}")
- #                             st.markdown(f"**Education:** {candidate['Educational Background']}")
- #                             st.markdown(f"**Experience:** {candidate['Years of Experience']}")
- #                             st.markdown(f"**Location:** {candidate['Location']}")
- #                             st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
-
- #                         with col2:
- #                             st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
-
- #                         st.markdown("**Justification:**")
- #                         st.info(candidate['justification'])
- #             else:
- #                 st.info("No candidates met the minimum score threshold (8.8) for this job.")
-
- #         # We don't show tech-matched candidates here since they are generated
- #         # during the LLM matching process now
-
- #         # Add a reset button to start over
- #         if st.button("Reset and Process Again"):
- #             st.session_state[job_key] = False
- #             st.rerun()
-
- # if __name__ == "__main__":
- #     main()
-
-
-
  import streamlit as st
  import pandas as pd
  import json