ak0601 committed · Commit 13dfe17 · verified · Parent(s): 11e0ff1

Update reccomendation.py

Files changed (1): reccomendation.py (+1101 / -16)

reccomendation.py CHANGED
@@ -1,3 +1,957 @@
+ # import pandas as pd
+ # import requests
+ # from pydantic import BaseModel, Field
+ # from typing import List, Tuple, Optional
+ # from langchain_openai import ChatOpenAI
+ # from langchain_core.prompts import ChatPromptTemplate
+ # import os
+ # from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Depends, Header, Request
+ # from fastapi.responses import JSONResponse
+ # from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+ # from fastapi.middleware.cors import CORSMiddleware
+ # import json
+ # import tempfile
+ # import shutil
+ # import PyPDF2
+ # from dotenv import load_dotenv
+ # import pdfplumber
+ # import re
+ # from db import *
+ # import time
+ # import asyncio
+ # from contextlib import asynccontextmanager
+ # import logging
+ # from sqlalchemy.pool import NullPool
+
+ # # Load environment variables
+ # load_dotenv()
+
+ # # Configure logging for Cloud Run
+ # logging.basicConfig(
+ #     level=logging.INFO,
+ #     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ # )
+ # logger = logging.getLogger(__name__)
+
+ # # Global variable to store access token
+ # access_token = None
+
+ # # Startup/shutdown events
+ # @asynccontextmanager
+ # async def lifespan(app: FastAPI):
+ #     # Startup
+ #     logger.info("Starting up Job Recommendation API...")
+ #     # You can initialize connection pools here if needed
+ #     yield
+ #     # Shutdown
+ #     logger.info("Shutting down Job Recommendation API...")
+ #     # Close any open connections here
+
+ # # Initialize FastAPI app with lifespan
+ # app = FastAPI(
+ #     title="Job Recommendation API",
+ #     description="API for processing resumes and recommending jobs",
+ #     lifespan=lifespan
+ # )
+
+ # # Add CORS middleware for cloud deployment
+ # app.add_middleware(
+ #     CORSMiddleware,
+ #     allow_origins=["*"],  # Configure based on your needs
+ #     allow_credentials=True,
+ #     allow_methods=["*"],
+ #     allow_headers=["*"],
+ # )
+
+ # # Add request ID middleware for better tracing
+ # @app.middleware("http")
+ # async def add_request_id(request: Request, call_next):
+ #     request_id = f"{time.time()}-{request.client.host}"
+ #     request.state.request_id = request_id
+
+ #     # Log the request
+ #     logger.info(f"Request ID: {request_id} - {request.method} {request.url.path}")
+
+ #     try:
+ #         response = await call_next(request)
+ #         response.headers["X-Request-ID"] = request_id
+ #         return response
+ #     except Exception as e:
+ #         logger.error(f"Request ID: {request_id} - Error: {str(e)}")
+ #         raise
+
+ # # Security configuration
+ # API_KEY = os.getenv("API_KEY")
+ # security = HTTPBearer()
+
+ # def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
+ #     """
+ #     Verify the API key from the Authorization header
+ #     """
+ #     if not API_KEY:
+ #         logger.error("API key not configured")
+ #         raise HTTPException(
+ #             status_code=500,
+ #             detail="API key not configured",
+ #         )
+
+ #     if credentials.credentials != API_KEY:
+ #         logger.warning("Invalid API key attempt")
+ #         raise HTTPException(
+ #             status_code=401,
+ #             detail="Invalid API key",
+ #             headers={"WWW-Authenticate": "Bearer"},
+ #         )
+ #     return credentials.credentials
+
+ # # Initialize OpenAI client with error handling
+ # try:
+ #     llm = ChatOpenAI(
+ #         model="gpt-4o-mini",
+ #         temperature=0,
+ #         api_key=os.getenv("OPENAI_API_KEY")
+ #     )
+ #     logger.info("OpenAI client initialized successfully")
+ # except Exception as e:
+ #     logger.error(f"Failed to initialize OpenAI client: {e}")
+ #     raise
+
+ # # Initialize database engine with connection pooling suitable for Cloud Run
+ # def get_engine():
+ #     """
+ #     Get database engine with NullPool for Cloud Run
+ #     """
+ #     try:
+ #         conn_string = f"postgresql://{DB_PARAMS['user']}:{DB_PARAMS['password']}@{DB_PARAMS['host']}:{DB_PARAMS['port']}/{DB_PARAMS['dbname']}"
+ #         # Use NullPool for Cloud Run to avoid connection issues
+ #         engine = create_engine(conn_string, poolclass=NullPool, pool_pre_ping=True)
+ #         logger.info("Database engine created successfully")
+ #         return engine
+ #     except Exception as e:
+ #         logger.error(f"Failed to create database engine: {e}")
+ #         raise
+
+ # # Initialize database engine
+ # engine = get_engine()
+
+ # def get_access_token():
+ #     """
+ #     Get access token for the external API with better error handling
+ #     """
+ #     global access_token
+
+ #     # If we already have a token, return it
+ #     if access_token:
+ #         return access_token
+
+ #     try:
+ #         login_url = "https://fitscore-agent-535960463668.us-central1.run.app/auth/login"
+ #         login_data = {
+ #             "email": "[email protected]",
+ #             "password": "Password@123"
+ #         }
+ #         login_headers = {
+ #             'accept': 'application/json',
+ #             'Content-Type': 'application/json'
+ #         }
+
+ #         # Add timeout to prevent hanging
+ #         login_response = requests.post(login_url, headers=login_headers, json=login_data, timeout=30)
+
+ #         if login_response.status_code == 200:
+ #             login_result = login_response.json()
+ #             access_token = login_result.get('data', {}).get('tokens', {}).get('accessToken')
+ #             if access_token:
+ #                 logger.info("Successfully obtained access token")
+ #                 return access_token
+ #             else:
+ #                 logger.error("Login successful but no access token found in response")
+ #                 return None
+ #         else:
+ #             logger.error(f"Login failed with status {login_response.status_code}: {login_response.text}")
+ #             return None
+ #     except requests.exceptions.Timeout:
+ #         logger.error("Login request timed out")
+ #         return None
+ #     except requests.exceptions.RequestException as e:
+ #         logger.error(f"Network error during login: {e}")
+ #         return None
+ #     except Exception as e:
+ #         logger.error(f"Unexpected error getting access token: {e}")
+ #         return None
+
+ # class structure(BaseModel):
+ #     name: str = Field(description="Name of the candidate")
+ #     location: str = Field(description="The location of the candidate. Extract city and state if possible.")
+ #     skills: List[str] = Field(description="List of individual skills of the candidate")
+ #     ideal_jobs: str = Field(description="List of ideal jobs for the candidate based on past experience.")
+ #     email: str = Field(description="The email of the candidate")
+ #     yoe: str = Field(description="Years of experience of the candidate.")
+ #     experience: str = Field(description="A brief summary of the candidate's past experience.")
+ #     industry: str = Field(description="The industry the candidate has experience in.(Tech,Legal,Finance/Accounting,Healthcare,Industrial,Logistics,Telecom,Admin,Other)")
+
+ # class JobAnalysis(BaseModel):
+ #     job_title: str
+ #     company_name: str
+ #     analysis: dict
+
+ # def extract_text_from_pdf(pdf_file_path: str) -> str:
+ #     """
+ #     Extract text from PDF file using multiple methods for better accuracy
+ #     """
+ #     text = ""
+
+ #     # Method 1: Try pdfplumber (better for complex layouts)
+ #     try:
+ #         with pdfplumber.open(pdf_file_path) as pdf:
+ #             for page in pdf.pages:
+ #                 page_text = page.extract_text()
+ #                 if page_text:
+ #                     text += page_text + "\n"
+ #         if text.strip():
+ #             logger.info(f"Successfully extracted text using pdfplumber: {len(text)} characters")
+ #             return text.strip()
+ #     except Exception as e:
+ #         logger.warning(f"pdfplumber failed: {e}")
+
+ #     # Method 2: Try PyPDF2 (fallback)
+ #     try:
+ #         with open(pdf_file_path, 'rb') as file:
+ #             pdf_reader = PyPDF2.PdfReader(file)
+ #             for page in pdf_reader.pages:
+ #                 page_text = page.extract_text()
+ #                 if page_text:
+ #                     text += page_text + "\n"
+ #         if text.strip():
+ #             logger.info(f"Successfully extracted text using PyPDF2: {len(text)} characters")
+ #             return text.strip()
+ #     except Exception as e:
+ #         logger.error(f"PyPDF2 failed: {e}")
+
+ #     # If both methods fail, return empty string
+ #     logger.error("Failed to extract text from PDF")
+ #     return ""
+
+ # def extract_resume_info(resume_text: str) -> structure:
+ #     """
+ #     Extract structured information from resume using LLM
+ #     """
+ #     prompt = ChatPromptTemplate.from_template("""
+ #     You are an expert resume parser. Extract the following information from the resume text provided and return it in a structured JSON format.
+
+ #     Resume Text:
+ #     {resume_text}
+
+ #     Please extract and structure the information according to the following schema:
+ #     - name: Full name of the candidate
+ #     - location: City and state if available, otherwise general location
+ #     - skills: List of technical skills, tools, technologies, programming languages, etc.
+ #     - ideal_jobs: Based on their experience, what types of jobs would be ideal for this candidate
+ #     - email: Email address of the candidate (if found in resume)
+ #     - yoe: Years of experience (extract from work history)
+ #     - experience: Brief summary of their work experience and background
+ #     - industry: Categorize into one of these industries: Tech, Legal, Finance/Accounting, Healthcare, Industrial, Logistics, Telecom, Admin, Other
+
+ #     Return ONLY a valid JSON object with these fields. Do not include any other text or explanations.
+ #     """)
+
+ #     try:
+ #         str_llm = llm.with_structured_output(structure)
+ #         chain = prompt | str_llm
+ #         response = chain.invoke({"resume_text": resume_text})
+
+ #         validated_data = {
+ #             'name': response.name,
+ #             'location': response.location,
+ #             'email': response.email,
+ #             'skills': response.skills,
+ #             'ideal_jobs': response.ideal_jobs,
+ #             'yoe': response.yoe,
+ #             'experience': response.experience,
+ #             'industry': response.industry
+ #         }
+
+ #         logger.info(f"Successfully extracted resume info for: {validated_data['name']}")
+ #         return validated_data
+
+ #     except Exception as e:
+ #         logger.error(f"Failed to extract resume info: {e}")
+ #         return {
+ #             'name': "Unknown",
+ #             'location': "Unknown",
+ #             'email': "",
+ #             'skills': [],
+ #             'ideal_jobs': "Software Engineer",
+ #             'yoe': "0",
+ #             'experience': "No experience listed",
+ #             'industry': "Tech"
+ #         }
+
+ # def filter_jobs_by_industry(jobs_df: pd.DataFrame, target_industry: str) -> pd.DataFrame:
+ #     """
+ #     Filter jobs by industry
+ #     """
+ #     # Map the extracted industry to database industry values
+ #     industry_mapping = {
+ #         'Tech': ['technology', 'VC Tech'],
+ #         'Legal': ['Legal'],
+ #         'Finance/Accounting': ['finance/Accounting'],
+ #         'Healthcare': ['healthcare'],
+ #         'Industrial': ['industrial'],
+ #         'Logistics': ['logistics'],
+ #         'Telecom': ['telecom'],
+ #         'Admin': ['admin'],
+ #         'Other': ['Other']
+ #     }
+
+ #     target_industries = industry_mapping.get(target_industry, ['Tech'])
+
+ #     # Filter jobs by industry (using database column name 'industry')
+ #     filtered_jobs = jobs_df[jobs_df['industry'].isin(target_industries)]
+
+ #     logger.info(f"Filtered {len(filtered_jobs)} jobs for industry: {target_industry}")
+ #     return filtered_jobs
+
+ # def filter_jobs_by_location(jobs_df: pd.DataFrame, candidate_location: str) -> pd.DataFrame:
+ #     """
+ #     Filter jobs by location matching the candidate's location
+ #     """
+ #     if not candidate_location or candidate_location.lower() in ['unknown', 'n/a', '']:
+ #         logger.info(f"No location info provided, returning all {len(jobs_df)} jobs")
+ #         return jobs_df  # Return all jobs if no location info
+
+ #     # Clean and normalize candidate location
+ #     candidate_location = candidate_location.lower().strip()
+ #     logger.info(f"Filtering jobs for candidate location: {candidate_location}")
+
+ #     # Extract state abbreviations and full names
+ #     state_mapping = {
+ #         'alabama': 'al', 'alaska': 'ak', 'arizona': 'az', 'arkansas': 'ar', 'california': 'ca',
+ #         'colorado': 'co', 'connecticut': 'ct', 'delaware': 'de', 'district of columbia': 'dc', 'florida': 'fl', 'georgia': 'ga',
+ #         'hawaii': 'hi', 'idaho': 'id', 'illinois': 'il', 'indiana': 'in', 'iowa': 'ia',
+ #         'kansas': 'ks', 'kentucky': 'ky', 'louisiana': 'la', 'maine': 'me', 'maryland': 'md',
+ #         'massachusetts': 'ma', 'michigan': 'mi', 'minnesota': 'mn', 'mississippi': 'ms', 'missouri': 'mo',
+ #         'montana': 'mt', 'nebraska': 'ne', 'nevada': 'nv', 'new hampshire': 'nh', 'new jersey': 'nj',
+ #         'new mexico': 'nm', 'new york': 'ny', 'north carolina': 'nc', 'north dakota': 'nd', 'ohio': 'oh',
+ #         'oklahoma': 'ok', 'oregon': 'or', 'pennsylvania': 'pa', 'rhode island': 'ri', 'south carolina': 'sc',
+ #         'south dakota': 'sd', 'tennessee': 'tn', 'texas': 'tx', 'utah': 'ut', 'vermont': 'vt',
+ #         'virginia': 'va', 'washington': 'wa', 'west virginia': 'wv', 'wisconsin': 'wi', 'wyoming': 'wy'
+ #     }
+
+ #     # Create location patterns to match
+ #     location_patterns = []
+
+ #     # Add the original location
+ #     location_patterns.append(candidate_location)
+
+ #     # Add state variations
+ #     for state_name, state_abbr in state_mapping.items():
+ #         if state_name in candidate_location or state_abbr in candidate_location:
+ #             location_patterns.extend([state_name, state_abbr])
+
+ #     # Add common city variations (extract city name)
+ #     city_match = re.search(r'^([^,]+)', candidate_location)
+ #     if city_match:
+ #         city_name = city_match.group(1).strip()
+ #         location_patterns.append(city_name)
+
+ #     # Add remote/anywhere patterns if location is remote
+ #     if 'remote' in candidate_location or 'anywhere' in candidate_location:
+ #         location_patterns.extend(['remote', 'anywhere', 'work from home', 'wfh'])
+
+ #     logger.info(f"Location patterns to match: {location_patterns}")
+
+ #     # Filter jobs by location
+ #     matching_jobs = []
+
+ #     for _, job_row in jobs_df.iterrows():
+ #         job_location = str(job_row.get('job_location', '')).lower()
+
+ #         # Check if any location pattern matches
+ #         location_matches = any(pattern in job_location for pattern in location_patterns)
+
+ #         # Also check for remote jobs if candidate location includes remote
+ #         if 'remote' in candidate_location and any(remote_term in job_location for remote_term in ['remote', 'anywhere', 'work from home', 'wfh']):
+ #             location_matches = True
+
+ #         # Check for exact city/state matches
+ #         if candidate_location in job_location or job_location in candidate_location:
+ #             location_matches = True
+
+ #         if location_matches:
+ #             matching_jobs.append(job_row)
+
+ #     result_df = pd.DataFrame(matching_jobs) if matching_jobs else jobs_df
+ #     logger.info(f"Found {len(matching_jobs)} jobs matching location out of {len(jobs_df)} total jobs")
+
+ #     return result_df
+
+ # def extract_experience_requirement(requirements_text: str) -> dict:
+ #     """
+ #     Extract experience requirements from job requirements text
+ #     Returns a dictionary with min_years, max_years, and level
+ #     """
+ #     if not requirements_text or pd.isna(requirements_text):
+ #         return {'min_years': 0, 'max_years': 999, 'level': 'any'}
+
+ #     requirements_text = str(requirements_text).lower()
+
+ #     # Common experience patterns
+ #     experience_patterns = [
+ #         # Specific year ranges
+ #         r'(\d+)[\-\+]\s*(\d+)\s*years?\s*experience',
+ #         r'(\d+)\s*to\s*(\d+)\s*years?\s*experience',
+ #         r'(\d+)\s*-\s*(\d+)\s*years?\s*experience',
+
+ #         # Minimum years
+ #         r'(\d+)\+?\s*years?\s*experience',
+ #         r'minimum\s*(\d+)\s*years?\s*experience',
+ #         r'at\s*least\s*(\d+)\s*years?\s*experience',
+
+ #         # Level-based patterns
+ #         r'(entry\s*level|junior|associate)',
+ #         r'(mid\s*level|intermediate|mid\s*senior)',
+ #         r'(senior|lead|principal|staff)',
+ #         r'(executive|director|vp|chief|c\s*level)',
+
+ #         # Specific year mentions
+ #         r'(\d+)\s*years?\s*in\s*the\s*field',
+ #         r'(\d+)\s*years?\s*of\s*professional\s*experience',
+ #         r'(\d+)\s*years?\s*of\s*relevant\s*experience'
+ #     ]
+
+ #     min_years = 0
+ #     max_years = 999
+ #     level = 'any'
+
+ #     # Check for specific year ranges
+ #     for pattern in experience_patterns[:3]:  # First 3 patterns are for ranges
+ #         matches = re.findall(pattern, requirements_text)
+ #         if matches:
+ #             try:
+ #                 min_years = int(matches[0][0])
+ #                 max_years = int(matches[0][1])
+ #                 break
+ #             except (ValueError, IndexError):
+ #                 continue
+
+ #     # Check for minimum years if no range found
+ #     if min_years == 0:
+ #         for pattern in experience_patterns[3:6]:  # Minimum year patterns
+ #             matches = re.findall(pattern, requirements_text)
+ #             if matches:
+ #                 try:
+ #                     min_years = int(matches[0])
+ #                     break
+ #                 except (ValueError, IndexError):
+ #                     continue
+
+ #     # Check for level-based requirements
+ #     for pattern in experience_patterns[6:10]:  # Level patterns
+ #         matches = re.findall(pattern, requirements_text)
+ #         if matches:
+ #             level_match = matches[0].lower()
+ #             if 'entry' in level_match or 'junior' in level_match or 'associate' in level_match:
+ #                 level = 'entry'
+ #                 if min_years == 0:
+ #                     min_years = 0
+ #                     max_years = 2
+ #             elif 'mid' in level_match or 'intermediate' in level_match:
+ #                 level = 'mid'
+ #                 if min_years == 0:
+ #                     min_years = 2
+ #                     max_years = 5
+ #             elif 'senior' in level_match or 'lead' in level_match or 'principal' in level_match or 'staff' in level_match:
+ #                 level = 'senior'
+ #                 if min_years == 0:
+ #                     min_years = 5
+ #                     max_years = 10
+ #             elif 'executive' in level_match or 'director' in level_match or 'vp' in level_match or 'chief' in level_match:
+ #                 level = 'executive'
+ #                 if min_years == 0:
+ #                     min_years = 10
+ #                     max_years = 999
+ #             break
+
+ #     # Check for specific year mentions if still no match
+ #     if min_years == 0:
+ #         for pattern in experience_patterns[10:]:  # Specific year mention patterns
+ #             matches = re.findall(pattern, requirements_text)
+ #             if matches:
+ #                 try:
+ #                     min_years = int(matches[0])
+ #                     max_years = min_years + 2  # Add buffer
+ #                     break
+ #                 except (ValueError, IndexError):
+ #                     continue
+
+ #     return {
+ #         'min_years': min_years,
+ #         'max_years': max_years,
+ #         'level': level
+ #     }
+
+ # def filter_jobs_by_experience(jobs_df: pd.DataFrame, candidate_yoe: str) -> pd.DataFrame:
+ #     """
+ #     Filter jobs by experience level matching the candidate's years of experience
+ #     """
+ #     if not candidate_yoe or candidate_yoe.lower() in ['unknown', 'n/a', '']:
+ #         logger.info(f"No experience info provided, returning all {len(jobs_df)} jobs")
+ #         return jobs_df
+
+ #     # Extract numeric years from candidate experience
+ #     try:
+ #         # Handle various formats like "5 years", "5+ years", "5-7 years", etc.
+ #         yoe_match = re.search(r'(\d+(?:\.\d+)?)', str(candidate_yoe))
+ #         if yoe_match:
+ #             candidate_years = float(yoe_match.group(1))
+ #         else:
+ #             logger.warning(f"Could not extract years from: {candidate_yoe}")
+ #             return jobs_df
+ #     except (ValueError, TypeError):
+ #         logger.error(f"Invalid experience format: {candidate_yoe}")
+ #         return jobs_df
+
+ #     logger.info(f"Filtering jobs for candidate with {candidate_years} years of experience")
+
+ #     # Filter jobs by experience requirements
+ #     matching_jobs = []
+
+ #     for _, job_row in jobs_df.iterrows():
+ #         requirements_text = str(job_row.get('requirements', ''))
+ #         experience_req = extract_experience_requirement(requirements_text)
+
+ #         # Check if candidate's experience matches the job requirements
+ #         if (candidate_years >= experience_req['min_years'] and
+ #             candidate_years <= experience_req['max_years']):
+ #             matching_jobs.append(job_row)
+
+ #     result_df = pd.DataFrame(matching_jobs) if matching_jobs else jobs_df
+ #     logger.info(f"Found {len(matching_jobs)} jobs matching experience out of {len(jobs_df)} total jobs")
+
+ #     return result_df
+
+ # def filter_jobs_by_priority(jobs_df: pd.DataFrame) -> pd.DataFrame:
+ #     """
+ #     Filter jobs to only include high priority jobs
+ #     """
+ #     if jobs_df.empty:
+ #         logger.info("No jobs to filter by priority")
+ #         return jobs_df
+
+ #     # Filter jobs by priority - only include high priority jobs
+ #     priority_filtered_jobs = jobs_df[jobs_df['priority'].str.lower() == 'high']
+
+ #     logger.info(f"Found {len(priority_filtered_jobs)} high priority jobs out of {len(jobs_df)} total jobs")
+
+ #     return priority_filtered_jobs
+
+ # def create_job_description(job_row: pd.Series) -> str:
+ #     """
+ #     Create a comprehensive job description from job data
+ #     """
+ #     description_parts = []
+
+ #     if pd.notna(job_row.get('company_blurb')):
+ #         description_parts.append(f"Company: {job_row['company_blurb']}")
+
+ #     if pd.notna(job_row.get('company_culture')):
+ #         description_parts.append(f"Company Culture: {job_row['company_culture']}")
+
+ #     if pd.notna(job_row.get('requirements')):
+ #         description_parts.append(f"Requirements: {job_row['requirements']}")
+
+ #     if pd.notna(job_row.get('role_responsibilities')):
+ #         description_parts.append(f"Role Responsibilities: {job_row['role_responsibilities']}")
+
+ #     if pd.notna(job_row.get('job_location')):
+ #         description_parts.append(f"Location: {job_row['job_location']}")
+
+ #     return "\n\n".join(description_parts)
+
+ # def clean_analysis_result(analysis_result: dict) -> dict:
+ #     """
+ #     Clean up the analysis result to only include final_score and summary
+ #     """
+ #     if not isinstance(analysis_result, dict):
+ #         return analysis_result
+
+ #     # Remove user_context if present
+ #     if 'user_context' in analysis_result:
+ #         del analysis_result['user_context']
+
+ #     # Clean up final_response if present
+ #     if 'final_response' in analysis_result:
+ #         try:
+ #             # Handle both string and dict formats
+ #             if isinstance(analysis_result['final_response'], str):
+ #                 final_response = json.loads(analysis_result['final_response'])
+ #             else:
+ #                 final_response = analysis_result['final_response']
+
+ #             # Extract and format the evaluation data
+ #             if 'evaluation' in final_response and len(final_response['evaluation']) > 0:
+ #                 evaluation = final_response['evaluation'][0]
+
+ #                 # Create a minimal structure with only final_score and summary
+ #                 cleaned_response = {
+ #                     'final_score': evaluation.get('final_score', 0),
+ #                     'summary': {}
+ #                 }
+
+ #                 # Extract summary information
+ #                 if 'summary' in evaluation and len(evaluation['summary']) > 0:
+ #                     summary = evaluation['summary'][0]
+ #                     cleaned_response['summary'] = {
+ #                         'strengths': summary.get('strengths', []),
+ #                         'weaknesses': summary.get('weaknesses', []),
+ #                         'opportunities': summary.get('opportunities', []),
+ #                         'recommendations': summary.get('recommendations', [])
+ #                     }
+
+ #                 analysis_result['final_response'] = cleaned_response
+
+ #         except (json.JSONDecodeError, KeyError, IndexError) as e:
+ #             logger.error(f"Error cleaning analysis result: {e}")
+ #             # Keep original if cleaning fails
+ #             pass
+
+ #     return analysis_result
+
+ # def sort_jobs_by_score(job_analyses: list) -> list:
+ #     """
+ #     Sort jobs by final_score in descending order (highest scores first)
+ #     """
+ #     def extract_score(job_analysis):
+ #         try:
+ #             analysis = job_analysis.get('analysis', {})
+ #             if 'final_response' in analysis and isinstance(analysis['final_response'], dict):
+ #                 return analysis['final_response'].get('final_score', 0)
+ #             return 0
+ #         except:
+ #             return 0
+
+ #     return sorted(job_analyses, key=extract_score, reverse=True)
+
+ # async def analyze_job_fit_with_retry(job_description: str, resume_file_path: str, max_retries: int = 3) -> dict:
+ #     """
+ #     Analyze job-candidate fit with retry logic for resilience
+ #     """
+ #     for attempt in range(max_retries):
+ #         try:
+ #             result = analyze_job_fit(job_description, resume_file_path)
+ #             if "error" not in result:
+ #                 return result
+
+ #             # If authentication error and not last attempt, retry
+ #             if "Authentication failed" in result.get("error", "") and attempt < max_retries - 1:
+ #                 logger.warning(f"Authentication failed, retrying... (attempt {attempt + 1}/{max_retries})")
+ #                 global access_token
+ #                 access_token = None  # Reset token to force refresh
+ #                 await asyncio.sleep(2 ** attempt)  # Exponential backoff
+ #                 continue
+
+ #             return result
+ #         except Exception as e:
+ #             logger.error(f"Attempt {attempt + 1}/{max_retries} failed: {str(e)}")
+ #             if attempt == max_retries - 1:
+ #                 return {"error": f"Failed after {max_retries} attempts: {str(e)}"}
+ #             await asyncio.sleep(2 ** attempt)
+
+ # def analyze_job_fit(job_description: str, resume_file_path: str) -> dict:
+ #     """
+ #     Analyze job-candidate fit using the external API
+ #     """
+
+ #     url = "https://fitscore-agent-535960463668.us-central1.run.app/analyze"
+
+ #     # Check if resume file exists
+ #     if not os.path.exists(resume_file_path):
+ #         logger.error(f"Resume file not found: {resume_file_path}")
+ #         return {"error": f"Resume file not found: {resume_file_path}"}
+
+
+ #     # Prepare headers with authentication
+ #     headers = {
+ #         'accept': 'application/json',
+ #         'Authorization': f'Bearer {get_access_token()}'
+ #     }
+
+ #     # Prepare form data
+ #     files = {
+ #         'resume': (os.path.basename(resume_file_path), open(resume_file_path, 'rb'), 'application/pdf')
+ #     }
+
+ #     data = {
+ #         'jd_text': job_description
+ #     }
+
+ #     try:
+ #         # Make the API request with longer timeout for cloud environments
+ #         response = requests.post(url, headers=headers, files=files, data=data, timeout=None)
+
+ #         # If we get an authentication error, try to get a fresh token and retry once
+ #         if response.status_code == 401:
+ #             logger.warning("Authentication failed, getting fresh token...")
+ #             global access_token
+ #             access_token = None  # Reset the token
+ #             new_token = get_access_token()
+ #             if new_token:
+ #                 headers['Authorization'] = f'Bearer {new_token}'
+ #                 # Close the previous file and reopen
+ #                 files['resume'][1].close()
+ #                 files['resume'] = (os.path.basename(resume_file_path), open(resume_file_path, 'rb'), 'application/pdf')
+ #                 response = requests.post(url, headers=headers, files=files, data=data, timeout=None)
+ #             else:
+ #                 # If we can't get a fresh token, return error
+ #                 return {"error": "Authentication failed and could not obtain fresh token"}
+
+ #         if response.status_code == 200:
+ #             logger.info("Job fit analysis completed successfully")
+ #             return response.json()
+ #         elif response.status_code == 401:
+ #             # If we still get 401 after fresh token, return error
+ #             return {"error": "Authentication failed even with fresh token"}
+ #         else:
+ #             logger.error(f"API call failed with status {response.status_code}")
+ #             return {"error": f"API call failed with status {response.status_code}", "details": response.text}
+
+ #     except requests.exceptions.Timeout:
+ #         logger.error("API request timed out")
+ #         return {"error": "API request timed out"}
+ #     except Exception as e:
+ #         logger.error(f"Exception occurred: {str(e)}")
+ #         return {"error": f"Exception occurred: {str(e)}"}
+ #     finally:
+ #         # Ensure the file is closed
+ #         if 'resume' in files:
+ #             try:
+ #                 files['resume'][1].close()
+ #             except:
+ #                 pass
+
+ # @app.post("/process_resume_and_recommend_jobs")
+ # async def process_resume_and_recommend_jobs(
+ #     resume: UploadFile = File(...),
+ #     resume_text: str = Form(""),
+ #     api_key: str = Depends(verify_api_key)
+ # ):
+ #     """
+ #     Process resume, extract information, filter jobs by industry, and analyze fit
+ #     """
+ #     request_start_time = time.time()
+
+ #     try:
+ #         logger.info(f"Processing resume: {resume.filename}")
+
+ #         # Save uploaded file temporarily
+ #         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
+ #             shutil.copyfileobj(resume.file, tmp_file)
+ #             tmp_file_path = tmp_file.name
+
+ #         try:
+ #             # Extract text from PDF if no resume_text provided
+ #             if not resume_text:
+ #                 resume_text = extract_text_from_pdf(tmp_file_path)
+ #                 if not resume_text:
+ #                     logger.error("Could not extract text from PDF file")
+ #                     return JSONResponse(
+ #                         status_code=400,
+ #                         content={"error": "Could not extract text from PDF file"}
+ #                     )
+
+ #             # Extract resume information using LLM
+ #             resume_info = extract_resume_info(resume_text)
+
+ #             # Load jobs data from PostgreSQL database
+ #             try:
+ #                 jobs_df = pd.read_sql_table("jobs", con=engine)
+ #                 candidates_df = pd.read_sql_table("candidates", con=engine)
+ #                 submissions_df = pd.read_sql_table("candidate_submissions", con=engine)
+ #                 logger.info(f"Loaded {len(jobs_df)} jobs, {len(candidates_df)} candidates, {len(submissions_df)} submissions")
+ #             except Exception as db_error:
+ #                 logger.error(f"Database error: {db_error}")
+ #                 return JSONResponse(
+ #                     status_code=500,
+ #                     content={"error": "Database connection error"}
+ #                 )
+
+ #             # Filter jobs by industry
+ #             filtered_jobs = filter_jobs_by_industry(jobs_df, resume_info['industry'])
+
+ #             if filtered_jobs.empty:
+ #                 logger.warning(f"No jobs found for industry: {resume_info['industry']}")
+ #                 return JSONResponse(
+ #                     status_code=404,
+ #                     content={"message": f"No jobs found for industry: {resume_info['industry']}"}
+ #                 )
+
+ #             # Filter jobs by location
+ #             location_filtered_jobs = filter_jobs_by_location(filtered_jobs, resume_info['location'])
+
+ #             # Filter jobs by experience level
+ #             experience_filtered_jobs = filter_jobs_by_experience(location_filtered_jobs, resume_info['yoe'])
+
+ #             # Filter jobs by priority
+ #             priority_filtered_jobs = filter_jobs_by_priority(experience_filtered_jobs)
+
+ #             # Use priority filtered jobs if available, otherwise fall back to experience filtered jobs, then location filtered jobs
+ #             if not priority_filtered_jobs.empty:
+ #                 jobs_to_analyze = priority_filtered_jobs
+ #             elif not experience_filtered_jobs.empty:
+ #                 jobs_to_analyze = experience_filtered_jobs
+ #             else:
+ #                 jobs_to_analyze = location_filtered_jobs
+
+ #             # Create filtered_submission_df with job_ids from jobs_to_analyze
+ #             job_ids_to_analyze = jobs_to_analyze['id'].tolist()
+ #             filtered_submission_df = submissions_df[submissions_df['jobId'].isin(job_ids_to_analyze)]
+
+ #             # Check if candidate email exists in candidates_df
+ #             candidate_id = None
+ #             if resume_info.get('email'):
+ #                 candidate_match = candidates_df[candidates_df['email'] == resume_info['email']]
+ #                 if not candidate_match.empty:
+ #                     candidate_id = candidate_match.iloc[0]['id']
+ #                     logger.info(f"Found existing candidate with ID: {candidate_id}")
+
+ #             # Analyze job fit for each filtered job
+ #             job_analyses = []
+
+ #             for _, job_row in jobs_to_analyze.head(20).iterrows():  # Analyze top 20 jobs
+ #                 job_id = job_row.get('id')
+
+ #                 # Check if we have an existing submission for this candidate and job
+ #                 existing_submission = None
+ #                 if candidate_id and job_id:
+ #                     submission_match = filtered_submission_df[
+ #                         (filtered_submission_df['candidate_id'] == candidate_id) &
+ #                         (filtered_submission_df['jobId'] == job_id)
+ #                     ]
+ #                     if not submission_match.empty:
+ #                         existing_submission = submission_match.iloc[0]
+ #                         logger.info(f"Found existing submission for job_id: {job_id}, candidate_id: {candidate_id}")
+
+ #                 if existing_submission is not None:
+ #                     # Use existing fit score from submission
+ #                     fit_score = existing_submission.get('fit_score', 0)
+ #                     existing_analysis = {
+ #                         'final_response': {
+ #                             'final_score': fit_score,
+ #                             'summary': {
+ #                                 'strengths': [],
+ #                                 'weaknesses': [],
+ #                                 'opportunities': [],
+ #                                 'recommendations': []
+ #                             }
+ #                         },
+ #                         'source': 'existing_submission'
+ #                     }
+ #                     analysis_result = existing_analysis
+ #                 else:
+ #                     # Call API for new analysis with retry logic
+ #                     job_description = create_job_description(job_row)
+ #                     analysis_result = await analyze_job_fit_with_retry(job_description, tmp_file_path)
+ #                     analysis_result['source'] = 'api_call'
+
+ #                 # Clean up the analysis result
+ #                 cleaned_analysis = clean_analysis_result(analysis_result)
+
+ #                 job_analysis = JobAnalysis(
+ #                     job_title=job_row.get('job_title', 'Unknown'),
+ #                     company_name=job_row.get('company_name', 'Unknown'),
+ #                     analysis=cleaned_analysis
+ #                 )
+ #                 job_analyses.append(job_analysis.dict())
+
+ #             # Sort jobs by final_score in descending order (highest scores first)
+ #             job_analyses = sort_jobs_by_score(job_analyses)
+
+ #             # Count existing submissions vs API calls
+ #             existing_submissions_count = sum(1 for analysis in job_analyses if analysis.get('analysis', {}).get('source') == 'existing_submission')
+ #             api_calls_count = sum(1 for analysis in job_analyses if analysis.get('analysis', {}).get('source') == 'api_call')
+
+ #             # Clean up temporary file
+ #             os.unlink(tmp_file_path)
+
+ #             # Calculate processing time
+ #             processing_time = time.time() - request_start_time
+ #             logger.info(f"Request completed in {processing_time:.2f} seconds")
+
+ #             return {
+ #                 "resume_info": resume_info,
+ #                 "industry": resume_info['industry'],
+ #                 "location": resume_info['location'],
+ #                 "experience_years": resume_info['yoe'],
+ #                 "jobs_analyzed": len(job_analyses),
+ #                 "location_filtered": not location_filtered_jobs.empty,
+ #                 "experience_filtered": not experience_filtered_jobs.empty,
+ #                 "priority_filtered": not priority_filtered_jobs.empty,
+ #                 "existing_submissions_used": existing_submissions_count,
+ #                 "api_calls_made": api_calls_count,
+ #                 "candidate_found": candidate_id is not None,
+ #                 "processing_time_seconds": round(processing_time, 2),
+ #                 "job_analyses": job_analyses
+ #             }
+
+ #         except Exception as e:
+ #             # Clean up temporary file in case of error
+ #             if os.path.exists(tmp_file_path):
+ #                 os.unlink(tmp_file_path)
+ #             raise e
+
+ #     except Exception as e:
+ #         logger.error(f"Processing failed: {str(e)}", exc_info=True)
+ #         return JSONResponse(
+ #             status_code=500,
+ #             content={"error": f"Processing failed: {str(e)}"}
+ #         )
+
+ # @app.get("/health")
+ # async def health_check(api_key: str = Depends(verify_api_key)):
+ #     """
+ #     Health check endpoint with database connectivity check
+ #     """
+ #     health_status = {
+ #         "status": "healthy",
+ #         "message": "Job Recommendation API is running",
+ #         "timestamp": time.time()
+ #     }
+
+ #     # Check database connectivity
+ #     try:
+ #         with engine.connect() as conn:
+ #             result = conn.execute(text("SELECT 1"))
+ #         health_status["database"] = "connected"
+ #     except Exception as e:
+ #         logger.error(f"Database health check failed: {e}")
+ #         health_status["database"] = "disconnected"
+ #         health_status["status"] = "degraded"
+
+ #     return health_status
+
+ # @app.get("/")
+ # async def root():
+ #     """
+ #     Root endpoint
+ #     """
+ #     return {
+ #         "message": "Job Recommendation API",
+ #         "version": "1.0.0",
+ #         "docs": "/docs",
+ #         "health": "/health"
+ #     }
+
+ # if __name__ == "__main__":
+ #     import uvicorn
+ #     port = int(os.getenv("PORT", 8080))
+ #     logger.info(f"Starting server on port {port}")
+ #     uvicorn.run(app, host="0.0.0.0", port=port)
+
+
+
+
+
  import pandas as pd
  import requests
  from pydantic import BaseModel, Field
@@ -22,14 +976,16 @@ import asyncio
  from contextlib import asynccontextmanager
  import logging
  from sqlalchemy.pool import NullPool
+ from cloud_config import *
+ import uuid

  # Load environment variables
  load_dotenv()

  # Configure logging for Cloud Run
  logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+     level=getattr(logging, LOG_LEVEL),
+     format=LOG_FORMAT
  )
  logger = logging.getLogger(__name__)

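The constants above come from cloud_config, a module that is not part of this commit. A minimal sketch of what it presumably defines, inferred only from the names used in this file; the values shown are placeholders guided by the old hardcoded values, not the author's actual settings:

    # cloud_config.py -- hypothetical sketch; only the names appear in this diff
    LOG_LEVEL = "INFO"          # consumed via getattr(logging, LOG_LEVEL)
    LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    LOGIN_TIMEOUT = 30          # seconds; the old code hardcoded timeout=30
    EXTERNAL_API_TIMEOUT = 300  # seconds; replaces the old timeout=None (wait forever)
    MAX_JOBS_TO_ANALYZE = 20    # the old code hardcoded .head(20)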
@@ -156,7 +1112,7 @@ def get_access_token():
          }

          # Add timeout to prevent hanging
-         login_response = requests.post(login_url, headers=login_headers, json=login_data, timeout=30)
+         login_response = requests.post(login_url, headers=login_headers, json=login_data, timeout=LOGIN_TIMEOUT)

          if login_response.status_code == 200:
              login_result = login_response.json()
@@ -180,6 +1136,95 @@ def get_access_token():
          logger.error(f"Unexpected error getting access token: {e}")
          return None

+ def generate_smart_hiring_collateral(job_description_text: str) -> tuple[str, str]:
+     """
+     Generate collateral using the smart-hiring/generate endpoint
+     Returns a tuple of (collateral, job_id)
+     """
+     try:
+         url = "https://fitscore-agent-535960463668.us-central1.run.app/smart-hiring/generate"
+
+         # Generate a unique job ID using UUID
+         job_id = str(uuid.uuid4())
+
+         # Prepare headers with authentication
+         headers = {
+             'accept': 'application/json',
+             'Authorization': f'Bearer {get_access_token()}'
+         }
+
+         # Prepare payload
+         payload = {
+             'job_id': job_id,
+             'job_description_text': job_description_text
+         }
+
+         # Make the API request
+         response = requests.post(url, headers=headers, data=payload, timeout=EXTERNAL_API_TIMEOUT)
+
+         if response.status_code == 200:
+             logger.info("Smart hiring collateral generated successfully")
+             # Parse the response to extract smart_hiring_criteria
+             try:
+                 response_data = response.json()
+                 if response_data.get('success') and 'data' in response_data:
+                     smart_hiring_criteria = response_data['data'].get('smart_hiring_criteria', '')
+                     if smart_hiring_criteria:
+                         logger.info("Successfully extracted smart hiring criteria")
+                         return smart_hiring_criteria, job_id
+                     else:
+                         logger.warning("No smart_hiring_criteria found in response")
+                         return "", job_id
+                 else:
+                     logger.warning("Invalid response format from smart hiring API")
+                     return "", job_id
+             except json.JSONDecodeError as e:
+                 logger.error(f"Failed to parse smart hiring response as JSON: {e}")
+                 return "", job_id
+         elif response.status_code == 401:
+             logger.warning("Authentication failed for smart hiring, getting fresh token...")
+             global access_token
+             access_token = None  # Reset the token
+             new_token = get_access_token()
+             if new_token:
+                 headers['Authorization'] = f'Bearer {new_token}'
+                 response = requests.post(url, headers=headers, data=payload, timeout=EXTERNAL_API_TIMEOUT)
+                 if response.status_code == 200:
+                     logger.info("Smart hiring collateral generated successfully with fresh token")
+                     # Parse the response to extract smart_hiring_criteria
+                     try:
+                         response_data = response.json()
+                         if response_data.get('success') and 'data' in response_data:
+                             smart_hiring_criteria = response_data['data'].get('smart_hiring_criteria', '')
+                             if smart_hiring_criteria:
+                                 logger.info("Successfully extracted smart hiring criteria with fresh token")
+                                 return smart_hiring_criteria, job_id
+                             else:
+                                 logger.warning("No smart_hiring_criteria found in response with fresh token")
+                                 return "", job_id
+                         else:
+                             logger.warning("Invalid response format from smart hiring API with fresh token")
+                             return "", job_id
+                     except json.JSONDecodeError as e:
+                         logger.error(f"Failed to parse smart hiring response as JSON with fresh token: {e}")
+                         return "", job_id
+                 else:
+                     logger.error(f"Smart hiring API call failed with status {response.status_code}")
+                     return "", job_id
+             else:
+                 logger.error("Could not obtain fresh token for smart hiring")
+                 return "", job_id
+         else:
+             logger.error(f"Smart hiring API call failed with status {response.status_code}: {response.text}")
+             return "", job_id
+
+     except requests.exceptions.Timeout:
+         logger.error(f"Smart hiring API request timed out after {EXTERNAL_API_TIMEOUT} seconds")
+         return "", ""
+     except Exception as e:
+         logger.error(f"Exception occurred in smart hiring generation: {str(e)}")
+         return "", ""
+
  class structure(BaseModel):
      name: str = Field(description="Name of the candidate")
      location: str = Field(description="The location of the candidate. Extract city and state if possible.")
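The 200-status parsing above is duplicated verbatim in the fresh-token branch. If this function is revisited, the repetition could be folded into a small helper; a sketch under that assumption (the helper name is invented here, not part of the commit):

    def _extract_criteria(response, job_id: str) -> tuple[str, str]:
        # Pull smart_hiring_criteria out of a 200 response; fall back to ("", job_id).
        try:
            body = response.json()
            if body.get('success') and 'data' in body:
                return body['data'].get('smart_hiring_criteria', ''), job_id
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse smart hiring response as JSON: {e}")
        return "", job_id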
@@ -557,6 +1602,9 @@ def create_job_description(job_row: pd.Series) -> str:

      if pd.notna(job_row.get('company_culture')):
          description_parts.append(f"Company Culture: {job_row['company_culture']}")
+
+     if pd.notna(job_row.get('description')):
+         description_parts.append(f"Description: {job_row['description']}")

      if pd.notna(job_row.get('requirements')):
          description_parts.append(f"Requirements: {job_row['requirements']}")
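With the new branch, a job row's description column now lands in the JD text used for fit scoring. A small illustration of how create_job_description assembles its output (the row values are made up):

    row = pd.Series({
        'company_blurb': 'Acme builds rockets.',
        'description': 'Own the telemetry stack.',
        'requirements': '5+ years of Python.',
    })
    create_job_description(row)
    # -> "Company: Acme builds rockets.\n\nDescription: Own the telemetry stack.\n\nRequirements: 5+ years of Python."

Fields that are missing or NaN are skipped by the pd.notna() checks.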
@@ -569,6 +1617,20 @@ def create_job_description(job_row: pd.Series) -> str:

      return "\n\n".join(description_parts)

+ def create_jd_smart_hiring(job_row: pd.Series) -> str:
+     """
+     Create a smart hiring job description from job data
+     """
+     description_parts = []
+     if pd.notna(job_row.get('description')):
+         description_parts.append(f"Description: {job_row['description']}")
+     if pd.notna(job_row.get('requirements')):
+         description_parts.append(f"Requirements: {job_row['requirements']}")
+
+     return "\n\n".join(description_parts)
+
+
+
  def clean_analysis_result(analysis_result: dict) -> dict:
      """
      Clean up the analysis result to only include final_score and summary
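create_jd_smart_hiring deliberately feeds a narrower JD (description and requirements only) to the collateral endpoint than the full text used for fit scoring. The call chain this commit wires up further down, in analyze_job_fit, amounts to:

    # job_row is a pandas Series from the jobs table
    jd_for_collateral = create_jd_smart_hiring(job_row)
    if jd_for_collateral:
        collateral, job_id = generate_smart_hiring_collateral(jd_for_collateral)
        # collateral may be "" when the endpoint returns nothing usable;
        # job_id is a fresh uuid4 string either way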
@@ -633,13 +1695,13 @@ def sort_jobs_by_score(job_analyses: list) -> list:

      return sorted(job_analyses, key=extract_score, reverse=True)

- async def analyze_job_fit_with_retry(job_description: str, resume_file_path: str, max_retries: int = 3) -> dict:
+ async def analyze_job_fit_with_retry(job_description: str, resume_file_path: str, job_row: pd.Series = None, max_retries: int = 3) -> dict:
      """
      Analyze job-candidate fit with retry logic for resilience
      """
      for attempt in range(max_retries):
          try:
-             result = analyze_job_fit(job_description, resume_file_path)
+             result = analyze_job_fit(job_description, resume_file_path, job_row)
              if "error" not in result:
                  return result

@@ -651,6 +1713,12 @@ async def analyze_job_fit_with_retry(job_description: str, resume_file_path: str
                  await asyncio.sleep(2 ** attempt)  # Exponential backoff
                  continue

+             # If timeout error and not last attempt, retry with longer timeout
+             if "timed out" in result.get("error", "").lower() and attempt < max_retries - 1:
+                 logger.warning(f"Request timed out, retrying with longer timeout... (attempt {attempt + 1}/{max_retries})")
+                 await asyncio.sleep(2 ** attempt)  # Exponential backoff
+                 continue
+
              return result
          except Exception as e:
              logger.error(f"Attempt {attempt + 1}/{max_retries} failed: {str(e)}")
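The retry wrapper now retries on timeouts as well as authentication failures. With the default max_retries=3, the 2 ** attempt backoff sleeps 1 s after the first failed attempt and 2 s after the second; there is no sleep after the final attempt. Note that despite the "retrying with longer timeout" log message, the request timeout itself stays at EXTERNAL_API_TIMEOUT on every try; only the wait between attempts grows:

    delays = [2 ** attempt for attempt in range(3 - 1)]  # sleeps between 3 attempts: [1, 2]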
@@ -658,7 +1726,7 @@ async def analyze_job_fit_with_retry(job_description: str, resume_file_path: str
                  return {"error": f"Failed after {max_retries} attempts: {str(e)}"}
              await asyncio.sleep(2 ** attempt)

- def analyze_job_fit(job_description: str, resume_file_path: str) -> dict:
+ def analyze_job_fit(job_description: str, resume_file_path: str, job_row: pd.Series = None) -> dict:
      """
      Analyze job-candidate fit using the external API
      """
@@ -686,9 +1754,27 @@ def analyze_job_fit(job_description: str, resume_file_path: str) -> dict:
          'jd_text': job_description
      }

+     # Generate collateral if job_row is provided
+     if job_row is not None:
+         try:
+             job_description_text = create_jd_smart_hiring(job_row)
+             if job_description_text:
+                 collateral, job_id = generate_smart_hiring_collateral(job_description_text)
+                 if collateral:
+                     data['collateral'] = collateral
+                     data['job_id'] = job_id
+                     logger.info(f"Added collateral and job_id ({job_id}) to job fit analysis request")
+                 elif job_id:
+                     # Even if collateral is empty, we can still use the job_id
+                     data['job_id'] = job_id
+                     logger.info(f"Added job_id ({job_id}) to job fit analysis request (no collateral)")
+         except Exception as e:
+             logger.warning(f"Failed to generate collateral: {e}")
+             # Continue without collateral if generation fails
+
      try:
-         # Make the API request with longer timeout for cloud environments
-         response = requests.post(url, headers=headers, files=files, data=data, timeout=None)
+         # Make the API request with configured timeout
+         response = requests.post(url, headers=headers, files=files, data=data, timeout=EXTERNAL_API_TIMEOUT)

          # If we get an authentication error, try to get a fresh token and retry once
          if response.status_code == 401:
@@ -701,7 +1787,7 @@ def analyze_job_fit(job_description: str, resume_file_path: str) -> dict:
                  # Close the previous file and reopen
                  files['resume'][1].close()
                  files['resume'] = (os.path.basename(resume_file_path), open(resume_file_path, 'rb'), 'application/pdf')
-                 response = requests.post(url, headers=headers, files=files, data=data, timeout=None)
+                 response = requests.post(url, headers=headers, files=files, data=data, timeout=EXTERNAL_API_TIMEOUT)
              else:
                  # If we can't get a fresh token, return error
                  return {"error": "Authentication failed and could not obtain fresh token"}
@@ -717,8 +1803,8 @@ def analyze_job_fit(job_description: str, resume_file_path: str) -> dict:
              return {"error": f"API call failed with status {response.status_code}", "details": response.text}

      except requests.exceptions.Timeout:
-         logger.error("API request timed out")
-         return {"error": "API request timed out"}
+         logger.error(f"API request timed out after {EXTERNAL_API_TIMEOUT} seconds")
+         return {"error": f"API request timed out after {EXTERNAL_API_TIMEOUT} seconds"}
      except Exception as e:
          logger.error(f"Exception occurred: {str(e)}")
          return {"error": f"Exception occurred: {str(e)}"}
@@ -818,7 +1904,8 @@ async def process_resume_and_recommend_jobs(
              # Analyze job fit for each filtered job
              job_analyses = []

-             for _, job_row in jobs_to_analyze.head(20).iterrows():  # Analyze top 20 jobs
+             # Use configured number of jobs to analyze
+             for _, job_row in jobs_to_analyze.head(MAX_JOBS_TO_ANALYZE).iterrows():
                  job_id = job_row.get('id')

                  # Check if we have an existing submission for this candidate and job
@@ -851,7 +1938,7 @@ async def process_resume_and_recommend_jobs(
                  else:
                      # Call API for new analysis with retry logic
                      job_description = create_job_description(job_row)
-                     analysis_result = await analyze_job_fit_with_retry(job_description, tmp_file_path)
+                     analysis_result = await analyze_job_fit_with_retry(job_description, tmp_file_path, job_row)
                      analysis_result['source'] = 'api_call'

                  # Clean up the analysis result
@@ -946,6 +2033,4 @@ if __name__ == "__main__":
      import uvicorn
      port = int(os.getenv("PORT", 8080))
      logger.info(f"Starting server on port {port}")
-     uvicorn.run(app, host="0.0.0.0", port=port)
-
-
+     uvicorn.run(app, host="0.0.0.0", port=port)
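For reference, the endpoint this file serves can be exercised as follows. The host and key are placeholders; the route, bearer-token auth, and multipart field names come from the code above:

    import requests

    API_URL = "http://localhost:8080/process_resume_and_recommend_jobs"  # placeholder host/port
    API_KEY = "..."  # must match the API_KEY environment variable checked by verify_api_key

    with open("resume.pdf", "rb") as f:
        resp = requests.post(
            API_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            files={"resume": ("resume.pdf", f, "application/pdf")},
            data={"resume_text": ""},  # optional; empty string triggers PDF text extraction
            timeout=600,
        )
    print(resp.status_code, resp.json().get("jobs_analyzed"))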