# agents/job_lookup_agent.py
import os
import sys
import json
from typing import Any, Dict, Optional, List
import re
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from langchain.agents import initialize_agent
from langchain.agents.types import AgentType
from langchain_core.tools import Tool
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain.agents.mrkl.output_parser import MRKLOutputParser
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
from dotenv import load_dotenv
from langchain_community.cache import InMemoryCache
from langchain.globals import set_llm_cache
from agent_api.serpjob import scrape_job_profile
set_llm_cache(InMemoryCache())
load_dotenv()


def extract_json_from_text(text: str) -> str:
    """Extract a JSON array from text by finding the first '[' and the last ']'."""
    try:
        start = text.find('[')
        end = text.rfind(']') + 1
        if start != -1 and end != 0:
            return text[start:end]
        return "[]"
    except Exception:
        return "[]"


class CustomMRKLOutputParser(MRKLOutputParser):
    """Output parser that falls back to returning a raw JSON array when standard MRKL parsing fails."""

    def parse(self, text: str) -> Any:
        try:
            return super().parse(text)
        except Exception:
            from langchain.schema import AgentFinish

            cleaned_text = text.strip()
            # If the whole response is already a JSON array, finish with it directly.
            if cleaned_text.startswith('[') and cleaned_text.endswith(']'):
                try:
                    json.loads(cleaned_text)
                    return AgentFinish(
                        return_values={"output": cleaned_text},
                        log=text
                    )
                except json.JSONDecodeError:
                    pass
            # Otherwise, try to pull an embedded JSON array out of the text.
            json_part = extract_json_from_text(cleaned_text)
            if json_part and json_part != "[]":
                try:
                    json.loads(json_part)
                    return AgentFinish(
                        return_values={"output": json_part},
                        log=text
                    )
                except json.JSONDecodeError:
                    pass
            # Nothing salvageable: defer to the parent parser (which will raise).
            return super().parse(text)


def lookup(
    query: str,
    location: str = "Canada",
    remote_only: bool = False,
    serp_api_key: Optional[str] = None
) -> str:
    """
    Direct job lookup via SerpAPI. Returns a JSON array string of jobs ("[]" on failure).
    """
    try:
        # Clean the query: drop an embedded "in <location>" phrase if present
        query = query.strip()
        if "in" in query and location.lower() in query.lower():
            query = query.replace(f"in {location}", "").replace(f"In {location}", "").strip()

        print(f"🔍 Direct Lookup: Searching for '{query}' in {location} (Remote only: {remote_only})")

        # Use the provided API key for the search
        result = scrape_job_profile(query, location, serp_api_key)

        # Validate result
        if not result:
            print("No results from scrape_job_profile")
            return "[]"

        try:
            jobs_data = json.loads(result)
            if not isinstance(jobs_data, list):
                print("Result is not a list format")
                return "[]"
            print(f"Found {len(jobs_data)} jobs")
            return json.dumps(jobs_data)
        except json.JSONDecodeError as e:
            print(f"JSON decode error in lookup: {e}")
            return "[]"

    except Exception as e:
        print(f"Error in lookup function: {str(e)}")
        import traceback
        traceback.print_exc()
        return "[]"


def lookup_with_llm(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    serp_api_key: Optional[str] = None,
    nebius_api_key: Optional[str] = None
) -> str:
    """
    Agent-driven job lookup: wraps the direct lookup in a ReAct agent backed by a
    Nebius-hosted Llama model. Returns a JSON array string of jobs ("[]" on failure).
    """
    try:
        if not nebius_api_key:
            print("Nebius API key is required for LLM search")
            return "[]"

        llm = ChatOpenAI(
            temperature=0.1,
            model_name="meta-llama/Meta-Llama-3.1-405B-Instruct",
            api_key=nebius_api_key,
            base_url="https://api.studio.nebius.com/v1/",
            max_retries=1,
        )

        # Clean the query: drop an embedded "in <location>" phrase if present
        query = query.strip()
        if "in" in query and location.lower() in query.lower():
            query = query.replace(f"in {location}", "").replace(f"In {location}", "").strip()

        print(f"🤖 LLM Agent: Searching for '{query}' | Location: '{location}' | Remote: {remote} | Level: {level}")

        # Create a tool that uses the provided SerpAPI key
        def job_search_tool(q: str) -> str:
            return lookup(q, location, remote, serp_api_key)

        tools_for_agent = [
            Tool(
                name="JobSearch",
                func=job_search_tool,
                description=f"Searches for {level} level {query} jobs. {'ONLY returns remote work opportunities.' if remote else f'Returns jobs in {location} plus remote opportunities.'}"
            )
        ]

        # Prompt with explicit filtering instructions
        remote_instruction = (
            "MUST return ONLY remote work opportunities, work-from-home positions, and distributed team roles. NO on-site positions."
            if remote else
            f"Return jobs in the {location} area that allow working from {location}. Include both on-site and hybrid positions."
        )

        template = """You are an expert job search assistant. Use the JobSearch tool to find jobs matching the exact criteria specified.

SEARCH CRITERIA:
- Position: {level} {input}
- Location Preference: {location}
- Remote Only: {remote_required}
- Filtering Rule: {remote_instruction}

IMPORTANT FILTERING RULES:
1. The JobSearch tool automatically applies location and remote filtering.
2. Remote jobs can be worked from anywhere, so include them unless the location is very specific.
3. On-site jobs should only be included if they match the target location.
4. Trust the tool's filtering - it has been enhanced to handle these cases properly.

INSTRUCTIONS:
1. Use the JobSearch tool with the query: "{input}"
2. The tool automatically applies the filtering based on the specified criteria.
3. Return the complete JSON array from the tool without any modifications.

FORMAT:
Thought: I need to search for jobs with the specified criteria and filtering.
Action: JobSearch
Action Input: {input}
Observation: [tool results will be properly filtered]
Thought: The tool has returned filtered results. I'll return them exactly as provided.
Final Answer: [return the exact JSON array from the tool]

CRITICAL: Your Final Answer must be ONLY the JSON array starting with [ and ending with ]. No explanations or additional text.

{format_instructions}"""

        prompt = PromptTemplate(
            template=template,
            input_variables=["input", "level", "location", "remote_required", "remote_instruction"],
            partial_variables={"format_instructions": FORMAT_INSTRUCTIONS}
        )

        # Initialize the ReAct agent with the custom output parser
        agent = initialize_agent(
            tools=tools_for_agent,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=3,
            early_stopping_method="generate",
            agent_kwargs={
                "output_parser": CustomMRKLOutputParser(),
                "format_instructions": FORMAT_INSTRUCTIONS
            }
        )

        # Build the search query
        search_query = f"{level} {query}"
        print(f"🤖 LLM Agent: Executing search with query: '{search_query}'")

        # Execute the agent
        result = agent.invoke({
            "input": prompt.format(
                input=search_query,
                level=level,
                location=location,
                remote_required="YES" if remote else "NO",
                remote_instruction=remote_instruction
            )
        })

        # Process the result
        output = result.get("output", "")
        print(f"🤖 LLM Agent: Raw output type: {type(output)}")

        if isinstance(output, str):
            cleaned_output = output.strip()

            # Remove common prefixes
            prefixes_to_remove = ["Final Answer:", "Answer:", "Result:"]
            for prefix in prefixes_to_remove:
                if cleaned_output.startswith(prefix):
                    cleaned_output = cleaned_output[len(prefix):].strip()

            # Extract the JSON payload
            json_result = extract_json_from_text(cleaned_output)
            try:
                jobs_data = json.loads(json_result)
                if isinstance(jobs_data, list):
                    print(f"🤖 LLM Agent: Successfully returned {len(jobs_data)} filtered jobs")
                    return json_result
                else:
                    print("🤖 LLM Agent: Result is not a list")
                    return "[]"
            except json.JSONDecodeError as e:
                print(f"🤖 LLM Agent: JSON decode error: {e}")
                return "[]"
        else:
            print(f"🤖 LLM Agent: Unexpected output type: {type(output)}")
            return "[]"

    except Exception as e:
        print(f"🤖 Error during LLM job search: {e}")
        import traceback
        traceback.print_exc()

        # FALLBACK: try the direct lookup method
        print("🔄 Falling back to direct lookup method...")
        try:
            return lookup(query, location, remote, serp_api_key)
        except Exception as fallback_error:
            print(f"🤖 Fallback also failed: {fallback_error}")
            return "[]"


def advanced_job_search(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    use_llm: bool = True,
    salary_min: Optional[int] = None,
    job_type: Optional[str] = None,
    company_size: Optional[str] = None,
    serp_api_key: Optional[str] = None,
    nebius_api_key: Optional[str] = None
) -> Dict[str, Any]:
    """
    Advanced job search: runs the LLM agent or the direct lookup, then applies
    salary and job-type post-filters. Returns a structured response dict.
    """
    try:
        print(f"🚀 Advanced Job Search Started")
        print(f"Query: '{query}' | Location: '{location}' | Level: {level} | Remote: {remote}")
        print(f"Salary Min: {salary_min} | Job Type: {job_type} | Company Size: {company_size}")

        # Validate required API keys
        if not serp_api_key:
            return {
                "success": False,
                "error": "SerpAPI key is required",
                "total_found": 0,
                "jobs": [],
                "raw_results": "[]"
            }
        if use_llm and not nebius_api_key:
            return {
                "success": False,
                "error": "Nebius API key is required for advanced search",
                "total_found": 0,
                "jobs": [],
                "raw_results": "[]"
            }

        # Choose the search method
        if use_llm:
            raw_results = lookup_with_llm(
                query=query,
                location=location,
                remote=remote,
                level=level,
                serp_api_key=serp_api_key,
                nebius_api_key=nebius_api_key
            )
        else:
            raw_results = lookup(
                query=query,
                location=location,
                remote_only=remote,
                serp_api_key=serp_api_key
            )

        # Parse results
        try:
            jobs_data = json.loads(raw_results)
        except json.JSONDecodeError:
            jobs_data = []

        print(f"📊 Initial results: {len(jobs_data)} jobs")

        # Apply additional filters
        filtered_jobs = []
        for job in jobs_data:
            if not isinstance(job, dict):
                continue

            # Salary filter: keep only jobs whose largest parsed figure meets the minimum
            if salary_min:
                job_salary = job.get('salary', '')
                if job_salary and isinstance(job_salary, str) and job_salary.lower() != 'n/a':
                    salary_numbers = re.findall(r'\d+', job_salary.replace(',', ''))
                    # Only consider figures with at least 4 digits (ignores small numbers
                    # such as hourly rates); skipping the filter when none qualify also
                    # avoids calling max() on an empty list.
                    candidate_salaries = [int(x) for x in salary_numbers if len(x) >= 4]
                    if candidate_salaries:
                        max_salary = max(candidate_salaries)
                        if max_salary < salary_min:
                            print(f" 💰 Filtered out: {job.get('title', 'N/A')} (salary: {max_salary} < {salary_min})")
                            continue
                        else:
                            print(f" 💰 Included: {job.get('title', 'N/A')} (salary: {max_salary} >= {salary_min})")

            # Job type filter: simple substring match against the job title
            if job_type and job_type.lower() != 'all':
                job_title = job.get('title', '').lower()
                if job_type.lower() not in job_title:
                    print(f" 🏷️ Filtered out: {job.get('title', 'N/A')} (type mismatch)")
                    continue
                else:
                    print(f" 🏷️ Included: {job.get('title', 'N/A')} (type match)")

            filtered_jobs.append(job)

        # Prepare the response
        response = {
            "success": True,
            "total_found": len(filtered_jobs),
            "search_parameters": {
                "query": query,
                "location": location,
                "remote": remote,
                "level": level,
                "salary_min": salary_min,
                "job_type": job_type,
                "company_size": company_size,
                "method": "LLM Agent" if use_llm else "Direct Search"
            },
            "jobs": filtered_jobs,
            "raw_results": json.dumps(filtered_jobs),
            "filtering_applied": {
                "location_filter": True,
                "remote_filter": remote,
                "salary_filter": salary_min is not None,
                "job_type_filter": job_type is not None and job_type.lower() != 'all',
                "duplicate_removal": True
            }
        }

        print(f"🎯 Advanced Search Complete: Found {len(filtered_jobs)} matching jobs after all filters")
        return response

    except Exception as e:
        print(f"❌ Advanced job search failed: {e}")
        import traceback
        traceback.print_exc()
        return {
            "success": False,
            "error": str(e),
            "total_found": 0,
            "jobs": [],
            "raw_results": "[]",
            "filtering_applied": {}
        }
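
# Usage sketch for the advanced search (field names inside each job dict, such as
# 'title' and 'salary', follow whatever scrape_job_profile produces; serp_key and
# nebius_key are placeholder variables):
#   result = advanced_job_search(
#       query="Machine Learning Engineer",
#       location="Vancouver, Canada",
#       remote=True,
#       salary_min=120000,       # drops jobs whose best parsed figure is lower
#       job_type="engineer",     # substring match against the job title
#       serp_api_key=serp_key,
#       nebius_api_key=nebius_key,
#   )
#   print(result["total_found"], "jobs after filtering")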


# Convenience functions with API key parameters
def search_jobs(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    serp_api_key: Optional[str] = None,
    nebius_api_key: Optional[str] = None
) -> str:
    """
    Main job search entry point: uses the LLM agent when a Nebius key is
    provided, otherwise falls back to the direct SerpAPI lookup.
    """
    print(f"🔍 Main Search: '{query}' | Location: '{location}' | Remote: {remote} | Level: {level}")

    if not location or location.strip() == "":
        location = "Canada"
    if not serp_api_key:
        return "[]"

    # Use the LLM agent if a Nebius key is provided
    if nebius_api_key:
        return lookup_with_llm(
            query=query,
            location=location,
            remote=remote,
            level=level,
            serp_api_key=serp_api_key,
            nebius_api_key=nebius_api_key
        )
    else:
        return lookup(
            query=query,
            location=location,
            remote_only=remote,
            serp_api_key=serp_api_key
        )


# Helper functions with API key parameters
def search_remote_jobs(
    query: str,
    level: str = "Senior",
    location: str = "Canada",
    serp_api_key: Optional[str] = None,
    nebius_api_key: Optional[str] = None
) -> str:
    """Quick search for remote jobs ONLY."""
    return lookup_with_llm(
        query=query,
        location=location,
        remote=True,
        level=level,
        serp_api_key=serp_api_key,
        nebius_api_key=nebius_api_key
    )


def search_entry_level_jobs(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    serp_api_key: Optional[str] = None,
    nebius_api_key: Optional[str] = None
) -> str:
    """Quick search for entry-level positions."""
    return lookup_with_llm(
        query=query,
        location=location,
        remote=remote,
        level="Junior",
        serp_api_key=serp_api_key,
        nebius_api_key=nebius_api_key
    )
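

# Minimal manual test harness (a sketch for trying the module from the command
# line, not part of the original API). The SERPAPI_API_KEY / NEBIUS_API_KEY
# environment variable names are assumptions; substitute whatever names your
# .env file actually defines.
if __name__ == "__main__":
    serp_key = os.getenv("SERPAPI_API_KEY")      # assumed env var name
    nebius_key = os.getenv("NEBIUS_API_KEY")     # assumed env var name

    raw = search_jobs(
        query="Python Developer",
        location="Canada",
        remote=False,
        level="Senior",
        serp_api_key=serp_key,
        nebius_api_key=nebius_key,
    )
    print(f"search_jobs returned {len(json.loads(raw))} jobs")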