|
import os |
|
import sys |
|
import json |
|
from typing import Any, Dict, Optional, List |
|
import re |
|
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) |
|
|
|
from langchain.agents import initialize_agent |
|
from langchain.agents.types import AgentType |
|
from langchain_core.tools import Tool |
|
from langchain_openai import ChatOpenAI |
|
from langchain_core.prompts import PromptTemplate |
|
from langchain.agents.mrkl.output_parser import MRKLOutputParser |
|
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS |
|
from dotenv import load_dotenv |
|
from langchain_community.cache import InMemoryCache |
|
from langchain.globals import set_llm_cache |
|
|
|
from agent_api.serpjob import scrape_job_profile |
|
|
|
# Register an InMemoryCache instance as LangChain's global LLM cache.
set_llm_cache(InMemoryCache())

# Load environment variables via python-dotenv (presumably from a local .env
# file -- confirm the expected location against the deployment setup).
load_dotenv()
|
|
|
def extract_json_from_text(text: str) -> str:
    """Return the slice of *text* from the first ``[`` through the last ``]``.

    The slice is not validated as JSON; callers are expected to pass it to
    ``json.loads`` themselves.

    Args:
        text: Arbitrary text that may embed a JSON array.

    Returns:
        The bracketed slice, or the literal string ``"[]"`` when *text* is not
        a string, contains no ``[``, or has no ``]`` after the first ``[``.
    """
    if not isinstance(text, str):
        # Explicit guard instead of relying on a bare ``except`` to absorb the
        # AttributeError that ``None.find`` would raise.
        return "[]"

    start = text.find('[')
    if start == -1:
        return "[]"

    end = text.rfind(']')
    # Also covers ``end == -1``. A closing bracket that only appears before the
    # opening one would otherwise produce an empty (non-JSON) slice.
    if end < start:
        return "[]"

    return text[start:end + 1]
|
|
|
class CustomMRKLOutputParser(MRKLOutputParser):
    """MRKL output parser that salvages a JSON array as the final answer."""

    def parse(self, text: str) -> Any:
        """Parse agent output, accepting a JSON array when MRKL parsing fails.

        The stock parser runs first. If it raises, the stripped text is tried
        as a whole JSON array, then the span returned by
        ``extract_json_from_text`` is tried. The first candidate that decodes
        is wrapped in an ``AgentFinish``. If neither decodes, the stock parser
        is invoked again so that its own error reaches the caller.
        """
        try:
            return super().parse(text)
        except Exception:

            def finish_if_valid(candidate: str):
                # Only decodability matters here; the decoded value is discarded
                # and the original string is what gets returned to the agent.
                try:
                    json.loads(candidate)
                except json.JSONDecodeError:
                    return None
                from langchain.schema import AgentFinish
                return AgentFinish(return_values={"output": candidate}, log=text)

            stripped = text.strip()

            # Case 1: the whole reply is a bracketed array.
            if stripped.startswith('[') and stripped.endswith(']'):
                whole = finish_if_valid(stripped)
                if whole is not None:
                    return whole

            # Case 2: an array embedded in surrounding prose.
            embedded = extract_json_from_text(stripped)
            if embedded and embedded != "[]":
                partial = finish_if_valid(embedded)
                if partial is not None:
                    return partial

            # Nothing usable: let the stock parser raise its own error.
            return super().parse(text)
|
|
|
def lookup(
    query: str,
    location: str = "Canada",
    remote_only: bool = False,
    serp_api_key: Optional[str] = None
) -> str:
    """Run a direct job search through ``scrape_job_profile``.

    Args:
        query: Free-text job query. A redundant ``in <location>`` phrase is
            removed before searching because the location is passed separately.
        location: Location handed to ``scrape_job_profile``.
        remote_only: Only echoed in the log line; it is NOT forwarded to
            ``scrape_job_profile`` by this function.
        serp_api_key: API key forwarded to ``scrape_job_profile``.

    Returns:
        A JSON array string of jobs, or ``"[]"`` when the scraper returns
        nothing, returns something that is not a JSON list, or any error occurs
        (errors are printed, never raised).
    """
    try:
        query = query.strip()

        # Strip "in <location>" as a whole phrase, case-insensitively. The
        # lookarounds keep words such as "Plugin" or "Canadian" intact, and a
        # blank location disables the stripping instead of deleting every "in ".
        place = (location or "").strip()
        if place:
            location_phrase = re.compile(
                rf"(?<!\w)in\s+{re.escape(place)}(?!\w)",
                re.IGNORECASE,
            )
            stripped_query, hits = location_phrase.subn(" ", query)
            if hits:
                # Collapse the gap left behind by the removed phrase.
                query = " ".join(stripped_query.split())

        print(f"🔍 Direct Lookup: Searching for '{query}' in {location} (Remote only: {remote_only})")

        result = scrape_job_profile(query, location, serp_api_key)

        if not result:
            print("No results from scrape_job_profile")
            return "[]"

        try:
            jobs_data = json.loads(result)
        except json.JSONDecodeError as e:
            print(f"JSON decode error in lookup: {e}")
            return "[]"

        if not isinstance(jobs_data, list):
            print("Result is not a list format")
            return "[]"

        print(f"Found {len(jobs_data)} jobs")
        return json.dumps(jobs_data)

    except Exception as e:
        # Boundary handler: callers rely on always getting a JSON string back.
        print(f"Error in lookup function: {str(e)}")
        import traceback
        traceback.print_exc()
        return "[]"
|
|
|
def lookup_with_llm(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    serp_api_key: Optional[str] = None,
    nebius_api_key: Optional[str] = None
) -> str:
    """Search for jobs through a ReAct agent that drives the ``JobSearch`` tool.

    The agent's single tool wraps ``lookup``. Whatever the agent returns as its
    final answer is reduced to the bracketed JSON span and validated.

    Args:
        query: Free-text job query. A redundant ``in <location>`` phrase is
            removed before the prompt is built.
        location: Location forwarded to ``lookup`` and described in the prompt.
        remote: Forwarded to ``lookup`` as ``remote_only`` and used to pick the
            prompt's filtering instruction.
        level: Seniority label; prepended to the query given to the agent.
        serp_api_key: API key forwarded to ``lookup``.
        nebius_api_key: API key for the Nebius-hosted chat model. Required.

    Returns:
        A JSON array string. ``"[]"`` is returned when the Nebius key is
        missing or the agent output is not a JSON list. If the agent raises,
        the result of a direct ``lookup`` call is returned instead.
    """
    try:
        if not nebius_api_key:
            print("Nebius API key is required for LLM search")
            return "[]"

        llm = ChatOpenAI(
            temperature=0.1,
            model_name="meta-llama/Meta-Llama-3.1-405B-Instruct",
            api_key=nebius_api_key,
            base_url="https://api.studio.nebius.com/v1/",
            max_retries=1,
        )

        query = query.strip()

        # Strip "in <location>" as a whole phrase, case-insensitively. The
        # lookarounds keep words such as "Plugin" intact, and a blank location
        # disables the stripping instead of deleting every "in ".
        place = (location or "").strip()
        if place:
            location_phrase = re.compile(
                rf"(?<!\w)in\s+{re.escape(place)}(?!\w)",
                re.IGNORECASE,
            )
            stripped_query, hits = location_phrase.subn(" ", query)
            if hits:
                query = " ".join(stripped_query.split())

        print(f"🤖 LLM Agent: Searching for '{query}' | Location: '{location}' | Remote: {remote} | Level: {level}")

        def job_search_tool(q: str) -> str:
            # The agent only chooses the query text; location, remote flag and
            # API key are fixed by this call.
            return lookup(q, location, remote, serp_api_key)

        tools_for_agent = [
            Tool(
                name="JobSearch",
                func=job_search_tool,
                description=f"Searches for {level} level {query} jobs. {'ONLY returns remote work opportunities.' if remote else f'Returns jobs in {location} plus remote opportunities.'}"
            )
        ]

        remote_instruction = (
            "MUST return ONLY remote work opportunities, work-from-home positions, and distributed team roles. NO on-site positions."
            if remote else
            f"Return jobs in {location} area that allow working from {location}. Include both on-site and hybrid positions."
        )

        # ``{input}`` already carries the level (see ``search_query`` below), so
        # the Position line must not prepend ``{level}`` a second time.
        template = """You are an expert job search assistant. Use the JobSearch tool to find jobs matching the exact criteria specified.

SEARCH CRITERIA:
- Position: {input}
- Seniority Level: {level}
- Location Preference: {location}
- Remote Only: {remote_required}
- Filtering Rule: {remote_instruction}

IMPORTANT FILTERING RULES:
1. The JobSearch tool will automatically apply location and remote filtering
2. Remote jobs can be worked from anywhere, so they should be included unless location is very specific
3. On-site jobs should only be included if they match the target location
4. Trust the tool's filtering - it has been enhanced to handle these cases properly

INSTRUCTIONS:
1. Use the JobSearch tool with the query: "{input}"
2. The tool automatically applies the filtering based on the specified criteria
3. Return the complete JSON array from the tool without any modifications

FORMAT:
Thought: I need to search for jobs with the specified criteria and filtering.
Action: JobSearch
Action Input: {input}
Observation: [tool results will be properly filtered]
Thought: The tool has returned filtered results. I'll return them exactly as provided.
Final Answer: [return the exact JSON array from the tool]

CRITICAL: Your Final Answer must be ONLY the JSON array starting with [ and ending with ]. No explanations or additional text.

{format_instructions}"""

        # Partial variables are inserted verbatim, so the ``{tool_names}``
        # placeholder inside FORMAT_INSTRUCTIONS has to be filled here or the
        # model would see it literally. ``replace`` is a no-op if it is absent.
        tool_names = ", ".join(tool.name for tool in tools_for_agent)
        format_instructions = FORMAT_INSTRUCTIONS.replace("{tool_names}", tool_names)

        prompt = PromptTemplate(
            template=template,
            input_variables=["input", "level", "location", "remote_required", "remote_instruction"],
            partial_variables={"format_instructions": format_instructions}
        )

        agent = initialize_agent(
            tools=tools_for_agent,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=3,
            early_stopping_method="generate",
            agent_kwargs={
                "output_parser": CustomMRKLOutputParser(),
                # Passed raw: the agent fills ``{tool_names}`` itself here.
                "format_instructions": FORMAT_INSTRUCTIONS
            }
        )

        search_query = f"{level} {query}"

        print(f"🤖 LLM Agent: Executing search with query: '{search_query}'")

        result = agent.invoke({
            "input": prompt.format(
                input=search_query,
                level=level,
                location=location,
                remote_required="YES" if remote else "NO",
                remote_instruction=remote_instruction
            )
        })

        output = result.get("output", "")
        print(f"🤖 LLM Agent: Raw output type: {type(output)}")

        if not isinstance(output, str):
            print(f"🤖 LLM Agent: Unexpected output type: {type(output)}")
            return "[]"

        cleaned_output = output.strip()

        # Drop any answer label the model echoed in front of the array.
        for prefix in ("Final Answer:", "Answer:", "Result:"):
            if cleaned_output.startswith(prefix):
                cleaned_output = cleaned_output[len(prefix):].strip()

        json_result = extract_json_from_text(cleaned_output)

        try:
            jobs_data = json.loads(json_result)
        except json.JSONDecodeError as e:
            print(f"🤖 LLM Agent: JSON decode error: {e}")
            return "[]"

        if not isinstance(jobs_data, list):
            print("🤖 LLM Agent: Result is not a list")
            return "[]"

        print(f"🤖 LLM Agent: Successfully returned {len(jobs_data)} filtered jobs")
        return json_result

    except Exception as e:
        print(f"🤖 Error during LLM job search: {e}")
        import traceback
        traceback.print_exc()

        # Best effort: an agent failure should still give the caller results.
        print("🔄 Falling back to direct lookup method...")
        try:
            return lookup(query, location, remote, serp_api_key)
        except Exception as fallback_error:
            print(f"🤖 Fallback also failed: {fallback_error}")
            return "[]"
|
|
|
# Digit runs as the original filter saw them (commas are removed beforehand).
_SALARY_DIGIT_RUN = re.compile(r"\d+")
# Shorthand thousands such as "80K" or "85.5k".
_SALARY_THOUSANDS = re.compile(r"(\d+(?:\.\d+)?)\s*k\b", re.IGNORECASE)


def _highest_salary_figure(salary_text: str) -> Optional[int]:
    """Return the largest salary-like figure in *salary_text*, or None if there is none."""
    normalized = salary_text.replace(',', '')

    # Primary rule: digit runs of four or more digits, so short numbers such
    # as hourly rates or year counts are ignored.
    plain_figures = [
        int(digits)
        for digits in _SALARY_DIGIT_RUN.findall(normalized)
        if len(digits) >= 4
    ]
    if plain_figures:
        return max(plain_figures)

    # Fallback, used only when the primary rule finds nothing: "80K" style.
    shorthand_figures = [
        round(float(value) * 1000)
        for value in _SALARY_THOUSANDS.findall(normalized)
    ]
    return max(shorthand_figures) if shorthand_figures else None


def _failed_search_response(message: str) -> Dict[str, Any]:
    """Build the uniform payload returned when a search cannot be completed."""
    return {
        "success": False,
        "error": message,
        "total_found": 0,
        "jobs": [],
        "raw_results": "[]",
        "filtering_applied": {}
    }


def advanced_job_search(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    use_llm: bool = True,
    salary_min: Optional[int] = None,
    job_type: Optional[str] = None,
    company_size: Optional[str] = None,
    serp_api_key: Optional[str] = None,
    nebius_api_key: Optional[str] = None
) -> Dict[str, Any]:
    """Search for jobs, then apply salary and job-type filters to the results.

    Args:
        query: Free-text job query.
        location: Location forwarded to the underlying search.
        remote: Remote-only flag forwarded to the underlying search.
        level: Seniority label; only used by the LLM path.
        use_llm: Use ``lookup_with_llm`` when true, ``lookup`` otherwise.
        salary_min: When truthy, drop jobs whose highest parsable salary
            figure is below this value. Jobs without a parsable salary are kept.
        job_type: When set and not ``"all"``, keep only jobs whose title
            contains this text (case-insensitive).
        company_size: Echoed in ``search_parameters`` only; no filtering is
            performed on it here.
        serp_api_key: Required. Forwarded to the underlying search.
        nebius_api_key: Required when ``use_llm`` is true.

    Returns:
        A dict with ``success``, ``total_found``, ``jobs``, ``raw_results``
        and ``filtering_applied``. On success it also carries
        ``search_parameters``; on failure it carries ``error``. Failures are
        reported in the payload, never raised.
    """
    try:
        print("🚀 Advanced Job Search Started")
        print(f"Query: '{query}' | Location: '{location}' | Level: {level} | Remote: {remote}")
        print(f"Salary Min: {salary_min} | Job Type: {job_type} | Company Size: {company_size}")

        if not serp_api_key:
            return _failed_search_response("SerpAPI key is required")

        if use_llm and not nebius_api_key:
            return _failed_search_response("Nebius API key is required for advanced search")

        if use_llm:
            raw_results = lookup_with_llm(
                query=query,
                location=location,
                remote=remote,
                level=level,
                serp_api_key=serp_api_key,
                nebius_api_key=nebius_api_key
            )
        else:
            raw_results = lookup(
                query=query,
                location=location,
                remote_only=remote,
                serp_api_key=serp_api_key
            )

        try:
            jobs_data = json.loads(raw_results)
        except json.JSONDecodeError:
            jobs_data = []

        # A JSON object or scalar is valid JSON but not a job list.
        if not isinstance(jobs_data, list):
            jobs_data = []

        print(f"📊 Initial results: {len(jobs_data)} jobs")

        type_filter_active = job_type is not None and job_type.lower() != 'all'

        filtered_jobs = []
        for job in jobs_data:
            if not isinstance(job, dict):
                continue

            if salary_min:
                job_salary = job.get('salary', '')
                if job_salary and isinstance(job_salary, str) and job_salary.lower() != 'n/a':
                    # None means nothing parsable, e.g. "$25 an hour". Such
                    # jobs are kept rather than crashing the whole search.
                    max_salary = _highest_salary_figure(job_salary)
                    if max_salary is not None:
                        if max_salary < salary_min:
                            print(f" 💰 Filtered out: {job.get('title', 'N/A')} (salary: {max_salary} < {salary_min})")
                            continue
                        print(f" 💰 Included: {job.get('title', 'N/A')} (salary: {max_salary} >= {salary_min})")

            if job_type and type_filter_active:
                # ``or ''`` guards against an explicit null title.
                job_title = str(job.get('title') or '').lower()
                if job_type.lower() not in job_title:
                    print(f" 🏷️ Filtered out: {job.get('title', 'N/A')} (type mismatch)")
                    continue
                print(f" 🏷️ Included: {job.get('title', 'N/A')} (type match)")

            filtered_jobs.append(job)

        response = {
            "success": True,
            "total_found": len(filtered_jobs),
            "search_parameters": {
                "query": query,
                "location": location,
                "remote": remote,
                "level": level,
                "salary_min": salary_min,
                "job_type": job_type,
                "company_size": company_size,
                "method": "LLM Agent" if use_llm else "Direct Search"
            },
            "jobs": filtered_jobs,
            "raw_results": json.dumps(filtered_jobs),
            # NOTE(review): location_filter and duplicate_removal are reported
            # as True unconditionally; this function performs neither itself.
            "filtering_applied": {
                "location_filter": True,
                "remote_filter": remote,
                "salary_filter": salary_min is not None,
                "job_type_filter": type_filter_active,
                "duplicate_removal": True
            }
        }

        print(f"🎯 Advanced Search Complete: Found {len(filtered_jobs)} matching jobs after all filters")
        return response

    except Exception as e:
        print(f"❌ Advanced job search failed: {e}")
        import traceback
        traceback.print_exc()

        return _failed_search_response(str(e))
|
|
|
|
|
def search_jobs(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    serp_api_key: str = None,
    nebius_api_key: str = None
) -> str:
    """Entry point that picks between the LLM agent and the direct search.

    A missing or blank ``location`` is replaced by ``"Canada"``. Without a
    SerpAPI key the function returns ``"[]"`` immediately. With a Nebius key
    the search goes through ``lookup_with_llm``; otherwise through ``lookup``.

    Returns:
        The JSON array string produced by the selected search function.
    """
    # Logged with the caller's raw location, before any defaulting.
    print(f"🔍 Main Search: '{query}' | Location: '{location}' | Remote: {remote} | Level: {level}")

    location_given = bool(location) and location.strip() != ""
    effective_location = location if location_given else "Canada"

    if not serp_api_key:
        return "[]"

    if not nebius_api_key:
        return lookup(
            query=query,
            location=effective_location,
            remote_only=remote,
            serp_api_key=serp_api_key
        )

    return lookup_with_llm(
        query=query,
        location=effective_location,
        remote=remote,
        level=level,
        serp_api_key=serp_api_key,
        nebius_api_key=nebius_api_key
    )
|
|
|
|
|
def search_remote_jobs(
    query: str,
    level: str = "Senior",
    location: str = "Canada",
    serp_api_key: str = None,
    nebius_api_key: str = None
) -> str:
    """Shortcut for ``lookup_with_llm`` with the remote flag forced on.

    All other arguments are passed through unchanged. Returns whatever
    ``lookup_with_llm`` returns.
    """
    llm_search_args = {
        "query": query,
        "location": location,
        "remote": True,  # the one argument this wrapper pins
        "level": level,
        "serp_api_key": serp_api_key,
        "nebius_api_key": nebius_api_key,
    }
    return lookup_with_llm(**llm_search_args)
|
|
|
def search_entry_level_jobs(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    serp_api_key: str = None,
    nebius_api_key: str = None
) -> str:
    """Shortcut for ``lookup_with_llm`` with the level fixed to ``"Junior"``.

    All other arguments are passed through unchanged. Returns whatever
    ``lookup_with_llm`` returns.
    """
    llm_search_args = {
        "query": query,
        "location": location,
        "remote": remote,
        "level": "Junior",  # the one argument this wrapper pins
        "serp_api_key": serp_api_key,
        "nebius_api_key": nebius_api_key,
    }
    return lookup_with_llm(**llm_search_args)