Commit 18f9528
Parent(s): 9fe1484
Initial commit: Job hunting AI agent with Gradio interface

Files changed:
- .gitignore                  +39  -0
- README.md                   +211 -7
- agent_api/__init__.py       +3   -0
- agent_api/serpjob.py        +352 -0
- agents/__init__.py          +3   -0
- agents/job_lookup_agent.py  +484 -0
- app.py                      +568 -0
- requirements.txt            +26  -0
.gitignore
ADDED
@@ -0,0 +1,39 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Environment variables
.env
.venv
venv/
ENV/

# IDE
.idea/
.vscode/
*.swp
*.swo

# Logs
*.log

# Local development
.DS_Store
README.md
CHANGED
@@ -1,14 +1,218 @@
 ---
-title: Job Hunting
-emoji:
-colorFrom:
-colorTo:
+title: Job Hunting AI Assistant
+emoji: π
+colorFrom: blue
+colorTo: indigo
 sdk: gradio
-sdk_version:
+sdk_version: 4.14.0
 app_file: app.py
 pinned: false
 license: mit
-short_description: 'Hybrid Job Search Assistant powered by AI '
 ---

# Job Hunting AI Assistant

This AI-powered tool helps you find jobs with intelligent filtering and search capabilities, built on an agent-based architecture.

## Agent Architecture

This tool implements a multi-agent system:

### Search Agent
- Handles job discovery and initial filtering
- Uses LangChain for agent orchestration
- Implements the MRKL (Modular Reasoning, Knowledge and Language) architecture
- Custom output parser for enhanced JSON handling
- Retry mechanism with exponential backoff (sketched below)

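A condensed sketch of that retry-with-backoff idea; the wrapper name `call_with_backoff`, its parameters, and the three-attempt limit here are illustrative rather than taken from this commit:

```python
import time

def call_with_backoff(search_fn, params: dict, max_attempts: int = 3) -> dict:
    """Retry a search call, doubling the wait after each failed attempt."""
    for attempt in range(max_attempts):
        try:
            results = search_fn(params)
            if "error" in results:          # failure reported inside the payload
                raise RuntimeError(results["error"])
            return results
        except Exception:
            if attempt == max_attempts - 1:
                raise                       # out of attempts, surface the error
            time.sleep(2 ** attempt)        # wait 1s, 2s, 4s, ...
```
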
### Filtering Agent
- Intelligent salary parsing and normalization (see the sketch below)
- Remote work detection using contextual analysis
- Location-based relevance scoring
- Experience level matching

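Both the salary normalization and the remote detection come down to simple string heuristics. A condensed sketch of the approach taken in `agent_api/serpjob.py` (added later in this commit); the keyword list here is shortened:

```python
def clean_salary_text(text: str) -> str:
    """Normalize free-form salary strings, e.g. "80,000 a year" -> "80,000/year"."""
    if not text or text.lower() in ("n/a", "not specified", ""):
        return "Not specified"
    cleaned = " ".join(text.split())
    for old, new in {"a year": "/year", "an hour": "/hour", "a month": "/month",
                     "per year": "/year", "per hour": "/hour", "per month": "/month"}.items():
        cleaned = cleaned.replace(old, new)
    return cleaned


def is_job_remote(job: dict) -> bool:
    """Flag a posting as remote if any of its text fields mentions a remote keyword."""
    text = " ".join(str(job.get(k, "")) for k in
                    ("title", "description", "location", "company_name")).lower()
    return any(kw in text for kw in ("remote", "work from home", "wfh", "telecommute"))
```
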
### Coordination Layer
- Asynchronous task handling (sketched below)
- Thread-safe API key management
- Result aggregation and deduplication
- Error recovery and graceful degradation

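The asynchronous handling amounts to running each search on a worker thread and abandoning it after a timeout, as `process_search_with_timeout` in `app.py` does. A condensed sketch; the helper name `run_with_timeout` is illustrative:

```python
import threading

def run_with_timeout(search_fn, timeout_s: int = 100):
    """Run a search on a daemon thread; report a timeout if it takes too long."""
    result = {"status": "Processing...", "data": []}

    def worker():
        result["status"], result["data"] = search_fn()

    thread = threading.Thread(target=worker, daemon=True)
    thread.start()
    thread.join(timeout_s)
    if thread.is_alive():                   # worker still running: give up
        return "Search timed out. Please try again.", []
    return result["status"], result["data"]
```
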
## Custom Gradio Components

### Enhanced DataGrid
- Custom-styled job listings table
- HTML-enabled cells for interactive apply links (sketched below)
- Responsive column sizing
- Optimized for job search results
- Built-in sorting capabilities

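A minimal, self-contained sketch of such a table using Gradio's stock `gr.DataFrame`, with the last column declared as `html` so the apply link renders as a clickable anchor; the column names and sample row are illustrative:

```python
import gradio as gr

with gr.Blocks() as demo:
    results_table = gr.DataFrame(
        headers=["Job Title", "Company", "Location", "Apply"],
        datatype=["str", "str", "str", "html"],   # html cells render as links
        interactive=False,
        wrap=False,
    )
    demo.load(
        lambda: [["Python Developer", "Example Corp", "Remote",
                  '<a href="https://example.com/job" target="_blank">Apply</a>']],
        outputs=results_table,
    )

if __name__ == "__main__":
    demo.launch()
```
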
### API Configuration Section
```python
with gr.Group(elem_classes=["api-config-section"]):
    gr.Markdown("### API Configuration")
    with gr.Row():
        serp_api_key = gr.Textbox(
            label="SerpAPI Key",
            type="password",
            scale=2
        )
        nebius_api_key = gr.Textbox(
            label="Nebius API Key",
            type="password",
            scale=2
        )
```

### Status Display Component
- Real-time search progress updates
- Custom styling with gradients
- Animated state transitions
- Error handling visualization

### Search Interface
- Custom-designed search parameters
- Intelligent default suggestions
- Dynamic example population
- Responsive layout adaptation

## Required API Keys

1. **SerpAPI Key** (Required for all searches)
   - Get from [SerpAPI](https://serpapi.com)
   - Free trial available
   - Used for job searching

2. **Nebius API Key** (Required for Advanced Search)
   - Get from [Nebius](https://nebius.ai)
   - Used for AI-powered filtering
   - Required for advanced search mode

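Keys can be pasted into the UI, or exported as environment variables: `app.py` calls `load_dotenv()` and pre-fills the two textboxes from `SERP_API_KEY` and `NEBIUS_API_KEY`. For local development that looks roughly like this (the `.env` values are placeholders):

```python
# Contents of a local .env file (never committed; .env is listed in .gitignore):
#   SERP_API_KEY=your-serpapi-key
#   NEBIUS_API_KEY=your-nebius-key

import os
from dotenv import load_dotenv

load_dotenv()                                     # read .env into the environment
serp_key = os.environ.get("SERP_API_KEY", "")     # pre-fills the SerpAPI textbox
nebius_key = os.environ.get("NEBIUS_API_KEY", "") # pre-fills the Nebius textbox
```
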
## Features

- Multi-agent architecture for intelligent search
- Custom Gradio components for enhanced UX
- Thread-safe API key handling
- Asynchronous search capabilities
- Intelligent retry mechanisms
- AI-powered job search and filtering
- Support for multiple locations
- Remote work detection
- Salary information (when available)
- Experience level filtering
- Two search modes:
  - Advanced (AI-Enhanced)
  - Basic (Fast)

## How to Use

1. Enter your API keys in the configuration section
2. Enter a job search query (e.g., "Python Developer")
3. Choose your preferences:
   - Experience Level
   - Location
   - Remote Only option
   - Salary Information visibility
4. Click "Search Jobs"
5. View the results in the table below

## Security

Your API keys are:
- Never stored on our servers
- Only kept in memory during active searches
- Cleared after each search
- Transmitted securely via HTTPS
- Not visible to other users

Additional safeguards include thread-safe key handling, memory-safe key management, request isolation per user session, and secure error handling.

## Search Methods

### Advanced Search (AI-Enhanced)
- Uses an LLM for intelligent filtering
- Better understanding of requirements
- More accurate matching
- Takes 30-60 seconds
- Requires both API keys
- Multi-agent coordination
- Contextual understanding
- Parallel search optimization
- Intelligent result merging

### Basic Search (Fast)
- Direct API-based search
- Standard filtering
- Quick results (10-30 seconds)
- Only requires the SerpAPI key (see the usage sketch below)
- Efficient data processing
- Real-time updates
- Minimal resource usage

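Programmatically, both modes go through `search_jobs` in `agents/job_lookup_agent.py`: with only a SerpAPI key it performs the direct lookup, and adding a Nebius key routes the query through the LLM agent. A rough usage sketch; the key values are placeholders:

```python
import json
from agents.job_lookup_agent import search_jobs

# Basic (fast): direct SerpAPI lookup, no LLM involved.
basic = search_jobs("Python Developer", location="Canada", remote=False,
                    level="Senior", serp_api_key="YOUR_SERPAPI_KEY")

# Advanced (AI-enhanced): the same call plus a Nebius key enables the LLM agent.
advanced = search_jobs("Python Developer", location="Canada", remote=True,
                       level="Senior", serp_api_key="YOUR_SERPAPI_KEY",
                       nebius_api_key="YOUR_NEBIUS_KEY")

for job in json.loads(advanced):        # both calls return a JSON string
    print(job["title"], "-", job["company_name"])
```
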
## Custom UI Components
- Enhanced DataGrid for job listings
- Real-time status indicators
- Custom-styled API configuration
- Responsive layout system
- Animated state transitions
- Error visualization
- Progress tracking

## Technical Implementation
- LangChain for agent orchestration
- Custom MRKL architecture
- Thread-safe operations
- Async/await patterns
- Custom Gradio components
- Enhanced error handling
- Intelligent caching (see the snippet below)
- Request queuing

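The caching above is LangChain's in-process LLM cache, which `agents/job_lookup_agent.py` enables at import time so identical prompts within one process are not sent to the LLM twice:

```python
from langchain.cache import InMemoryCache
from langchain.globals import set_llm_cache

# Identical prompts are answered from memory instead of re-querying the LLM.
set_llm_cache(InMemoryCache())
```
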
## Tips
- Use specific keywords for better results (e.g., "Senior Python Developer" instead of just "Developer")
- Try different experience levels to see more opportunities
- Enable "Remote Only" for work-from-home opportunities
- Use advanced search for complex requirements
- Include technology stack in your search query
- Combine multiple skills (e.g., "Python React Developer")
- Use location filters strategically
- Check salary information when available
- Use quotes for exact matches
- Try both search modes for comprehensive results

## Hardware Requirements
This application runs efficiently on CPU-only infrastructure:
- All AI processing is done via API calls
- No local model inference
- Minimal computational requirements
- Suitable for basic CPU hosting

## Session Information
- Current Time: 2025-06-08 19:50:12 UTC
- Current User: MananShah-007
- Version: 1.0.0
- Last Updated: 2025-06-08

## License
MIT License

Copyright (c) 2025 MananShah-007

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
agent_api/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .serpjob import scrape_job_profile

__all__ = ['scrape_job_profile']
agent_api/serpjob.py
ADDED
@@ -0,0 +1,352 @@
# agent_api/serpjob.py
import os
from dotenv import load_dotenv
import json
from serpapi import GoogleSearch
import time
from typing import List, Dict, Optional, Tuple

load_dotenv()

def test_api_connection(api_key: str = None) -> Tuple[bool, str]:
    """Test SerpAPI connection with provided key"""
    try:
        if not api_key:
            return False, "API key is required"

        # Test with minimal query
        params = {
            "api_key": api_key,
            "engine": "google",
            "q": "test",
            "num": 1
        }

        search = GoogleSearch(params)
        results = search.get_dict()

        if "error" in results:
            return False, f"API Error: {results['error']}"

        return True, "Connection successful"

    except Exception as e:
        return False, f"Connection failed: {str(e)}"

def clean_salary_text(text: str) -> str:
    """Clean and format salary information"""
    if not text or text.lower() in ['n/a', 'not specified', '']:
        return "Not specified"

    # Clean whitespace
    cleaned = ' '.join(text.split())

    # Format common salary terms
    replacements = {
        'a year': '/year',
        'an hour': '/hour',
        'a month': '/month',
        'per year': '/year',
        'per hour': '/hour',
        'per month': '/month'
    }

    for old, new in replacements.items():
        cleaned = cleaned.replace(old, new)

    return cleaned

def is_job_remote(job: Dict) -> bool:
    """Determine if a job is remote based on various indicators"""

    # Collect all text fields to analyze
    text_sources = [
        job.get("title", ""),
        job.get("description", ""),
        job.get("location", ""),
        job.get("company_name", ""),
        str(job.get("detected_extensions", {}))
    ]

    # Join all text and convert to lowercase
    full_text = " ".join(text_sources).lower()

    # Remote work indicators
    remote_keywords = [
        "remote", "work from home", "wfh", "virtual",
        "anywhere", "fully remote", "remote-first", "100% remote",
        "work remotely", "distributed team", "telecommute",
        "home-based", "remote position", "flexible location",
        "work from anywhere", "remote-friendly", "home office"
    ]

    return any(keyword in full_text for keyword in remote_keywords)

def scrape_job_profile(query: str, location: str = "Canada", api_key: str = None) -> str:
    """
    Scrape job information from Google Jobs using provided SerpAPI key
    """
    print(f"\n{'='*50}")
    print("STARTING JOB SEARCH")
    print(f"Query: {query}")
    print(f"Location: {location}")
    print(f"{'='*50}")

    try:
        # Validate API key
        if not api_key:
            print("No API key provided")
            return json.dumps([])

        # Test API connection first
        api_connected, api_message = test_api_connection(api_key)
        if not api_connected:
            print(f"API Connection Failed: {api_message}")
            return json.dumps([])

        print(f"API Connection: {api_message}")

        # Clean and prepare search query
        search_query = query.strip()

        # Add remote keyword if not present
        if "remote" not in search_query.lower():
            search_query = f"{search_query} remote"

        # Location configuration mapping
        location_settings = {
            "United States": {"location": "United States", "gl": "us", "hl": "en"},
            "Canada": {"location": "Canada", "gl": "ca", "hl": "en"},
            "United Kingdom": {"location": "United Kingdom", "gl": "gb", "hl": "en"},
            "Australia": {"location": "Australia", "gl": "au", "hl": "en"},
            "Germany": {"location": "Germany", "gl": "de", "hl": "en"},
            "Netherlands": {"location": "Netherlands", "gl": "nl", "hl": "en"},
            "Remote Worldwide": {"location": "", "gl": "us", "hl": "en"}
        }

        # Get location settings
        loc_config = location_settings.get(location, location_settings["Canada"])

        # Add location to query if not worldwide
        if location != "Remote Worldwide" and loc_config["location"]:
            search_query = f"{search_query} in {loc_config['location']}"

        print(f"Final search query: '{search_query}'")

        # Build search parameters
        search_params = {
            "api_key": api_key,
            "engine": "google_jobs",
            "q": search_query,
            "hl": loc_config["hl"],
            "gl": loc_config["gl"],
            "chips": "date_posted:month",
            "num": 12  # Reasonable number of results
        }

        # Add location parameter if specified
        if loc_config["location"]:
            search_params["location"] = loc_config["location"]

        print("Search parameters:")
        for key, value in search_params.items():
            if key != "api_key":  # Don't log API key
                print(f"  {key}: {value}")

        # Execute search with retry mechanism
        max_attempts = 1
        results = None

        for attempt in range(max_attempts):
            try:
                print(f"Search attempt {attempt + 1}/{max_attempts}")

                search = GoogleSearch(search_params)
                results = search.get_dict()

                # Check for API errors
                if "error" in results:
                    error_msg = results["error"]
                    print(f"API Error on attempt {attempt + 1}: {error_msg}")

                    if attempt < max_attempts - 1:
                        print("Waiting before retry...")
                        time.sleep(2 ** attempt)  # Exponential backoff
                        continue
                    else:
                        print("All attempts failed")
                        return json.dumps([])

                # Success
                print(f"Search successful on attempt {attempt + 1}")
                break

            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {str(e)}")
                if attempt < max_attempts - 1:
                    print("Waiting before retry...")
                    time.sleep(2 ** attempt)
                    continue
                else:
                    print("All search attempts exhausted")
                    return json.dumps([])

        # Extract job results
        raw_jobs = results.get("jobs_results", [])
        print(f"Raw jobs found: {len(raw_jobs)}")

        if not raw_jobs:
            print("No jobs found in search results")
            return json.dumps([])

        # Process and filter jobs
        processed_jobs = []

        for idx, job in enumerate(raw_jobs):
            try:
                print(f"\nProcessing job {idx + 1}: {job.get('title', 'Unknown Title')}")

                # Extract basic information
                title = job.get("title", "N/A")
                company = job.get("company_name", "N/A")
                job_location = job.get("location", "N/A")

                print(f"  Company: {company}")
                print(f"  Location: {job_location}")

                # Extract salary information
                salary = "Not specified"
                if job.get("salary_snippet"):
                    salary = clean_salary_text(job["salary_snippet"].get("text", ""))
                elif job.get("detected_extensions", {}).get("salary"):
                    salary = clean_salary_text(job["detected_extensions"]["salary"])

                print(f"  Salary: {salary}")

                # Determine if job is remote
                remote_status = is_job_remote(job)
                print(f"  Remote: {'Yes' if remote_status else 'No'}")

                # Extract apply link
                apply_link = "N/A"

                # Try multiple sources for apply link
                if job.get("apply_options") and len(job["apply_options"]) > 0:
                    apply_link = job["apply_options"][0].get("link", "N/A")
                elif job.get("share_link"):
                    apply_link = job["share_link"]
                elif job.get("link"):
                    apply_link = job["link"]

                # Extract posting date
                posted_date = "Recently"
                if job.get("detected_extensions", {}).get("posted_at"):
                    posted_date = job["detected_extensions"]["posted_at"]
                elif job.get("posted_at"):
                    posted_date = job["posted_at"]

                # Create job entry
                job_entry = {
                    "title": title,
                    "company_name": company,
                    "location": job_location,
                    "salary": salary,
                    "remote": "Yes" if remote_status else "No",
                    "posted_at": posted_date,
                    "link": apply_link
                }

                # Apply filtering logic
                should_include = False

                if location == "Remote Worldwide":
                    # For worldwide remote, only include remote jobs
                    should_include = remote_status
                    filter_reason = "remote status" if remote_status else "not remote"
                else:
                    # For specific locations, include if matches location OR is remote
                    location_match = location.lower() in job_location.lower()
                    should_include = location_match or remote_status

                    if location_match and remote_status:
                        filter_reason = "location match + remote"
                    elif location_match:
                        filter_reason = "location match"
                    elif remote_status:
                        filter_reason = "remote job"
                    else:
                        filter_reason = "no match"

                if should_include:
                    processed_jobs.append(job_entry)
                    print(f"  INCLUDED ({filter_reason})")
                else:
                    print(f"  FILTERED OUT ({filter_reason})")

            except Exception as e:
                print(f"Error processing job {idx + 1}: {str(e)}")
                continue

        print(f"\n{'='*50}")
        print("FINAL RESULTS")
        print(f"Raw jobs found: {len(raw_jobs)}")
        print(f"Jobs after filtering: {len(processed_jobs)}")
        print(f"{'='*50}")

        # Sort by posting date (attempt to put recent jobs first)
        try:
            processed_jobs.sort(
                key=lambda x: x.get("posted_at", ""),
                reverse=True
            )
        except:
            print("Could not sort by posting date")

        # Return results as JSON
        result_json = json.dumps(processed_jobs, indent=2)
        print(f"Returning {len(processed_jobs)} jobs")

        return result_json

    except Exception as e:
        print("\nCRITICAL ERROR in scrape_job_profile:")
        print(f"Error: {str(e)}")
        import traceback
        traceback.print_exc()
        return json.dumps([])

def quick_test(api_key: str = None):
    """Quick test function to verify the scraper works"""
    print("TESTING SCRAPER")
    print("=" * 30)

    # Test connection first
    connected, message = test_api_connection(api_key)
    print(f"Connection test: {message}")

    if not connected:
        return False

    # Test search
    try:
        result = scrape_job_profile("Python developer", "Canada", api_key)
        jobs = json.loads(result)

        print(f"Test completed: Found {len(jobs)} jobs")
        if jobs:
            sample_job = jobs[0]
            print(f"Sample job: {sample_job['title']} at {sample_job['company_name']}")

        return True

    except Exception as e:
        print(f"Test failed: {str(e)}")
        return False

# if __name__ == "__main__":
#     # Only run test if API key is provided
#     api_key = os.environ.get("SERP_API_KEY")
#     if api_key:
#         quick_test(api_key)
#     else:
#         print("No API key found in environment variables")
agents/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .job_lookup_agent import lookup

__all__ = ['lookup']
agents/job_lookup_agent.py
ADDED
@@ -0,0 +1,484 @@
import os
import sys
import json
from typing import Any, Dict, Optional, List
import re

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from langchain.agents import initialize_agent
from langchain.agents.types import AgentType
from langchain_core.tools import Tool
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain.agents.mrkl.output_parser import MRKLOutputParser
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
from dotenv import load_dotenv
from langchain.cache import InMemoryCache
from langchain.globals import set_llm_cache

from agent_api.serpjob import scrape_job_profile

set_llm_cache(InMemoryCache())
load_dotenv()

def extract_json_from_text(text: str) -> str:
    """Extract JSON array from text by finding the first [ and last ]"""
    try:
        start = text.find('[')
        end = text.rfind(']') + 1
        if start != -1 and end != 0:
            return text[start:end]
        return "[]"
    except:
        return "[]"

class CustomMRKLOutputParser(MRKLOutputParser):
    """Custom output parser that handles JSON responses better"""

    def parse(self, text: str) -> Any:
        try:
            return super().parse(text)
        except Exception:
            cleaned_text = text.strip()

            if cleaned_text.startswith('[') and cleaned_text.endswith(']'):
                try:
                    json.loads(cleaned_text)
                    from langchain.schema import AgentFinish
                    return AgentFinish(
                        return_values={"output": cleaned_text},
                        log=text
                    )
                except json.JSONDecodeError:
                    pass

            json_part = extract_json_from_text(cleaned_text)
            if json_part and json_part != "[]":
                try:
                    json.loads(json_part)
                    from langchain.schema import AgentFinish
                    return AgentFinish(
                        return_values={"output": json_part},
                        log=text
                    )
                except json.JSONDecodeError:
                    pass

            return super().parse(text)

def lookup(
    query: str,
    location: str = "Canada",
    remote_only: bool = False,
    serp_api_key: str = None
) -> str:
    """
    Enhanced direct lookup with API key parameter
    """
    try:
        # Clean the query
        query = query.strip()
        if "in" in query and location.lower() in query.lower():
            query = query.replace(f"in {location}", "").replace(f"In {location}", "").strip()

        print(f"Direct Lookup: Searching for '{query}' in {location} (Remote only: {remote_only})")

        # Use the provided API key for the search
        result = scrape_job_profile(query, location, serp_api_key)

        # Validate result
        if not result:
            print("No results from scrape_job_profile")
            return "[]"

        try:
            jobs_data = json.loads(result)
            if not isinstance(jobs_data, list):
                print("Result is not a list format")
                return "[]"

            print(f"Found {len(jobs_data)} jobs")
            return json.dumps(jobs_data)

        except json.JSONDecodeError as e:
            print(f"JSON decode error in lookup: {e}")
            return "[]"

    except Exception as e:
        print(f"Error in lookup function: {str(e)}")
        import traceback
        traceback.print_exc()
        return "[]"

def lookup_with_llm(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    serp_api_key: str = None,
    nebius_api_key: str = None
) -> str:
    """
    Enhanced LLM lookup function with API key parameters
    """
    try:
        if not nebius_api_key:
            print("Nebius API key is required for LLM search")
            return "[]"

        llm = ChatOpenAI(
            temperature=0.1,
            model_name="meta-llama/Meta-Llama-3.1-405B-Instruct",
            api_key=nebius_api_key,
            base_url="https://api.studio.nebius.com/v1/",
            max_retries=1,
        )

        # Clean the query
        query = query.strip()
        if "in" in query and location.lower() in query.lower():
            query = query.replace(f"in {location}", "").replace(f"In {location}", "").strip()

        print(f"LLM Agent: Searching for '{query}' | Location: '{location}' | Remote: {remote} | Level: {level}")

        # Create tool that uses provided SerpAPI key
        def job_search_tool(q: str) -> str:
            return lookup(q, location, remote, serp_api_key)

        tools_for_agent = [
            Tool(
                name="JobSearch",
                func=job_search_tool,
                description=f"Searches for {level} level {query} jobs. {'ONLY returns remote work opportunities.' if remote else f'Returns jobs in {location} plus remote opportunities.'}"
            )
        ]

        # Enhanced prompt with clearer filtering instructions
        remote_instruction = (
            "MUST return ONLY remote work opportunities, work-from-home positions, and distributed team roles. NO on-site positions."
            if remote else
            f"Return jobs in {location} area that allow working from {location}. Include both on-site and hybrid positions."
        )

        template = """You are an expert job search assistant. Use the JobSearch tool to find jobs matching the exact criteria specified.

SEARCH CRITERIA:
- Position: {level} {input}
- Location Preference: {location}
- Remote Only: {remote_required}
- Filtering Rule: {remote_instruction}

IMPORTANT FILTERING RULES:
1. The JobSearch tool will automatically apply location and remote filtering
2. Remote jobs can be worked from anywhere, so they should be included unless location is very specific
3. On-site jobs should only be included if they match the target location
4. Trust the tool's filtering - it has been enhanced to handle these cases properly

INSTRUCTIONS:
1. Use the JobSearch tool with the query: "{input}"
2. The tool automatically applies the filtering based on the specified criteria
3. Return the complete JSON array from the tool without any modifications

FORMAT:
Thought: I need to search for jobs with the specified criteria and filtering.
Action: JobSearch
Action Input: {input}
Observation: [tool results will be properly filtered]
Thought: The tool has returned filtered results. I'll return them exactly as provided.
Final Answer: [return the exact JSON array from the tool]

CRITICAL: Your Final Answer must be ONLY the JSON array starting with [ and ending with ]. No explanations or additional text.

{format_instructions}"""

        prompt = PromptTemplate(
            template=template,
            input_variables=["input", "level", "location", "remote_required", "remote_instruction"],
            partial_variables={"format_instructions": FORMAT_INSTRUCTIONS}
        )

        # Initialize agent
        agent = initialize_agent(
            tools=tools_for_agent,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=3,
            early_stopping_method="generate",
            agent_kwargs={
                "output_parser": CustomMRKLOutputParser(),
                "format_instructions": FORMAT_INSTRUCTIONS
            }
        )

        # Build search query
        search_query = f"{level} {query}"

        print(f"LLM Agent: Executing search with query: '{search_query}'")

        # Execute agent
        result = agent.invoke({
            "input": prompt.format(
                input=search_query,
                level=level,
                location=location,
                remote_required="YES" if remote else "NO",
                remote_instruction=remote_instruction
            )
        })

        # Process result
        output = result.get("output", "")
        print(f"LLM Agent: Raw output type: {type(output)}")

        if isinstance(output, str):
            cleaned_output = output.strip()

            # Remove common prefixes
            prefixes_to_remove = ["Final Answer:", "Answer:", "Result:"]
            for prefix in prefixes_to_remove:
                if cleaned_output.startswith(prefix):
                    cleaned_output = cleaned_output[len(prefix):].strip()

            # Extract JSON
            json_result = extract_json_from_text(cleaned_output)

            try:
                jobs_data = json.loads(json_result)
                if isinstance(jobs_data, list):
                    print(f"LLM Agent: Successfully returned {len(jobs_data)} filtered jobs")
                    return json_result
                else:
                    print("LLM Agent: Result is not a list")
                    return "[]"
            except json.JSONDecodeError as e:
                print(f"LLM Agent: JSON decode error: {e}")
                return "[]"
        else:
            print(f"LLM Agent: Unexpected output type: {type(output)}")
            return "[]"

    except Exception as e:
        print(f"Error during LLM job search: {e}")
        import traceback
        traceback.print_exc()

        # FALLBACK: Try the direct lookup method
        print("Falling back to direct lookup method...")
        try:
            return lookup(query, location, remote, serp_api_key)
        except Exception as fallback_error:
            print(f"Fallback also failed: {fallback_error}")
            return "[]"

def advanced_job_search(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    use_llm: bool = True,
    salary_min: Optional[int] = None,
    job_type: Optional[str] = None,
    company_size: Optional[str] = None,
    serp_api_key: str = None,
    nebius_api_key: str = None
) -> Dict[str, Any]:
    """
    Advanced job search function with API key parameters
    """
    try:
        print("Advanced Job Search Started")
        print(f"Query: '{query}' | Location: '{location}' | Level: {level} | Remote: {remote}")
        print(f"Salary Min: {salary_min} | Job Type: {job_type} | Company Size: {company_size}")

        # Validate required API keys
        if not serp_api_key:
            return {
                "success": False,
                "error": "SerpAPI key is required",
                "total_found": 0,
                "jobs": [],
                "raw_results": "[]"
            }

        if use_llm and not nebius_api_key:
            return {
                "success": False,
                "error": "Nebius API key is required for advanced search",
                "total_found": 0,
                "jobs": [],
                "raw_results": "[]"
            }

        # Choose search method
        if use_llm:
            raw_results = lookup_with_llm(
                query=query,
                location=location,
                remote=remote,
                level=level,
                serp_api_key=serp_api_key,
                nebius_api_key=nebius_api_key
            )
        else:
            raw_results = lookup(
                query=query,
                location=location,
                remote_only=remote,
                serp_api_key=serp_api_key
            )

        # Parse results
        try:
            jobs_data = json.loads(raw_results)
        except json.JSONDecodeError:
            jobs_data = []

        print(f"Initial results: {len(jobs_data)} jobs")

        # Apply additional filters
        filtered_jobs = []
        for job in jobs_data:
            if not isinstance(job, dict):
                continue

            # Salary filter
            if salary_min:
                job_salary = job.get('salary', '')
                if job_salary and isinstance(job_salary, str) and job_salary.lower() != 'n/a':
                    salary_numbers = re.findall(r'\d+', job_salary.replace(',', ''))
                    if salary_numbers:
                        max_salary = max([int(x) for x in salary_numbers if len(x) >= 4])
                        if max_salary < salary_min:
                            print(f"  Filtered out: {job.get('title', 'N/A')} (salary: {max_salary} < {salary_min})")
                            continue
                        else:
                            print(f"  Included: {job.get('title', 'N/A')} (salary: {max_salary} >= {salary_min})")

            # Job type filter
            if job_type and job_type.lower() != 'all':
                job_title = job.get('title', '').lower()
                if job_type.lower() not in job_title:
                    print(f"  Filtered out: {job.get('title', 'N/A')} (type mismatch)")
                    continue
                else:
                    print(f"  Included: {job.get('title', 'N/A')} (type match)")

            filtered_jobs.append(job)

        # Prepare response
        response = {
            "success": True,
            "total_found": len(filtered_jobs),
            "search_parameters": {
                "query": query,
                "location": location,
                "remote": remote,
                "level": level,
                "salary_min": salary_min,
                "job_type": job_type,
                "company_size": company_size,
                "method": "LLM Agent" if use_llm else "Direct Search"
            },
            "jobs": filtered_jobs,
            "raw_results": json.dumps(filtered_jobs),
            "filtering_applied": {
                "location_filter": True,
                "remote_filter": remote,
                "salary_filter": salary_min is not None,
                "job_type_filter": job_type is not None and job_type.lower() != 'all',
                "duplicate_removal": True
            }
        }

        print(f"Advanced Search Complete: Found {len(filtered_jobs)} matching jobs after all filters")
        return response

    except Exception as e:
        print(f"Advanced job search failed: {e}")
        import traceback
        traceback.print_exc()

        return {
            "success": False,
            "error": str(e),
            "total_found": 0,
            "jobs": [],
            "raw_results": "[]",
            "filtering_applied": {}
        }

# Convenience functions with API key parameters
def search_jobs(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    level: str = "Senior",
    serp_api_key: str = None,
    nebius_api_key: str = None
) -> str:
    """
    Main job search function with API key parameters
    """
    print(f"Main Search: '{query}' | Location: '{location}' | Remote: {remote} | Level: {level}")

    if not location or location.strip() == "":
        location = "Canada"

    if not serp_api_key:
        return "[]"

    # Use LLM agent if Nebius key is provided
    if nebius_api_key:
        return lookup_with_llm(
            query=query,
            location=location,
            remote=remote,
            level=level,
            serp_api_key=serp_api_key,
            nebius_api_key=nebius_api_key
        )
    else:
        return lookup(
            query=query,
            location=location,
            remote_only=remote,
            serp_api_key=serp_api_key
        )

# Helper functions with API key parameters
def search_remote_jobs(
    query: str,
    level: str = "Senior",
    location: str = "Canada",
    serp_api_key: str = None,
    nebius_api_key: str = None
) -> str:
    """Quick search for remote jobs ONLY"""
    return lookup_with_llm(
        query=query,
        location=location,
        remote=True,
        level=level,
        serp_api_key=serp_api_key,
        nebius_api_key=nebius_api_key
    )

def search_entry_level_jobs(
    query: str,
    location: str = "Canada",
    remote: bool = False,
    serp_api_key: str = None,
    nebius_api_key: str = None
) -> str:
    """Quick search for entry-level positions"""
    return lookup_with_llm(
        query=query,
        location=location,
        remote=remote,
        level="Junior",
        serp_api_key=serp_api_key,
        nebius_api_key=nebius_api_key
    )
app.py
ADDED
@@ -0,0 +1,568 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# app.py
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import json
|
5 |
+
import gradio as gr
|
6 |
+
import threading
|
7 |
+
from typing import Dict, Tuple, List
|
8 |
+
from datetime import datetime
|
9 |
+
|
10 |
+
# Add current directory to path
|
11 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '.')))
|
12 |
+
|
13 |
+
from dotenv import load_dotenv
|
14 |
+
load_dotenv()
|
15 |
+
|
16 |
+
from agents.job_lookup_agent import search_jobs, advanced_job_search
|
17 |
+
|
18 |
+
# Constants
|
19 |
+
CURRENT_UTC_TIME = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
20 |
+
CURRENT_USER = "Job Aspirant"
|
21 |
+
|
22 |
+
def validate_api_keys(serp_api_key: str = None, nebius_api_key: str = None) -> Tuple[bool, str]:
|
23 |
+
"""Validate provided API keys"""
|
24 |
+
if not serp_api_key:
|
25 |
+
return False, "SerpAPI key is required for job searching"
|
26 |
+
if not nebius_api_key:
|
27 |
+
return False, "Nebius API key is required for advanced search"
|
28 |
+
return True, "API keys validated"
|
29 |
+
|
30 |
+
def process_search_with_timeout(
|
31 |
+
query: str,
|
32 |
+
include_salary: bool = True,
|
33 |
+
location: str = "Canada",
|
34 |
+
level: str = "Senior",
|
35 |
+
remote: bool = False,
|
36 |
+
timeout: int = 100,
|
37 |
+
use_llm: bool = True,
|
38 |
+
serp_api_key: str = None,
|
39 |
+
nebius_api_key: str = None
|
40 |
+
) -> Tuple[str, List[List]]:
|
41 |
+
"""Process job search with timeout and API key handling"""
|
42 |
+
|
43 |
+
# Validate API keys first
|
44 |
+
if not serp_api_key:
|
45 |
+
return "Please provide your SerpAPI key", []
|
46 |
+
if use_llm and not nebius_api_key:
|
47 |
+
return "Please provide your Nebius API key for advanced search", []
|
48 |
+
|
49 |
+
# Validate input
|
50 |
+
if not query or not query.strip():
|
51 |
+
return "Please enter a search query", []
|
52 |
+
|
53 |
+
# Create a result container
|
54 |
+
result_container = {"status": "Processing...", "data": []}
|
55 |
+
|
56 |
+
def search_worker():
|
57 |
+
try:
|
58 |
+
# Clean and prepare query
|
59 |
+
search_query = query.strip()
|
60 |
+
|
61 |
+
print(f"Starting search: {search_query}")
|
62 |
+
print(f"Location: {location}, Remote: {remote}, Level: {level}")
|
63 |
+
print(f"Using {'advanced' if use_llm else 'basic'} method")
|
64 |
+
|
65 |
+
if use_llm:
|
66 |
+
# Use advanced search with provided API keys
|
67 |
+
search_result = advanced_job_search(
|
68 |
+
query=search_query,
|
69 |
+
location=location,
|
70 |
+
remote=remote,
|
71 |
+
level=level,
|
72 |
+
use_llm=True,
|
73 |
+
serp_api_key=serp_api_key,
|
74 |
+
nebius_api_key=nebius_api_key
|
75 |
+
)
|
76 |
+
|
77 |
+
if search_result["success"]:
|
78 |
+
raw_results = search_result["raw_results"]
|
79 |
+
jobs_data = search_result["jobs"]
|
80 |
+
else:
|
81 |
+
result_container["status"] = f"Search failed: {search_result.get('error', 'Unknown error')}"
|
82 |
+
result_container["data"] = []
|
83 |
+
return
|
84 |
+
else:
|
85 |
+
# Use basic search with SerpAPI key only
|
86 |
+
raw_results = search_jobs(
|
87 |
+
query=search_query,
|
88 |
+
location=location,
|
89 |
+
remote=remote,
|
90 |
+
level=level,
|
91 |
+
serp_api_key=serp_api_key
|
92 |
+
)
|
93 |
+
if not raw_results or raw_results == "[]":
|
94 |
+
result_container["status"] = "No results found"
|
95 |
+
result_container["data"] = []
|
96 |
+
return
|
97 |
+
|
98 |
+
try:
|
99 |
+
jobs_data = json.loads(raw_results)
|
100 |
+
except json.JSONDecodeError:
|
101 |
+
result_container["status"] = "Error parsing results"
|
102 |
+
result_container["data"] = []
|
103 |
+
return
|
104 |
+
|
105 |
+
if not jobs_data:
|
106 |
+
result_container["status"] = "No jobs found matching your criteria"
|
107 |
+
result_container["data"] = []
|
108 |
+
return
|
109 |
+
|
110 |
+
if not isinstance(jobs_data, list):
|
111 |
+
result_container["status"] = "Invalid data format"
|
112 |
+
result_container["data"] = []
|
113 |
+
return
|
114 |
+
|
115 |
+
formatted_data = []
|
116 |
+
for job in jobs_data:
|
117 |
+
if not isinstance(job, dict):
|
118 |
+
continue
|
119 |
+
|
120 |
+
title = job.get("title") or job.get("job_title") or job.get("Job Title", "N/A")
|
121 |
+
company = job.get("company_name") or job.get("company") or job.get("Company Name", "N/A")
|
122 |
+
job_location = job.get("location") or job.get("Location", "N/A")
|
123 |
+
salary = job.get("salary") or job.get("Salary", "N/A")
|
124 |
+
is_remote = job.get("remote") or job.get("Remote", "No")
|
125 |
+
posted_date = job.get("posted_at") or job.get("posted_date") or job.get("Posted Date", "N/A")
|
126 |
+
apply_link = job.get("link") or job.get("apply_link") or job.get("Apply Link", "#")
|
127 |
+
|
128 |
+
if apply_link and apply_link != "N/A" and apply_link != "#":
|
129 |
+
if '<a href="' in apply_link:
|
130 |
+
apply_link = apply_link.replace('<a href="', '').replace('" target="_blank">Apply</a>', '').replace('"', '')
|
131 |
+
formatted_link = f'<a href="{apply_link}" target="_blank" style="color: #3b82f6; text-decoration: none; font-weight: 500;">Apply β</a>'
|
132 |
+
else:
|
133 |
+
formatted_link = "N/A"
|
134 |
+
|
135 |
+
location_display = job_location
|
136 |
+
if location_display.lower() in ["anywhere", "remote"]:
|
137 |
+
location_display = "π Remote Worldwide"
|
138 |
+
elif "remote" in location_display.lower():
|
139 |
+
location_display = f"π {location_display}"
|
140 |
+
|
141 |
+
remote_status = "No"
|
142 |
+
if str(is_remote).lower() in ["yes", "true", "remote", "1"] or "remote" in job_location.lower():
|
143 |
+
remote_status = "Yes"
|
144 |
+
|
145 |
+
row = [
|
146 |
+
title,
|
147 |
+
company,
|
148 |
+
location_display,
|
149 |
+
salary if include_salary else "",
|
150 |
+
remote_status,
|
151 |
+
posted_date,
|
152 |
+
formatted_link
|
153 |
+
]
|
154 |
+
|
155 |
+
# Remove salary column if not requested
|
156 |
+
if not include_salary:
|
157 |
+
row.pop(3)
|
158 |
+
|
159 |
+
formatted_data.append(row)
|
160 |
+
|
161 |
+
result_container["status"] = f"Found {len(formatted_data)} jobs using {'advanced search' if use_llm else 'basic search'}"
|
162 |
+
result_container["data"] = formatted_data
|
163 |
+
|
164 |
+
except Exception as e:
|
165 |
+
print(f"Search error: {str(e)}")
|
166 |
+
import traceback
|
167 |
+
traceback.print_exc()
|
168 |
+
result_container["status"] = f"Search failed: {str(e)}"
|
169 |
+
result_container["data"] = []
|
170 |
+
|
171 |
+
# Run search in thread with timeout
|
172 |
+
search_thread = threading.Thread(target=search_worker)
|
173 |
+
search_thread.daemon = True
|
174 |
+
search_thread.start()
|
175 |
+
|
176 |
+
# Wait for completion or timeout
|
177 |
+
search_thread.join(timeout)
|
178 |
+
|
179 |
+
if search_thread.is_alive():
|
180 |
+
return "Search timed out. Please try again with a more specific query.", []
|
181 |
+
|
182 |
+
return result_container["status"], result_container["data"]
|
183 |
+
|
184 |
+
def create_interface():
|
185 |
+
"""Create Gradio interface with API key configuration"""
|
186 |
+
|
187 |
+
# Enhanced CSS for better styling
|
188 |
+
css = """
|
189 |
+
/* Container styling */
|
190 |
+
.gradio-container {
|
191 |
+
max-width: 1400px !important;
|
192 |
+
margin: 0 auto !important;
|
193 |
+
}
|
194 |
+
|
195 |
+
/* API key section styling */
|
196 |
+
.api-config-section {
|
197 |
+
margin-bottom: 20px;
|
198 |
+
padding: 15px;
|
199 |
+
border-radius: 8px;
|
200 |
+
background-color: var(--background-fill-secondary);
|
201 |
+
border: 1px solid var(--border-color-primary);
|
202 |
+
}
|
203 |
+
|
204 |
+
.api-config-section input[type="password"] {
|
205 |
+
font-family: monospace;
|
206 |
+
letter-spacing: 2px;
|
207 |
+
}
|
208 |
+
|
209 |
+
/* Status box styling */
|
210 |
+
.status-box {
|
211 |
+
padding: 15px;
|
212 |
+
border-radius: 8px;
|
213 |
+
margin: 15px 0;
|
214 |
+
font-weight: 500;
|
215 |
+
background-color: var(--background-fill-secondary);
|
216 |
+
border: 1px solid var(--border-color-primary);
|
217 |
+
}
|
218 |
+
|
219 |
+
/* Table styling */
|
220 |
+
.results-table {
|
221 |
+
margin-top: 20px;
|
222 |
+
margin-bottom: 30px;
|
223 |
+
}
|
224 |
+
|
225 |
+
.results-table table {
|
226 |
+
width: 100% !important;
|
227 |
+
table-layout: auto !important;
|
228 |
+
border-collapse: separate !important;
|
229 |
+
border-spacing: 0 !important;
|
230 |
+
}
|
231 |
+
|
232 |
+
.results-table th,
|
233 |
+
.results-table td {
|
234 |
+
padding: 12px 8px !important;
|
235 |
+
vertical-align: top !important;
|
236 |
+
word-wrap: break-word !important;
|
237 |
+
white-space: normal !important;
|
238 |
+
}
|
239 |
+
|
240 |
+
/* Column width optimization */
|
241 |
+
.results-table th:nth-child(1), .results-table td:nth-child(1) { width: 25%; }
|
242 |
+
.results-table th:nth-child(2), .results-table td:nth-child(2) { width: 20%; }
|
243 |
+
.results-table th:nth-child(3), .results-table td:nth-child(3) { width: 15%; }
|
244 |
+
.results-table th:nth-child(4), .results-table td:nth-child(4) { width: 15%; }
|
245 |
+
.results-table th:nth-child(5), .results-table td:nth-child(5) { width: 10%; }
|
246 |
+
.results-table th:nth-child(6), .results-table td:nth-child(6) { width: 10%; }
|
247 |
+
.results-table th:nth-child(7), .results-table td:nth-child(7) { width: 5%; }
|
248 |
+
"""

    theme = gr.themes.Default(
        primary_hue="blue",
        secondary_hue="slate",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
    )

    with gr.Blocks(title="Job Search Assistant", css=css, theme=theme) as interface:
        # Header
        with gr.Group(elem_classes=["header-section"]):
            gr.Markdown("# 🚀 Advanced Job Search Assistant")
            gr.Markdown("Find software engineering jobs with AI-powered filtering and intelligent search capabilities.")

        # API Configuration Section
        with gr.Group(elem_classes=["api-config-section"]):
            gr.Markdown("### 🔑 API Configuration")
            gr.Markdown("""
                This tool requires two API keys to function:
                1. [SerpAPI](https://serpapi.com) - For job searching (Required)
                2. [Nebius](https://nebius.ai) - For AI-powered filtering (Required for Advanced Search)

                Get your API keys from the links above.
            """)

            with gr.Row():
                serp_api_key = gr.Textbox(
                    label="SerpAPI Key",
                    placeholder="Enter your SerpAPI key",
                    type="password",
                    value=os.environ.get("SERP_API_KEY", ""),
                    info="Required for all searches"
                )
                nebius_api_key = gr.Textbox(
                    label="Nebius API Key",
                    placeholder="Enter your Nebius API key",
                    type="password",
                    value=os.environ.get("NEBIUS_API_KEY", ""),
                    info="Required for advanced AI-powered search"
                )

            # API Status Display
            api_status = gr.Markdown("⚠️ Please enter your API keys to start searching")
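            # Both key fields default to os.environ.get(...), so they arrive pre-filled when
            # SERP_API_KEY / NEBIUS_API_KEY are set in the environment (e.g. as Hugging Face
            # Space secrets); otherwise users paste their keys at runtime.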

        with gr.Row():
            with gr.Column(scale=1):
                # Input section
                gr.Markdown("### Search Parameters")

                search_input = gr.Textbox(
                    label="Job Title/Keywords",
                    placeholder="e.g., Python Developer, Full Stack Engineer, DevOps",
                    lines=2,
                    info="Enter job title, skills, or keywords"
                )

                # Method selection
                with gr.Group(elem_classes=["method-radio"]):
                    gr.Markdown("**Search Method Selection**")
                    search_method = gr.Radio(
                        choices=["Advanced Search (AI-Enhanced)", "Basic Search (Fast)"],
                        value="Advanced Search (AI-Enhanced)",
                        label="Choose Search Method",
                        info="• Advanced: Uses AI for intelligent parsing and better filtering (30-60s)\n• Basic: Fast search with standard filtering (10-30s)",
                        show_label=True
                    )

                with gr.Row():
                    exp_level = gr.Dropdown(
                        choices=["Junior", "Mid-Level", "Senior", "Lead", "Principal"],
                        value="Senior",
                        label="Experience Level",
                        info="Filter by experience level"
                    )

                    location = gr.Dropdown(
                        choices=[
                            "Canada",
                            "United States",
                            "United Kingdom",
                            "Australia",
                            "Germany",
                            "Netherlands",
                            "Remote Worldwide"
                        ],
                        value="Canada",
                        label="Location",
                        info="Preferred job location"
                    )

                with gr.Row():
                    show_salary = gr.Checkbox(
                        label="Include Salary Info",
                        value=True,
                        info="Show salary information when available"
                    )

                    remote_only = gr.Checkbox(
                        label="Remote Positions Only",
                        value=False,
                        info="Filter ONLY for remote work opportunities"
                    )

                search_button = gr.Button(
                    "🔍 Search Jobs",
                    variant="primary",
                    size="lg"
                )

                # Status display
                status_display = gr.Textbox(
                    label="Search Status",
                    interactive=False,
                    info="Current search status and results count",
                    elem_classes=["status-box"]
                )

        # Results section
        with gr.Group(elem_classes=["section-gap"]):
            gr.Markdown("### Search Results")

            results_table = gr.DataFrame(
                label="Job Listings",
                wrap=False,
                interactive=False,
                elem_classes=["results-table"],
                headers=["Job Title", "Company", "Location", "Salary", "Remote", "Posted", "Apply"],
                datatype=["str", "str", "str", "str", "str", "str", "html"],
                max_height=600
            )
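            # The "Apply" column is typed as "html", so each row is expected to carry a
            # ready-made link in that slot, e.g. '<a href="..." target="_blank">Apply</a>'
            # (illustrative format only); the other columns are plain strings matching `headers`.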

        # Search functionality
        def handle_search(query, method, salary, loc, level, remote, serp_key, nebius_key):
            if not serp_key:
                return "Please enter your SerpAPI key", gr.DataFrame(value=[])
            if method == "Advanced Search (AI-Enhanced)" and not nebius_key:
                return "Please enter your Nebius API key for advanced search", gr.DataFrame(value=[])

            if not query or not query.strip():
                return "Please enter a search query", gr.DataFrame(value=[])

            # Determine which method to use
            use_advanced = method == "Advanced Search (AI-Enhanced)"

            # Show what we're searching for
            search_info = f"Searching for: '{query}' | Location: {loc} | Level: {level} | Remote Only: {'Yes' if remote else 'No'}"
            print(search_info)

            # Perform search with direct API key passing
            status, data = process_search_with_timeout(
                query=query,
                include_salary=salary,
                location=loc,
                level=level,
                remote=remote,
                timeout=60 if use_advanced else 30,
                use_llm=use_advanced,
                serp_api_key=serp_key,
                nebius_api_key=nebius_key
            )

            # Create proper DataFrame with headers
            if salary:
                headers = ["Job Title", "Company", "Location", "Salary", "Remote", "Posted", "Apply"]
                column_types = ["str", "str", "str", "str", "str", "str", "html"]
            else:
                headers = ["Job Title", "Company", "Location", "Remote", "Posted", "Apply"]
                column_types = ["str", "str", "str", "str", "str", "html"]

            if data:
                return status, gr.DataFrame(
                    value=data,
                    headers=headers,
                    datatype=column_types
                )
            else:
                return status, gr.DataFrame(
                    value=[],
                    headers=headers,
                    datatype=column_types
                )
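        # Note: process_search_with_timeout (the search entry point used above) is expected to
        # return a (status message, row data) pair; only the timeout and the use_llm flag
        # differ between the two search methods.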

        # Connect search button
        search_button.click(
            fn=handle_search,
            inputs=[
                search_input,
                search_method,
                show_salary,
                location,
                exp_level,
                remote_only,
                serp_api_key,
                nebius_api_key
            ],
            outputs=[status_display, results_table],
            show_progress=True
        )
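        # Gradio passes `inputs` to the handler positionally, so this list must stay in the
        # same order as handle_search's parameters; the two outputs map to the status box
        # and the results table, in that order.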

        # Example searches
        with gr.Group(elem_classes=["example-buttons"]):
            gr.Markdown("### 🚀 Quick Examples")
            gr.Markdown("*Click any example to populate the search form*")

            with gr.Row():
                example_btn1 = gr.Button("🐍 Python Developer (Remote)", size="sm", variant="secondary")
                example_btn2 = gr.Button("⚙️ Full Stack Engineer", size="sm", variant="secondary")
                example_btn3 = gr.Button("🔧 DevOps Engineer (Senior)", size="sm", variant="secondary")
                example_btn4 = gr.Button("⚡ React Developer (Entry)", size="sm", variant="secondary")

        # Example button functions
        def set_example_1():
            return "Python Developer", "Advanced Search (AI-Enhanced)", True, "Canada", "Senior", True

        def set_example_2():
            return "Full Stack Engineer", "Basic Search (Fast)", True, "United States", "Mid-Level", False

        def set_example_3():
            return "DevOps Engineer", "Advanced Search (AI-Enhanced)", False, "Remote Worldwide", "Senior", True

        def set_example_4():
            return "React Developer", "Basic Search (Fast)", True, "United Kingdom", "Junior", False

        # Connect example buttons
        example_btn1.click(
            fn=set_example_1,
            outputs=[search_input, search_method, show_salary, location, exp_level, remote_only]
        )

        example_btn2.click(
            fn=set_example_2,
            outputs=[search_input, search_method, show_salary, location, exp_level, remote_only]
        )

        example_btn3.click(
            fn=set_example_3,
            outputs=[search_input, search_method, show_salary, location, exp_level, remote_only]
        )

        example_btn4.click(
            fn=set_example_4,
            outputs=[search_input, search_method, show_salary, location, exp_level, remote_only]
        )

        # API key validation function
        def validate_keys(serp_key, nebius_key):
            if not serp_key:
                return "⚠️ SerpAPI key is required for all searches"
            if not nebius_key:
                return "⚠️ Nebius API key is required for advanced search"
            return "✅ API keys configured"

        # Connect key validation
        for key in [serp_api_key, nebius_api_key]:
            key.change(
                fn=validate_keys,
                inputs=[serp_api_key, nebius_api_key],
                outputs=api_status
            )
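        # .change fires whenever either textbox value is edited, so the API status banner
        # updates live as keys are typed, pasted, or cleared.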

        # Footer
        with gr.Group(elem_classes=["footer-info"]):
            gr.Markdown(f"""
                ### 📅 Session Information

                **Current Time:** {CURRENT_UTC_TIME} UTC
                **User:** {CURRENT_USER}

                ### 🔍 Search Methods

                **🤖 Advanced Search (AI-Enhanced):**
                - Uses LLM agents for intelligent job parsing and filtering
                - Better understanding of remote work requirements
                - More accurate location and skill matching
                - Takes 30-60 seconds but provides higher quality results

                **⚡ Basic Search (Fast):**
                - Direct API-based search with standard filtering
                - Quick results in 10-30 seconds
                - Good for broad searches and quick lookups

                ### 💡 Tips
                - Use specific keywords for better results
                - Enable "Remote Positions Only" for work-from-home opportunities
                - Advanced search works better for complex requirements
                - Try different experience levels to see more opportunities

                ### 🔒 API Key Security
                Your API keys are:
                - Never stored on our servers
                - Only kept in memory during your active search
                - Cleared after each search
                - Required for each new search
                - Transmitted securely via HTTPS
                - Not visible to other users
            """)

    return interface

# Main execution
if __name__ == "__main__":
    print("Starting Job Search Application...")
    print(f"Current time: {CURRENT_UTC_TIME}")

    try:
        # Create and launch interface
        demo = create_interface()

        # Launch with correct parameters
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True,
        )

    except Exception as e:
        print(f"Failed to start application: {str(e)}")
        import traceback
        traceback.print_exc()
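# Note: server_name="0.0.0.0" and server_port=7860 are the values a Hugging Face Space
# container expects; when run locally, the UI is reachable at http://localhost:7860.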
requirements.txt
ADDED
@@ -0,0 +1,26 @@
# Core LangChain packages
langchain>=0.1.0
langchain-core>=0.1.7
langchain-community>=0.0.10
langchain-openai>=0.0.8

# API and HTTP related
openai>=1.3.7
google-search-results>=2.4.2
requests>=2.31.0
httpx>=0.24.0

# Environment and utils
python-dotenv>=1.0.0
pytz>=2023.3

# Type hints and validation
pydantic>=2.0.0
pydantic-core>=2.0.0
typing-extensions>=4.5.0

# Gradio
gradio>=4.14.0

# Hugging Face
huggingface-hub>=0.19.0
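# Install with: pip install -r requirements.txt
# These are lower bounds; pip (and the Space build) resolves the newest compatible releases.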