added files
- .env +11 -0
- .gitignore +70 -0
- app.py +240 -0
- config.py +26 -0
- content.txt +118 -0
- eval.py +193 -0
- requirements.txt +34 -0
.env
ADDED
@@ -0,0 +1,11 @@
# API Keys
GEMINI_API_KEY="AIzaSyD031DqbvCunm0WT0sOeg1ulMzP2wOR2H0"

# Application Configuration
SHL_CATALOG_URL=https://www.shl.com/solutions/products/product-catalog/
MAX_RECOMMENDATIONS=10

# Optional - Uncomment and set these if needed
# BACKEND_PORT=8000
# FRONTEND_PORT=8501
# LOG_LEVEL=INFO
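The commented-out variables are optional. As a minimal sketch (assuming python-dotenv, which requirements.txt below pins), they could be read with fallbacks matching the commented defaults; the variable names come from the file above, the reading pattern is an assumption:

```python
# Minimal sketch: read the optional settings from .env, falling back to the
# defaults suggested by the commented-out lines above (assumed values).
import os
from dotenv import load_dotenv

load_dotenv()  # pulls GEMINI_API_KEY, SHL_CATALOG_URL, etc. into the environment

BACKEND_PORT = int(os.getenv("BACKEND_PORT", "8000"))
FRONTEND_PORT = int(os.getenv("FRONTEND_PORT", "8501"))
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
```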
.gitignore
ADDED
@@ -0,0 +1,70 @@
# Dependency directories
node_modules/
jspm_packages/
bower_components/

# Distribution directories
dist/
build/
out/

/env

# Environment files
.env
.env.local
.env.development.local
.env.test.local
.env.production.local

# IDE and editor files
.idea/
.vscode/
*.sublime-project
*.sublime-workspace
.project
.classpath
.settings/
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Operating system files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Optional caches
.npm
.eslintcache
.stylelintcache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Test coverage directory
coverage/

# Build files
*.tsbuildinfo

# Debug files
.node_repl_history

# Package files
*.tgz
.yarn-integrity
app.py
ADDED
@@ -0,0 +1,240 @@
import os
import re
import numpy as np
from typing import List, Dict, Any, Optional
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel
import google.generativeai as genai
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Configure Google Gemini API
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Initialize FastAPI app
app = FastAPI(
    title="SHL Assessment Recommendation API",
    description="API for recommending SHL assessments based on job descriptions or queries",
    version="1.0.0"
)

# Path to the data file
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(ROOT_DIR, "data", "processed")
ASSESSMENTS_PATH = os.path.join(DATA_DIR, "shl_test_solutions.csv")

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)

class RecommendationSystem:
    """Loads the assessment catalog, builds embeddings, and serves recommendations."""

    def __init__(self, data_path: str):
        self.df = pd.read_csv(data_path)
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.enhanced_query: Optional[str] = None  # set on each recommend() call

        # Clean and prepare data
        self.prepare_data()

        # Create embeddings
        self.create_embeddings()

        # Initialize Gemini model for query enhancement
        self.gemini_model = genai.GenerativeModel('gemini-1.5-pro')

    def prepare_data(self):
        """Clean and prepare the assessment data."""
        # Ensure all text columns are strings
        text_cols = ['name', 'description', 'job_levels', 'test_types_expanded']
        for col in text_cols:
            if col in self.df.columns:
                self.df[col] = self.df[col].fillna('').astype(str)

        # Extract duration in minutes as a numeric value
        self.df['duration_minutes'] = self.df['duration'].apply(
            lambda x: int(re.search(r'(\d+)', str(x)).group(1))
            if re.search(r'(\d+)', str(x))
            else 60  # Default when no numeric duration is present
        )

    def create_embeddings(self):
        """Create embeddings for assessments."""
        # Create a rich text representation of each assessment
        self.df['combined_text'] = self.df.apply(
            lambda row: f"Assessment: {row['name']}. "
                        f"Description: {row['description']}. "
                        f"Job Levels: {row['job_levels']}. "
                        f"Test Types: {row['test_types_expanded']}. "
                        f"Duration: {row['duration']}.",
            axis=1
        )

        # Generate embeddings
        print("Generating embeddings for assessments...")
        self.embeddings = self.model.encode(self.df['combined_text'].tolist())

        # Create FAISS index for fast similarity search
        self.dimension = self.embeddings.shape[1]
        self.index = faiss.IndexFlatL2(self.dimension)
        self.index.add(np.array(self.embeddings).astype('float32'))
        print(f"Created FAISS index with {len(self.df)} assessments")

    def enhance_query(self, query: str) -> str:
        """Use Gemini to enhance the query with assessment-relevant terms."""
        prompt = f"""
        I need to find SHL assessments based on this query: "{query}"

        Please reformulate this query to include specific skills, job roles, and assessment criteria
        that would help in finding relevant technical assessments. Focus on keywords like programming
        languages, technical skills, job levels, and any time constraints mentioned.

        Return only the reformulated query without any explanations or additional text.
        """

        try:
            response = self.gemini_model.generate_content(prompt)
            enhanced_query = response.text.strip()
            print(f"Original query: {query}")
            print(f"Enhanced query: {enhanced_query}")
            return enhanced_query
        except Exception as e:
            print(f"Error enhancing query with Gemini: {e}")
            return query  # Fall back to the original query if enhancement fails

    def parse_duration_constraint(self, query: str) -> Optional[int]:
        """Extract a duration constraint (in minutes) from the query, if present."""
        # Look for patterns like "within 45 minutes", "less than 30 minutes", etc.
        patterns = [
            r"(?:within|in|under|less than|no more than)\s+(\d+)\s+(?:min|mins|minutes)",
            r"(\d+)\s+(?:min|mins|minutes)(?:\s+(?:or less|max|maximum|limit))",
            r"(?:max|maximum|limit)(?:\s+(?:of|is))?\s+(\d+)\s+(?:min|mins|minutes)",
            r"(?:time limit|duration)(?:\s+(?:of|is))?\s+(\d+)\s+(?:min|mins|minutes)",
            r"(?:completed in|takes|duration of)\s+(\d+)\s+(?:min|mins|minutes)"
        ]

        for pattern in patterns:
            match = re.search(pattern, query, re.IGNORECASE)
            if match:
                return int(match.group(1))

        return None

    def recommend(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """Recommend assessments based on a query."""
        # Enhance the query using Gemini; store it so callers can surface it
        enhanced_query = self.enhance_query(query)
        self.enhanced_query = enhanced_query

        # Extract duration constraint, if any
        duration_limit = self.parse_duration_constraint(query)

        # Generate an embedding for the enhanced query
        query_embedding = self.model.encode([enhanced_query])

        # Search for similar assessments across the whole catalog
        D, I = self.index.search(np.array(query_embedding).astype('float32'), len(self.df))

        # Map each catalog index to its L2 distance so scores stay correct
        # even after the duration filter reorders/removes indices
        distance_by_index = {int(i): float(d) for d, i in zip(D[0], I[0])}
        indices = list(I[0])

        # Apply duration filter if specified
        if duration_limit:
            filtered_indices = [
                idx for idx in indices
                if self.df.iloc[idx]['duration_minutes'] <= duration_limit
            ]
            indices = filtered_indices if filtered_indices else indices

        # Prepare results, limiting to max_results
        results = []
        for idx in indices[:max_results]:
            assessment = self.df.iloc[idx]
            results.append({
                "name": assessment["name"],
                "url": assessment["url"],
                "remote_testing": assessment["remote_testing"],
                "adaptive_irt": assessment["adaptive_irt"],
                "duration": assessment["duration"],
                "test_types": assessment["test_types"],
                "test_types_expanded": assessment["test_types_expanded"],
                "description": assessment["description"],
                "job_levels": assessment["job_levels"],
                # Rough normalization of L2 distance to a 0-1 similarity score
                "similarity_score": float(1.0 - distance_by_index[idx] / 100)
            })

        return results

# Initialize the recommendation system
try:
    recommender = RecommendationSystem(ASSESSMENTS_PATH)
    print("Recommendation system initialized successfully")
except Exception as e:
    print(f"Error initializing recommendation system: {e}")
    recommender = None

# Define API response models
class AssessmentRecommendation(BaseModel):
    name: str
    url: str
    remote_testing: str
    adaptive_irt: str
    duration: str
    test_types: str
    test_types_expanded: str
    description: str
    job_levels: str
    similarity_score: float

class RecommendationResponse(BaseModel):
    query: str
    enhanced_query: str
    recommendations: List[AssessmentRecommendation]

# Define API endpoints
@app.get("/", response_model=dict)
def root():
    """Root endpoint that returns API information."""
    return {
        "name": "SHL Assessment Recommendation API",
        "version": "1.0.0",
        "endpoints": {
            "/recommend": "GET endpoint for assessment recommendations"
        }
    }

@app.get("/recommend", response_model=RecommendationResponse)
def recommend(
    query: str = Query(..., description="Natural language query or job description text"),
    max_results: int = Query(10, ge=1, le=10, description="Maximum number of results to return")
):
    """Recommend SHL assessments based on a query."""
    if not recommender:
        raise HTTPException(
            status_code=500,
            detail="Recommendation system not initialized properly"
        )

    # Get recommendations; recommend() stores the enhanced query, so we can
    # surface it without making a second Gemini call
    recommendations = recommender.recommend(query, max_results=max_results)

    return {
        "query": query,
        "enhanced_query": recommender.enhanced_query or query,
        "recommendations": recommendations
    }

# Run the application
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
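For reference, a minimal sketch of exercising the `/recommend` endpoint once the server above is running locally. The endpoint path, parameters, and response fields come from app.py; the query text, port, and `max_results` value here are illustrative assumptions:

```python
# Sketch of a client call against the API defined in app.py.
# Assumes the server is running locally (python app.py) and the
# requests library is installed; the query string is made up.
import requests

resp = requests.get(
    "http://localhost:8000/recommend",
    params={
        "query": "Java developers who collaborate with business teams, 40 minutes max",
        "max_results": 5,
    },
    timeout=120,  # the first call can be slow: Gemini enhancement plus embedding
)
resp.raise_for_status()
data = resp.json()
print("Enhanced query:", data["enhanced_query"])
for rec in data["recommendations"]:
    print(rec["name"], rec["duration"], round(rec["similarity_score"], 2))
```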
config.py
ADDED
@@ -0,0 +1,26 @@
"""Configuration settings for the SHL Assessment Recommendation System."""
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# API and service configurations
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
SHL_CATALOG_URL = "https://www.shl.com/solutions/products/product-catalog/"

# Data directories
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
RAW_DATA_DIR = os.path.join(DATA_DIR, "raw")
PROCESSED_DATA_DIR = os.path.join(DATA_DIR, "processed")
MODELS_DIR = os.path.join(PROJECT_ROOT, "models")

# Create directories if they don't exist
for directory in [DATA_DIR, RAW_DATA_DIR, PROCESSED_DATA_DIR, MODELS_DIR]:
    os.makedirs(directory, exist_ok=True)

# Application settings
MAX_RECOMMENDATIONS = 10
DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
CHROMA_DB_PATH = os.path.join(MODELS_DIR, "chroma_db")
content.txt
ADDED
@@ -0,0 +1,118 @@
# 🧠 SHL AI Intern RE Generative AI Assignment

## 📌 Task Overview: Build an SHL Assessment Recommendation System

Hiring managers often struggle to find the right assessments for the roles they're hiring for. Currently, this process relies heavily on keyword searches and filters, making it inefficient.

Your task is to **build an intelligent recommendation system** that simplifies this process.

> Given a **natural language query**, a **job description (JD)**, or a **URL**, your application should return a list of relevant SHL assessments.

You can refer to SHL's assessment catalog here:
👉 [SHL Product Catalog](https://www.shl.com/solutions/products/product-catalog/)

---

## ✅ Objectives

Build a **web application** that:

1. Accepts a **natural language query**, **job description text**, or a **job post URL**.
2. Returns a list of the **1–10 most relevant SHL assessments**.
3. Displays the recommendations in **tabular format** with the following columns:
   - **Assessment Name** (linked to SHL's catalog)
   - **Remote Testing Support** (Yes/No)
   - **Adaptive/IRT Support** (Yes/No)
   - **Test Duration**
   - **Test Type**

---

## 📤 Submission Guidelines

Submit the following via this [Microsoft Form](https://forms.office.com/r/Pq8dYPEGH4):

- **Hosted Web Demo URL** – Frontend that accepts queries and shows results
- **API Endpoint URL** – Accepts query/text and returns structured JSON
- **GitHub Repository URL** – Containing complete source code
- **1-Page Approach Document** – Include tools, libraries, and methods used

---

## 🧪 Evaluation Criteria

Your submission will be evaluated on:

### 🔍 Approach
- How the catalog was crawled/processed
- Data representation and search techniques
- Usage of the LLM stack (e.g., LangChain, Gemini, etc.)
- Tracing and evaluation tools used

### 🎯 Accuracy
- Using benchmark sets
- Metrics:
  - **Mean Recall@3**
  - **MAP@3**

### 🧑‍💻 Demo Quality
- Working end-to-end solution
- Attention to usability and details
- Usage of low-code frameworks like **Streamlit** or **Gradio** is acceptable

---

## 📊 Accuracy Metrics

### Mean Recall@K

```text
Recall@K = (Number of relevant results in top K) / (Total number of relevant results)

Mean Recall@K = (1/N) * Σ Recall@K_i

Where:
N = number of test queries
```

### Mean Average Precision@K (MAP@K)

```text
AP@K = (1 / min(K, R)) * Σ (P(k) * rel(k)) for k = 1 to K

MAP@K = (1/N) * Σ AP@K_i

Where:
R = total number of relevant results
P(k) = Precision at rank k
rel(k) = 1 if the item at rank k is relevant, else 0
N = total number of queries
```

The higher the Mean Recall@K and MAP@K, the better the performance.

---

## 📄 Example Queries

Here are some test cases to evaluate your system:

1. "I am hiring for Java developers who can also collaborate effectively with my business teams. Looking for an assessment(s) that can be completed in 40 minutes."
2. "Looking to hire mid-level professionals proficient in Python, SQL and JavaScript. Need an assessment package that can test all skills with max duration of 60 minutes."
3. "Here is a JD text, can you recommend some assessments that can help me screen applications? Time limit is less than 30 minutes."
4. "I am hiring for an analyst and want applications to be screened using cognitive and personality tests. What options are available within 45 minutes?"

---

## 🔗 Resources

- [SHL Product Catalog](https://www.shl.com/solutions/products/product-catalog/)
- [Google Gemini Free API Docs](https://ai.google.dev/gemini-api/docs/pricing)
- [Submission Form](https://forms.office.com/r/Pq8dYPEGH4)
- Sample Job Description for Testing: SHL AI Research Engineer Job Posting
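To make the two formulas above concrete, here is a small worked sketch (the relevance pattern and total-relevant count are toy values, not from the benchmark):

```python
# Toy example of Recall@3 and AP@3 as defined in content.txt above.
def recall_at_k(rel, k, total_relevant):
    # rel[i] is 1 if the item at rank i+1 is relevant, else 0
    return sum(rel[:k]) / total_relevant

def ap_at_k(rel, k, total_relevant):
    hits, precisions = 0, []
    for i, r in enumerate(rel[:k]):
        if r:
            hits += 1
            precisions.append(hits / (i + 1))  # P(k) at each relevant rank
    return sum(precisions) / min(k, total_relevant) if precisions else 0.0

rel = [1, 0, 1]  # ranks 1 and 3 are relevant, out of 4 relevant items overall
print(recall_at_k(rel, 3, total_relevant=4))  # 2/4 = 0.5
print(ap_at_k(rel, 3, total_relevant=4))      # (1/1 + 2/3) / 3 ≈ 0.556
```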
eval.py
ADDED
@@ -0,0 +1,193 @@
"""Evaluation script for the SHL Assessment Recommendation System."""
import os
import numpy as np
from typing import List, Dict, Any
from app import RecommendationSystem

# Path to the data file (relative to this script, matching app.py)
DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "processed")
ASSESSMENTS_PATH = os.path.join(DATA_DIR, "shl_test_solutions.csv")

# Test queries with ground-truth relevant assessments.
# In a real scenario, you would have a proper evaluation dataset with human-labeled relevance.
TEST_QUERIES = [
    {
        "query": "I am hiring for Java developers who can also collaborate effectively with my business teams. Looking for an assessment(s) that can be completed in 40 minutes.",
        "relevant_assessments": ["Java", "Core Java", "Java Spring Boot", "Java Programming", "Collaboration Skills"],
        "time_constraint": 40
    },
    {
        "query": "Looking to hire mid-level professionals who are proficient in Python, SQL and JavaScript. Need an assessment package that can test all skills with max duration of 60 minutes.",
        "relevant_assessments": ["Python", "SQL", "JavaScript", "Full Stack Developer", "Web Development"],
        "time_constraint": 60
    },
    {
        "query": "I am hiring for an analyst and want applications to be screened using cognitive and personality tests. What options are available within 45 minutes?",
        "relevant_assessments": ["Analytical Thinking", "Cognitive Ability", "Personality", "Decision Making", "Data Analysis"],
        "time_constraint": 45
    }
]

def is_relevant(assessment: Dict[str, Any], relevant_keywords: List[str]) -> bool:
    """
    Check if an assessment is relevant based on keywords in its name or description.

    Args:
        assessment: Assessment dictionary with 'name' and optionally 'description'
        relevant_keywords: List of keywords to match against

    Returns:
        Boolean indicating relevance
    """
    assessment_name = assessment["name"].lower()
    assessment_desc = assessment.get("description", "").lower() if isinstance(assessment.get("description", ""), str) else ""

    # Special case for cognitive/personality assessments
    if any(kw.lower() in ["cognitive ability", "personality", "analytical thinking"] for kw in relevant_keywords):
        cognitive_keywords = ["reasoning", "cognitive", "numerical", "verbal", "inductive", "deductive", "verify"]
        personality_keywords = ["personality", "trait", "behavior", "opq"]
        analytical_keywords = ["analytical", "analysis", "problem solving", "critical thinking"]

        # Check if the assessment name or description contains any cognitive/personality keywords
        if any(kw in assessment_name for kw in cognitive_keywords + personality_keywords + analytical_keywords):
            return True
        if assessment_desc and any(kw in assessment_desc for kw in cognitive_keywords + personality_keywords + analytical_keywords):
            return True

    # General keyword matching
    for keyword in relevant_keywords:
        keyword_lower = keyword.lower()

        # Direct match in name or description
        if keyword_lower in assessment_name or (assessment_desc and keyword_lower in assessment_desc):
            return True

        # Word-level matching to avoid partial word matches
        name_words = assessment_name.split()
        for word in name_words:
            # Allow stemming-like matching (e.g. 'Python' matches 'Python-based')
            if (keyword_lower in word or word in keyword_lower) and len(word) >= 4 and len(keyword_lower) >= 4:
                return True

        # Try matching in the description
        if assessment_desc:
            desc_words = assessment_desc.split()
            for word in desc_words:
                if (keyword_lower in word or word in keyword_lower) and len(word) >= 4 and len(keyword_lower) >= 4:
                    return True

    return False

def precision_at_k(recommended: List[Dict[str, Any]], relevant_keywords: List[str], k: int) -> float:
    """Calculate precision@k."""
    if k == 0 or not recommended:
        return 0.0

    hits = sum(1 for item in recommended[:k] if is_relevant(item, relevant_keywords))
    return hits / k

def recall_at_k(recommended: List[Dict[str, Any]], relevant_keywords: List[str], k: int) -> float:
    """Calculate recall@k."""
    if not relevant_keywords or not recommended:
        return 0.0

    hits = sum(1 for item in recommended[:k] if is_relevant(item, relevant_keywords))
    return hits / len(relevant_keywords)

def average_precision(recommended: List[Dict[str, Any]], relevant_keywords: List[str], k: int) -> float:
    """Calculate average precision@k."""
    if not recommended or not relevant_keywords:
        return 0.0

    precisions = []
    num_relevant_found = 0

    for i in range(min(k, len(recommended))):
        if is_relevant(recommended[i], relevant_keywords):
            num_relevant_found += 1
            precisions.append(num_relevant_found / (i + 1))

    if not precisions:
        return 0.0

    return sum(precisions) / min(len(relevant_keywords), k)

def evaluate_system():
    """Evaluate the recommendation system using the test queries."""
    print("Initializing recommendation system...")
    recommender = RecommendationSystem(ASSESSMENTS_PATH)

    recalls = []
    avg_precisions = []

    print("\n=== Evaluation Results ===")

    for i, test_case in enumerate(TEST_QUERIES):
        query = test_case["query"]
        relevant_keywords = test_case["relevant_assessments"]

        print(f"\nQuery {i+1}: {query}")
        print(f"Relevant assessment keywords: {relevant_keywords}")

        # Get recommendations
        recommendations = recommender.recommend(query, max_results=10)

        # Display the enhanced query stored by recommend()
        if getattr(recommender, 'enhanced_query', None):
            print(f"Enhanced query: {recommender.enhanced_query}")

        print("\nTop 3 Recommendations:")
        for j, rec in enumerate(recommendations[:3]):
            # Avoid printing "minutes minutes" when the duration already includes the unit
            duration_str = str(rec['duration'])
            if "minute" not in duration_str.lower():
                duration_display = f"{duration_str} minutes"
            else:
                duration_display = duration_str

            relevance_marker = "✓" if is_relevant(rec, relevant_keywords) else " "
            print(f"{j+1}. {rec['name']} (Duration: {duration_display}, Score: {rec['similarity_score']:.2f}) {relevance_marker}")

        # Calculate metrics at k=3
        k = 3
        recall = recall_at_k(recommendations, relevant_keywords, k)
        ap = average_precision(recommendations, relevant_keywords, k)

        recalls.append(recall)
        avg_precisions.append(ap)

        print(f"\nMetrics at k={k}:")
        print(f"Recall@{k}: {recall:.2f}")
        print(f"AP@{k}: {ap:.2f}")

        # Debug information about relevance matching
        print("\nRelevance details:")
        for j, rec in enumerate(recommendations[:k]):
            is_rel = is_relevant(rec, relevant_keywords)
            print(f"- {rec['name']}: {'Relevant' if is_rel else 'Not relevant'}")

    # Calculate mean metrics
    mean_recall = np.mean(recalls)
    mean_ap = np.mean(avg_precisions)

    print("\n=== Overall Performance ===")
    print(f"Mean Recall@3: {mean_recall:.4f}")
    print(f"MAP@3: {mean_ap:.4f}")

if __name__ == "__main__":
    evaluate_system()
requirements.txt
ADDED
@@ -0,0 +1,34 @@
# Web Framework
fastapi==0.104.1
uvicorn==0.23.2
streamlit==1.28.0
pydantic==2.4.2

# Web Scraping
beautifulsoup4==4.12.2
requests==2.31.0
selenium==4.15.2
webdriver-manager==4.0.1

# Data Processing
pandas==2.1.1
numpy==1.26.0
scikit-learn==1.3.1

# LLM and Vector Database
langchain==0.0.312
langchain-community==0.0.10
google-generativeai==0.3.0
# Fix compatibility issues by pinning versions
sentence-transformers==2.2.2
transformers==4.34.0
huggingface-hub==0.16.4
chromadb==0.4.18
faiss-cpu==1.7.4

# Utilities
python-dotenv==1.0.0
tqdm==4.66.1