added files
- .env +11 -0
- .gitignore +70 -0
- app.py +240 -0
- config.py +26 -0
- content.txt +118 -0
- eval.py +193 -0
- requirements.txt +34 -0
.env
ADDED
@@ -0,0 +1,11 @@
# API Keys
GEMINI_API_KEY="AIzaSyD031DqbvCunm0WT0sOeg1ulMzP2wOR2H0"

# Application Configuration
SHL_CATALOG_URL=https://www.shl.com/solutions/products/product-catalog/
MAX_RECOMMENDATIONS=10

# Optional - Uncomment and set these if needed
# BACKEND_PORT=8000
# FRONTEND_PORT=8501
# LOG_LEVEL=INFO
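The commented-out variables are optional. As a minimal sketch (assuming python-dotenv, which requirements.txt below pins), they could be read with fallbacks matching the commented defaults; the variable names come from the file above, the reading pattern is an assumption:

```python
# Minimal sketch: read the optional settings from .env, falling back to the
# defaults suggested by the commented-out lines above (assumed values).
import os
from dotenv import load_dotenv

load_dotenv()  # pulls GEMINI_API_KEY, SHL_CATALOG_URL, etc. into the environment

BACKEND_PORT = int(os.getenv("BACKEND_PORT", "8000"))
FRONTEND_PORT = int(os.getenv("FRONTEND_PORT", "8501"))
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
```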
.gitignore
ADDED
@@ -0,0 +1,70 @@
# Dependency directories
node_modules/
jspm_packages/
bower_components/

# Distribution directories
dist/
build/
out/

/env

# Environment files
.env
.env.local
.env.development.local
.env.test.local
.env.production.local

# IDE and editor files
.idea/
.vscode/
*.sublime-project
*.sublime-workspace
.project
.classpath
.settings/
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Operating system files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Optional caches
.npm
.eslintcache
.stylelintcache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Test coverage directory
coverage/

# Build files
*.tsbuildinfo

# Debug files
.node_repl_history

# Package files
*.tgz
.yarn-integrity
app.py
ADDED
@@ -0,0 +1,240 @@
import os
import re
import numpy as np
from typing import List, Dict, Any, Optional
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel
import google.generativeai as genai
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Configure Google Gemini API
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Initialize FastAPI app
app = FastAPI(
    title="SHL Assessment Recommendation API",
    description="API for recommending SHL assessments based on job descriptions or queries",
    version="1.0.0"
)

# Path to the data file
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(ROOT_DIR, "data", "processed")
ASSESSMENTS_PATH = os.path.join(DATA_DIR, "shl_test_solutions.csv")

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)

class RecommendationSystem:
    """Loads the assessment catalog, builds embeddings, and serves recommendations."""

    def __init__(self, data_path: str):
        self.df = pd.read_csv(data_path)
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.enhanced_query: Optional[str] = None  # set on each recommend() call

        # Clean and prepare data
        self.prepare_data()

        # Create embeddings
        self.create_embeddings()

        # Initialize Gemini model for query enhancement
        self.gemini_model = genai.GenerativeModel('gemini-1.5-pro')

    def prepare_data(self):
        """Clean and prepare the assessment data."""
        # Ensure all text columns are strings
        text_cols = ['name', 'description', 'job_levels', 'test_types_expanded']
        for col in text_cols:
            if col in self.df.columns:
                self.df[col] = self.df[col].fillna('').astype(str)

        # Extract duration in minutes as a numeric value
        self.df['duration_minutes'] = self.df['duration'].apply(
            lambda x: int(re.search(r'(\d+)', str(x)).group(1))
            if re.search(r'(\d+)', str(x))
            else 60  # Default when no numeric duration is present
        )

    def create_embeddings(self):
        """Create embeddings for assessments."""
        # Create a rich text representation of each assessment
        self.df['combined_text'] = self.df.apply(
            lambda row: f"Assessment: {row['name']}. "
                        f"Description: {row['description']}. "
                        f"Job Levels: {row['job_levels']}. "
                        f"Test Types: {row['test_types_expanded']}. "
                        f"Duration: {row['duration']}.",
            axis=1
        )

        # Generate embeddings
        print("Generating embeddings for assessments...")
        self.embeddings = self.model.encode(self.df['combined_text'].tolist())

        # Create FAISS index for fast similarity search
        self.dimension = self.embeddings.shape[1]
        self.index = faiss.IndexFlatL2(self.dimension)
        self.index.add(np.array(self.embeddings).astype('float32'))
        print(f"Created FAISS index with {len(self.df)} assessments")

    def enhance_query(self, query: str) -> str:
        """Use Gemini to enhance the query with assessment-relevant terms."""
        prompt = f"""
        I need to find SHL assessments based on this query: "{query}"

        Please reformulate this query to include specific skills, job roles, and assessment criteria
        that would help in finding relevant technical assessments. Focus on keywords like programming
        languages, technical skills, job levels, and any time constraints mentioned.

        Return only the reformulated query without any explanations or additional text.
        """

        try:
            response = self.gemini_model.generate_content(prompt)
            enhanced_query = response.text.strip()
            print(f"Original query: {query}")
            print(f"Enhanced query: {enhanced_query}")
            return enhanced_query
        except Exception as e:
            print(f"Error enhancing query with Gemini: {e}")
            return query  # Fall back to the original query if enhancement fails

    def parse_duration_constraint(self, query: str) -> Optional[int]:
        """Extract a duration constraint (in minutes) from the query, if present."""
        # Look for patterns like "within 45 minutes", "less than 30 minutes", etc.
        patterns = [
            r"(?:within|in|under|less than|no more than)\s+(\d+)\s+(?:min|mins|minutes)",
            r"(\d+)\s+(?:min|mins|minutes)(?:\s+(?:or less|max|maximum|limit))",
            r"(?:max|maximum|limit)(?:\s+(?:of|is))?\s+(\d+)\s+(?:min|mins|minutes)",
            r"(?:time limit|duration)(?:\s+(?:of|is))?\s+(\d+)\s+(?:min|mins|minutes)",
            r"(?:completed in|takes|duration of)\s+(\d+)\s+(?:min|mins|minutes)"
        ]

        for pattern in patterns:
            match = re.search(pattern, query, re.IGNORECASE)
            if match:
                return int(match.group(1))

        return None

    def recommend(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """Recommend assessments based on a query."""
        # Enhance the query using Gemini; store it so callers can surface it
        enhanced_query = self.enhance_query(query)
        self.enhanced_query = enhanced_query

        # Extract duration constraint, if any
        duration_limit = self.parse_duration_constraint(query)

        # Generate an embedding for the enhanced query
        query_embedding = self.model.encode([enhanced_query])

        # Search for similar assessments across the whole catalog
        D, I = self.index.search(np.array(query_embedding).astype('float32'), len(self.df))

        # Map each catalog index to its L2 distance so scores stay correct
        # even after the duration filter reorders/removes indices
        distance_by_index = {int(i): float(d) for d, i in zip(D[0], I[0])}
        indices = list(I[0])

        # Apply duration filter if specified
        if duration_limit:
            filtered_indices = [
                idx for idx in indices
                if self.df.iloc[idx]['duration_minutes'] <= duration_limit
            ]
            indices = filtered_indices if filtered_indices else indices

        # Prepare results, limiting to max_results
        results = []
        for idx in indices[:max_results]:
            assessment = self.df.iloc[idx]
            results.append({
                "name": assessment["name"],
                "url": assessment["url"],
                "remote_testing": assessment["remote_testing"],
                "adaptive_irt": assessment["adaptive_irt"],
                "duration": assessment["duration"],
                "test_types": assessment["test_types"],
                "test_types_expanded": assessment["test_types_expanded"],
                "description": assessment["description"],
                "job_levels": assessment["job_levels"],
                # Rough normalization of L2 distance to a 0-1 similarity score
                "similarity_score": float(1.0 - distance_by_index[idx] / 100)
            })

        return results

# Initialize the recommendation system
try:
    recommender = RecommendationSystem(ASSESSMENTS_PATH)
    print("Recommendation system initialized successfully")
except Exception as e:
    print(f"Error initializing recommendation system: {e}")
    recommender = None

# Define API response models
class AssessmentRecommendation(BaseModel):
    name: str
    url: str
    remote_testing: str
    adaptive_irt: str
    duration: str
    test_types: str
    test_types_expanded: str
    description: str
    job_levels: str
    similarity_score: float

class RecommendationResponse(BaseModel):
    query: str
    enhanced_query: str
    recommendations: List[AssessmentRecommendation]

# Define API endpoints
@app.get("/", response_model=dict)
def root():
    """Root endpoint that returns API information."""
    return {
        "name": "SHL Assessment Recommendation API",
        "version": "1.0.0",
        "endpoints": {
            "/recommend": "GET endpoint for assessment recommendations"
        }
    }

@app.get("/recommend", response_model=RecommendationResponse)
def recommend(
    query: str = Query(..., description="Natural language query or job description text"),
    max_results: int = Query(10, ge=1, le=10, description="Maximum number of results to return")
):
    """Recommend SHL assessments based on a query."""
    if not recommender:
        raise HTTPException(
            status_code=500,
            detail="Recommendation system not initialized properly"
        )

    # Get recommendations; recommend() stores the enhanced query, so we can
    # surface it without making a second Gemini call
    recommendations = recommender.recommend(query, max_results=max_results)

    return {
        "query": query,
        "enhanced_query": recommender.enhanced_query or query,
        "recommendations": recommendations
    }

# Run the application
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
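For reference, a minimal sketch of exercising the `/recommend` endpoint once the server above is running locally. The endpoint path, parameters, and response fields come from app.py; the query text, port, and `max_results` value here are illustrative assumptions:

```python
# Sketch of a client call against the API defined in app.py.
# Assumes the server is running locally (python app.py) and the
# requests library is installed; the query string is made up.
import requests

resp = requests.get(
    "http://localhost:8000/recommend",
    params={
        "query": "Java developers who collaborate with business teams, 40 minutes max",
        "max_results": 5,
    },
    timeout=120,  # the first call can be slow: Gemini enhancement plus embedding
)
resp.raise_for_status()
data = resp.json()
print("Enhanced query:", data["enhanced_query"])
for rec in data["recommendations"]:
    print(rec["name"], rec["duration"], round(rec["similarity_score"], 2))
```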
config.py
ADDED
@@ -0,0 +1,26 @@
"""Configuration settings for the SHL Assessment Recommendation System."""
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# API and service configurations
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
SHL_CATALOG_URL = "https://www.shl.com/solutions/products/product-catalog/"

# Data directories
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
RAW_DATA_DIR = os.path.join(DATA_DIR, "raw")
PROCESSED_DATA_DIR = os.path.join(DATA_DIR, "processed")
MODELS_DIR = os.path.join(PROJECT_ROOT, "models")

# Create directories if they don't exist
for directory in [DATA_DIR, RAW_DATA_DIR, PROCESSED_DATA_DIR, MODELS_DIR]:
    os.makedirs(directory, exist_ok=True)

# Application settings
MAX_RECOMMENDATIONS = 10
DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
CHROMA_DB_PATH = os.path.join(MODELS_DIR, "chroma_db")
content.txt
ADDED
@@ -0,0 +1,118 @@
# 🧠 SHL AI Intern RE Generative AI Assignment

## 📌 Task Overview: Build an SHL Assessment Recommendation System

Hiring managers often struggle to find the right assessments for the roles they're hiring for. Currently, this process relies heavily on keyword searches and filters, making it inefficient.

Your task is to **build an intelligent recommendation system** that simplifies this process.

> Given a **natural language query**, a **job description (JD)**, or a **URL**, your application should return a list of relevant SHL assessments.

You can refer to SHL's assessment catalog here:
👉 [SHL Product Catalog](https://www.shl.com/solutions/products/product-catalog/)

---

## ✅ Objectives

Build a **web application** that:

1. Accepts a **natural language query**, **job description text**, or a **job post URL**.
2. Returns a list of the **1–10 most relevant SHL assessments**.
3. Displays the recommendations in **tabular format** with the following columns:
   - **Assessment Name** (linked to SHL's catalog)
   - **Remote Testing Support** (Yes/No)
   - **Adaptive/IRT Support** (Yes/No)
   - **Test Duration**
   - **Test Type**

---

## 📤 Submission Guidelines

Submit the following via this [Microsoft Form](https://forms.office.com/r/Pq8dYPEGH4):

- **Hosted Web Demo URL** – Frontend that accepts queries and shows results
- **API Endpoint URL** – Accepts query/text and returns structured JSON
- **GitHub Repository URL** – Containing complete source code
- **1-Page Approach Document** – Include tools, libraries, and methods used

---

## 🧪 Evaluation Criteria

Your submission will be evaluated on:

### 🔍 Approach
- How the catalog was crawled/processed
- Data representation and search techniques
- Usage of the LLM stack (e.g., LangChain, Gemini, etc.)
- Tracing and evaluation tools used

### 🎯 Accuracy
- Using benchmark sets
- Metrics:
  - **Mean Recall@3**
  - **MAP@3**

### 🧑‍💻 Demo Quality
- Working end-to-end solution
- Attention to usability and details
- Usage of low-code frameworks like **Streamlit** or **Gradio** is acceptable

---

## 📊 Accuracy Metrics

### Mean Recall@K

```text
Recall@K = (Number of relevant results in top K) / (Total number of relevant results)

Mean Recall@K = (1/N) * Σ Recall@K_i

Where:
N = number of test queries
```

### Mean Average Precision@K (MAP@K)

```text
AP@K = (1 / min(K, R)) * Σ (P(k) * rel(k)) for k = 1 to K

MAP@K = (1/N) * Σ AP@K_i

Where:
R = total number of relevant results
P(k) = Precision at rank k
rel(k) = 1 if the item at rank k is relevant, else 0
N = total number of queries
```

The higher the Mean Recall@K and MAP@K, the better the performance.

---

## 📄 Example Queries

Here are some test cases to evaluate your system:

1. "I am hiring for Java developers who can also collaborate effectively with my business teams. Looking for an assessment(s) that can be completed in 40 minutes."
2. "Looking to hire mid-level professionals proficient in Python, SQL and JavaScript. Need an assessment package that can test all skills with max duration of 60 minutes."
3. "Here is a JD text, can you recommend some assessments that can help me screen applications? Time limit is less than 30 minutes."
4. "I am hiring for an analyst and want applications to be screened using cognitive and personality tests. What options are available within 45 minutes?"

---

## 🔗 Resources

- [SHL Product Catalog](https://www.shl.com/solutions/products/product-catalog/)
- [Google Gemini Free API Docs](https://ai.google.dev/gemini-api/docs/pricing)
- [Submission Form](https://forms.office.com/r/Pq8dYPEGH4)
- Sample Job Description for Testing: SHL AI Research Engineer Job Posting
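To make the two formulas above concrete, here is a small worked sketch (the relevance pattern and total-relevant count are toy values, not from the benchmark):

```python
# Toy example of Recall@3 and AP@3 as defined in content.txt above.
def recall_at_k(rel, k, total_relevant):
    # rel[i] is 1 if the item at rank i+1 is relevant, else 0
    return sum(rel[:k]) / total_relevant

def ap_at_k(rel, k, total_relevant):
    hits, precisions = 0, []
    for i, r in enumerate(rel[:k]):
        if r:
            hits += 1
            precisions.append(hits / (i + 1))  # P(k) at each relevant rank
    return sum(precisions) / min(k, total_relevant) if precisions else 0.0

rel = [1, 0, 1]  # ranks 1 and 3 are relevant, out of 4 relevant items overall
print(recall_at_k(rel, 3, total_relevant=4))  # 2/4 = 0.5
print(ap_at_k(rel, 3, total_relevant=4))      # (1/1 + 2/3) / 3 ≈ 0.556
```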
eval.py
ADDED
@@ -0,0 +1,193 @@
"""Evaluation script for the SHL Assessment Recommendation System."""
import os
import numpy as np
from typing import List, Dict, Any
from app import RecommendationSystem

# Path to the data file (relative to this script, matching app.py)
DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "processed")
ASSESSMENTS_PATH = os.path.join(DATA_DIR, "shl_test_solutions.csv")

# Test queries with ground-truth relevant assessments.
# In a real scenario, you would have a proper evaluation dataset with human-labeled relevance.
TEST_QUERIES = [
    {
        "query": "I am hiring for Java developers who can also collaborate effectively with my business teams. Looking for an assessment(s) that can be completed in 40 minutes.",
        "relevant_assessments": ["Java", "Core Java", "Java Spring Boot", "Java Programming", "Collaboration Skills"],
        "time_constraint": 40
    },
    {
        "query": "Looking to hire mid-level professionals who are proficient in Python, SQL and JavaScript. Need an assessment package that can test all skills with max duration of 60 minutes.",
        "relevant_assessments": ["Python", "SQL", "JavaScript", "Full Stack Developer", "Web Development"],
        "time_constraint": 60
    },
    {
        "query": "I am hiring for an analyst and want applications to be screened using cognitive and personality tests. What options are available within 45 minutes?",
        "relevant_assessments": ["Analytical Thinking", "Cognitive Ability", "Personality", "Decision Making", "Data Analysis"],
        "time_constraint": 45
    }
]

def is_relevant(assessment: Dict[str, Any], relevant_keywords: List[str]) -> bool:
    """
    Check if an assessment is relevant based on keywords in its name or description.

    Args:
        assessment: Assessment dictionary with 'name' and optionally 'description'
        relevant_keywords: List of keywords to match against

    Returns:
        Boolean indicating relevance
    """
    assessment_name = assessment["name"].lower()
    assessment_desc = assessment.get("description", "").lower() if isinstance(assessment.get("description", ""), str) else ""

    # Special case for cognitive/personality assessments
    if any(kw.lower() in ["cognitive ability", "personality", "analytical thinking"] for kw in relevant_keywords):
        cognitive_keywords = ["reasoning", "cognitive", "numerical", "verbal", "inductive", "deductive", "verify"]
        personality_keywords = ["personality", "trait", "behavior", "opq"]
        analytical_keywords = ["analytical", "analysis", "problem solving", "critical thinking"]

        # Check if the assessment name or description contains any cognitive/personality keywords
        if any(kw in assessment_name for kw in cognitive_keywords + personality_keywords + analytical_keywords):
            return True
        if assessment_desc and any(kw in assessment_desc for kw in cognitive_keywords + personality_keywords + analytical_keywords):
            return True

    # General keyword matching
    for keyword in relevant_keywords:
        keyword_lower = keyword.lower()

        # Direct match in name or description
        if keyword_lower in assessment_name or (assessment_desc and keyword_lower in assessment_desc):
            return True

        # Word-level matching to avoid partial word matches
        name_words = assessment_name.split()
        for word in name_words:
            # Allow stemming-like matching (e.g. 'Python' matches 'Python-based')
            if (keyword_lower in word or word in keyword_lower) and len(word) >= 4 and len(keyword_lower) >= 4:
                return True

        # Try matching in the description
        if assessment_desc:
            desc_words = assessment_desc.split()
            for word in desc_words:
                if (keyword_lower in word or word in keyword_lower) and len(word) >= 4 and len(keyword_lower) >= 4:
                    return True

    return False

def precision_at_k(recommended: List[Dict[str, Any]], relevant_keywords: List[str], k: int) -> float:
    """Calculate precision@k."""
    if k == 0 or not recommended:
        return 0.0

    hits = sum(1 for item in recommended[:k] if is_relevant(item, relevant_keywords))
    return hits / k

def recall_at_k(recommended: List[Dict[str, Any]], relevant_keywords: List[str], k: int) -> float:
    """Calculate recall@k."""
    if not relevant_keywords or not recommended:
        return 0.0

    hits = sum(1 for item in recommended[:k] if is_relevant(item, relevant_keywords))
    return hits / len(relevant_keywords)

def average_precision(recommended: List[Dict[str, Any]], relevant_keywords: List[str], k: int) -> float:
    """Calculate average precision@k."""
    if not recommended or not relevant_keywords:
        return 0.0

    precisions = []
    num_relevant_found = 0

    for i in range(min(k, len(recommended))):
        if is_relevant(recommended[i], relevant_keywords):
            num_relevant_found += 1
            precisions.append(num_relevant_found / (i + 1))

    if not precisions:
        return 0.0

    return sum(precisions) / min(len(relevant_keywords), k)

def evaluate_system():
    """Evaluate the recommendation system using the test queries."""
    print("Initializing recommendation system...")
    recommender = RecommendationSystem(ASSESSMENTS_PATH)

    recalls = []
    avg_precisions = []

    print("\n=== Evaluation Results ===")

    for i, test_case in enumerate(TEST_QUERIES):
        query = test_case["query"]
        relevant_keywords = test_case["relevant_assessments"]

        print(f"\nQuery {i+1}: {query}")
        print(f"Relevant assessment keywords: {relevant_keywords}")

        # Get recommendations
        recommendations = recommender.recommend(query, max_results=10)

        # Display the enhanced query stored by recommend()
        if getattr(recommender, 'enhanced_query', None):
            print(f"Enhanced query: {recommender.enhanced_query}")

        print("\nTop 3 Recommendations:")
        for j, rec in enumerate(recommendations[:3]):
            # Avoid printing "minutes minutes" when the duration already includes the unit
            duration_str = str(rec['duration'])
            if "minute" not in duration_str.lower():
                duration_display = f"{duration_str} minutes"
            else:
                duration_display = duration_str

            relevance_marker = "✓" if is_relevant(rec, relevant_keywords) else " "
            print(f"{j+1}. {rec['name']} (Duration: {duration_display}, Score: {rec['similarity_score']:.2f}) {relevance_marker}")

        # Calculate metrics at k=3
        k = 3
        recall = recall_at_k(recommendations, relevant_keywords, k)
        ap = average_precision(recommendations, relevant_keywords, k)

        recalls.append(recall)
        avg_precisions.append(ap)

        print(f"\nMetrics at k={k}:")
        print(f"Recall@{k}: {recall:.2f}")
        print(f"AP@{k}: {ap:.2f}")

        # Debug information about relevance matching
        print("\nRelevance details:")
        for j, rec in enumerate(recommendations[:k]):
            is_rel = is_relevant(rec, relevant_keywords)
            print(f"- {rec['name']}: {'Relevant' if is_rel else 'Not relevant'}")

    # Calculate mean metrics
    mean_recall = np.mean(recalls)
    mean_ap = np.mean(avg_precisions)

    print("\n=== Overall Performance ===")
    print(f"Mean Recall@3: {mean_recall:.4f}")
    print(f"MAP@3: {mean_ap:.4f}")

if __name__ == "__main__":
    evaluate_system()
requirements.txt
ADDED
@@ -0,0 +1,34 @@
# Web Framework
fastapi==0.104.1
uvicorn==0.23.2
streamlit==1.28.0
pydantic==2.4.2

# Web Scraping
beautifulsoup4==4.12.2
requests==2.31.0
selenium==4.15.2
webdriver-manager==4.0.1

# Data Processing
pandas==2.1.1
numpy==1.26.0
scikit-learn==1.3.1

# LLM and Vector Database
langchain==0.0.312
langchain-community==0.0.10
google-generativeai==0.3.0
# Fix compatibility issues by pinning versions
sentence-transformers==2.2.2
transformers==4.34.0
huggingface-hub==0.16.4
chromadb==0.4.18
faiss-cpu==1.7.4

# Utilities
python-dotenv==1.0.0
tqdm==4.66.1