divyesh01 committed on
Commit
41ea5e0
·
verified ·
1 Parent(s): d5e41ae

File Uploading

Browse files
Files changed (10) hide show
  1. README.md +1 -10
  2. SERP.py +88 -0
  3. apify.py +65 -0
  4. nlp_parsed.py +142 -0
  5. postgres_db.py +247 -0
  6. requirements.txt +8 -0
  7. saral-ai.py +301 -0
  8. saral_ai_api.py +231 -0
  9. templates/index.html +673 -0
  10. validate.py +83 -0
README.md CHANGED
@@ -1,10 +1 @@
1
- ---
2
- title: Saral Ai
3
- emoji: 📚
4
- colorFrom: green
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # SARAL-AI
 
 
 
 
 
 
 
 
 
SERP.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from postgres_db import fetch_from_saral_data , data_input , check_completeness, cur, get_connection
5
+
6
+
7
+ load_dotenv()
8
+
9
+ SERP_API_KEY = os.getenv("SERP_API_KEY")
10
+
11
+
12
+
13
+
14
+
15
def _as_terms(value):
    """Normalise a scalar-or-list field into a list of non-empty search terms."""
    if not value:
        return []
    if isinstance(value, (list, tuple)):
        return [item for item in value if item]
    # A bare string is one term; iterating it would yield single characters.
    return [value]


def query_making(data):
    """Build the Google query used to find LinkedIn profiles for a parsed request.

    Args:
        data: Mapping produced by the recruiter-query parser. The keys
            ``job_title``, ``skills``, ``experience``, ``location``,
            ``work_preference`` and ``job_type`` are all optional; missing or
            empty values are skipped.

    Returns:
        A ``(query, location)`` tuple. ``location`` is the ``location`` value
        of ``data`` passed through unchanged (``None`` when absent).
    """
    parts = ["site:linkedin.com/in"]

    job_title = data.get("job_title")
    if job_title:
        parts.append(f'"{job_title}"')

    parts.extend(f'"{skill}"' for skill in _as_terms(data.get("skills")))

    experience = data.get("experience")
    if experience:
        exp = str(experience).strip()
        if exp.lower() == "fresher":
            # "fresher" is a level, not a number of years.
            parts.append(f'"{exp}"')
        else:
            # The parser may return "5+"; strip the sign so the second
            # alternative does not become "5++ years".
            exp = exp.rstrip("+").strip()
            parts.append(f'"{exp} years" OR "{exp}+ years"')

    location = data.get("location")
    parts.extend(f'"{place}"' for place in _as_terms(location))

    work_preference = data.get("work_preference")
    if work_preference:
        parts.append(f'"{work_preference}"')

    job_type = data.get("job_type")
    if job_type:
        parts.append(f'"{job_type}"')

    # Exclude job adverts so the results are people, not postings.
    add_keywords = ' -"job" -"jobs" -"hiring" -"vacancy" -"openings" -"career" -"apply"'
    query = " ".join(parts) + add_keywords

    return query, location
49
+
50
+
51
def serp_api_call(query, start=0, results_per_page=10):
    """Run one page of a Google search through SerpAPI.

    Args:
        query: Search string; surrounding whitespace is stripped.
        start: Zero-based offset of the first result (used for pagination).
        results_per_page: Number of results requested for the page.

    Returns:
        The decoded JSON response as a dict, or ``None`` when the request
        fails, returns a non-200 status, or the body is not valid JSON.
    """
    params = {
        "engine": "google",
        "q": query.strip(),
        "api_key": SERP_API_KEY,
        "hl": "en",
        "gl": "in",
        "google_domain": "google.co.in",
        "location": "India",
        "num": results_per_page,
        "start": start,
        "safe": "active",
    }

    try:
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.get("https://serpapi.com/search", params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"SERP request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:  # requests' JSON decode error subclasses ValueError
        print(f"SERP response was not valid JSON: {exc}")
        return None
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
apify.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from apify_client import ApifyClient
2
+ from dotenv import load_dotenv
3
+ import os
4
+
5
+ load_dotenv()
6
+
7
+ APIFY_API_KEY = os.getenv("APIFY_API_TOKEN")
8
+
9
+
10
+ linkedin_profiles = {
11
+ "1": "https://linkedin.com/in/ramya-rajendran-730b46a9",
12
+ "2": "https://linkedin.com/in/dhruv-patel-39a333263",
13
+ "3": "https://linkedin.com/in/harsh-patel9797",
14
+ "4": "https://linkedin.com/in/denish-patel-64a8bb183",
15
+ "5": "https://linkedin.com/in/swapnildjoshi",
16
+ "6": "https://linkedin.com/in/bhavin-vaghasiya-82839522a",
17
+ "7": "https://linkedin.com/in/dharmesh-sharma-6a09a0192",
18
+ "8": "https://linkedin.com/in/bhawanii-raajpurohit-72991b1b5",
19
+ "9": "https://linkedin.com/in/trushali-miyani-69aa26276",
20
+ "10": "https://linkedin.com/in/isha-bhanderi-244638246",
21
+ }
22
+
23
+ client = ApifyClient(APIFY_API_KEY)
24
+
25
def apify_call(linkedin_profiles):
    """Scrape the given LinkedIn profile URLs with the Apify actor.

    Args:
        linkedin_profiles: Mapping whose values are LinkedIn profile URLs
            (the keys are ignored).

    Returns:
        A list with one raw item per scraped profile, exactly as the actor's
        dataset yields them. Returns an empty list when there is nothing to
        scrape or the actor run produced no result.
    """
    list_links = list(linkedin_profiles.values()) if linkedin_profiles else []
    if not list_links:
        # Nothing to scrape: do not start an actor run for an empty list.
        return []

    print(list_links)

    run = client.actor("2SyF0bVxmgGr8IVCZ").call(run_input={"profileUrls": list_links})
    if not run:
        # NOTE(review): the client is documented to return None when the run
        # cannot be fetched; confirm against the installed apify-client version.
        print("Apify actor run returned no result")
        return []

    return list(client.dataset(run["defaultDatasetId"]).iterate_items())
64
+
65
+
nlp_parsed.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from openai import AzureOpenAI
5
+ import json
6
+ from postgres_db import store_prompt, conn
7
+
8
+ load_dotenv()
9
+
10
+ SERP_API_KEY = os.getenv("SERP_API_KEY")
11
+ endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
12
+ api_key = os.getenv("AZURE_OPENAI_API_KEY")
13
+ api_version = os.getenv("AZURE_OPENAI_API_VERSION")
14
+ deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
15
+
16
+
17
# Build the shared Azure OpenAI client once at import time. The functions in
# this module test `if not client`, so the name must exist even when
# construction fails; otherwise that check raises NameError.
try:
    client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=endpoint)
except Exception as e:
    print(f"Failed to initialize Azure OpenAI client: {e}")
    client = None
21
+
22
+
23
def parse_recruiter_query(query):
    """Parse recruiter query using AI to extract structured data.

    Args:
        query: Free-text recruiter request.

    Returns:
        The dict decoded from the model's JSON reply (expected keys:
        ``job_title``, ``skills``, ``experience``, ``location``,
        ``work_preference``, ``job_type``, ``is_indian``), or a dict with a
        single ``"error"`` key when the client is unavailable, the reply is
        not valid JSON, or the API call fails.
    """
    if not client:
        return {"error": "Azure OpenAI client not available"}

    try:
        system_prompt = """You are an AI assistant that extracts structured recruitment information from natural language queries.

Fields to extract:
- job_title: ONLY the exact position title they're hiring for (e.g., "Python Developer", "Data Scientist").
DO NOT include phrases like "looking for", "need a", "hiring", etc.
- skills: Array of required technical skills mentioned (e.g., ["Python", "Django", "SQL"])
- experience: Required experience in years (numeric value or range). For fresher candidates, use "fresher" exactly.
- location: Array of city names if multiple cities are mentioned, or single city name as Array if only one city is mentioned.
- work_preference: Work mode preference - one of: "remote", "onsite", "hybrid", null
- job_type: Employment type - one of: "full-time", "part-time", "contract", "internship", null
- is_indian: true if the job location(s) are in India, false otherwise.
IMPORTANT: If no location is mentioned, always set is_indian = true.

CRITICAL INSTRUCTIONS:
1. For job_title, NEVER include phrases like "looking for", "need", "hiring", etc.
2. For experience, if the query mentions "fresher", "fresh graduate", "entry level", use exactly "fresher"
3. For is_indian, check the location(s). If the location(s) are Indian cities or the query context is India-based, return true.
If no location is mentioned at all, default to true.
4. Return ONLY valid JSON without any explanation or additional text.
5. Use your knowledge to recognize job titles across all industries and domains."""

        user_prompt = f"""Extract recruitment information from this query: "{query}"

Examples of correct extraction:

Input: "We are looking for a Python developer with 3 years experience from Mumbai"
Output: {{"job_title": "Python Developer", "skills": ["Python"], "experience": "3", "location": ["Mumbai"], "work_preference": null, "job_type": null, "is_indian": true}}

Input: "Need a senior React frontend developer with Redux, TypeScript, 5+ years"
Output: {{"job_title": "React Frontend Developer", "skills": ["React", "Redux", "TypeScript"], "experience": "5+", "location": null, "work_preference": null, "job_type": null, "is_indian": true}}

Input: "python developer with 2 year of experience from surat, ahmedabad and mumbai"
Output: {{"job_title": "Python Developer", "skills": ["Python"], "experience": "2", "location": ["Surat", "Ahmedabad", "Mumbai"], "work_preference": null, "job_type": null, "is_indian": true}}

Input: "Remote React developer needed, 5 years experience, Redux, TypeScript"
Output: {{"job_title": "React Developer", "skills": ["React", "Redux", "TypeScript"], "experience": "5", "location": null, "work_preference": "remote", "job_type": null, "is_indian": true}}

Input: "Looking for fresher Java developer from Delhi"
Output: {{"job_title": "Java Developer", "skills": ["Java"], "experience": "fresher", "location": ["Delhi"], "work_preference": null, "job_type": null, "is_indian": true}}

Now extract from the query: "{query}"

Remember:
1. Extract ONLY the job title without any prefixes like "looking for", "need", etc.
2. Extract ONLY the city/location name without additional text.
3. For fresher candidates, use exactly "fresher" as experience value.
4. For is_indian: true if job location(s) are Indian, false otherwise. If no location is provided, always return true.
5. Return ONLY valid JSON."""

        response = client.chat.completions.create(
            model=deployment,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.0,
            max_tokens=500,
        )

        # The content can be None or empty; treat that as an invalid reply
        # rather than letting a TypeError surface as "Unexpected error".
        raw = (response.choices[0].message.content or "").strip()

        # Models sometimes wrap the JSON in a Markdown code fence despite the
        # instructions; unwrap it before decoding.
        fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", raw, re.DOTALL)
        if fenced:
            raw = fenced.group(1)

        return json.loads(raw)

    except json.JSONDecodeError:
        return {"error": "Invalid JSON returned from AI"}
    except Exception as e:
        return {"error": f"Unexpected error: {str(e)}"}
94
+
95
+
96
+
97
def prompt_enhancer(prompt: str) -> str:
    """Enhance recruiter prompt to be clearer and more structured.

    Args:
        prompt: The recruiter's raw query text.

    Returns:
        The model's rewritten prompt, or the original ``prompt`` unchanged
        when the client is unavailable, the call fails, or the model returns
        no text.
    """
    if not client:
        return prompt  # fallback: return original if Azure client not available

    try:
        system_prompt = """You are an AI assistant that enhances recruiter job search prompts.
Your goal is to:
1. Clean up grammar and spelling mistakes.
2. Expand shorthand into full professional wording.
3. Preserve all important details: job title, skills, experience, location, work mode, job type.
4. Do NOT invent new requirements — only clarify what’s already in the query.
5. Do not copy examples literally — adapt based on the actual input.
6. Return ONLY the enhanced recruiter prompt as plain text (no JSON)."""

        user_prompt = f"""Rewrite and enhance this recruiter query for clarity:

Input: "{prompt}"

Example Enhancements:
- "python dev 2yr exp surat" → "Looking for a Python Developer with 2 years of experience in Surat."
- "need react js fresher remote" → "Hiring a React.js Developer at fresher level for a remote role."
- "java 5+ exp ahmedabad onsite" → "Looking for a Java Developer with over 5 years of experience for an onsite role in Ahmedabad."
- "data analyst 3 years bangalore hybrid" → "Seeking a Data Analyst with 3 years of experience for a hybrid position in Bangalore."
- "ui ux designer fresher mumbai internship" → "Hiring a UI/UX Designer, fresher level, for an internship role in Mumbai."

Now enhance this query: "{prompt}"
"""

        response = client.chat.completions.create(
            model=deployment,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.5,  # slightly more creative
            max_tokens=200,
        )

        enhanced_prompt = (response.choices[0].message.content or "").strip()
        # An empty completion must not wipe out what the user typed.
        return enhanced_prompt or prompt

    except Exception as e:
        print(f"Error in prompt_enhancer: {e}")
        return prompt  # fallback to original
142
+
postgres_db.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import psycopg2
2
+ import json
3
+ from datetime import datetime, timedelta
4
+
5
+
6
+
7
import os

# Importing modules call load_dotenv() only AFTER importing this module, so
# load .env here as well; otherwise the settings below would not be visible
# when the module-level connection is opened.
try:
    from dotenv import load_dotenv
except ImportError:
    pass
else:
    load_dotenv()

# Connection settings are read from the environment. Non-secret values keep
# their previous literals as defaults so existing deployments keep working.
hostname = os.getenv("POSTGRES_HOST", "13.201.135.196")
database = os.getenv("POSTGRES_DB", "saral_ai")
username = os.getenv("POSTGRES_USER", "saral_user")
# SECURITY: the password used to be hard-coded here and was committed to the
# repository. Treat that value as leaked: rotate it and supply the new one
# through POSTGRES_PASSWORD only.
pwd = os.getenv("POSTGRES_PASSWORD")
port_id = int(os.getenv("POSTGRES_PORT", "5432"))
12
+
13
+
14
+
15
+ conn = None
16
+ cur = None
17
+
18
+
19
def get_connection():
    """Open and return a new psycopg2 connection using the module settings."""
    settings = {
        "host": hostname,
        "dbname": database,
        "user": username,
        "password": pwd,
        "port": port_id,
    }
    return psycopg2.connect(**settings)
27
+
28
def _is_blank(value):
    """Return True for the empty markers used by the completeness checks."""
    return value is None or value == "" or value == []


def check_completeness(cur, name, location, linkedin_url, headline, skills, experience):
    """Classify a scraped profile before it is inserted into ``saral_data``.

    Args:
        cur: Open database cursor used for the duplicate lookup.
        name, location, linkedin_url: Required profile fields.
        headline, skills, experience: Optional profile fields.

    Returns:
        A ``(ok, message, is_complete)`` tuple:

        * ``(False, "this data is duplicate", False)`` when a row with the
          same ``linkedin_url`` already exists;
        * ``(True, "missing required fields", False)`` when a required field
          is blank;
        * ``(True, "some optional fields missing", False)`` when only an
          optional field is blank;
        * ``(True, "this data is complete", True)`` otherwise.
    """
    # Without a URL there is nothing to compare against, so skip the lookup.
    if not _is_blank(linkedin_url):
        cur.execute("SELECT id FROM saral_data WHERE linkedin_url = %s", (linkedin_url,))
        if cur.fetchone():
            return False, "this data is duplicate", False

    # Report the required-field problem first so the optional-field check
    # cannot overwrite the more important message.
    if any(_is_blank(field) for field in (name, location, linkedin_url)):
        return True, "missing required fields", False

    if any(_is_blank(field) for field in (headline, skills, experience)):
        return True, "some optional fields missing", False

    return True, "this data is complete", True
52
+
53
+
54
+
55
+
56
+
57
+
58
+
59
def _parse_json_field(value):
    """Decode *value* when it is a JSON string; return [] when missing or invalid."""
    if value is None:
        return []
    if isinstance(value, str):
        try:
            return json.loads(value)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []
    return value


def data_input(json_data):
    """Insert complete, non-duplicate scraped profiles into ``saral_data``.

    Each item is classified with ``check_completeness``; only items reported
    as complete are inserted. All inserts share one transaction on the
    module-level ``conn``: it is committed at the end and rolled back if
    anything fails, so the shared connection is not left in an aborted state.

    Args:
        json_data: Iterable of profile dicts using the scraper's key names
            (``fullName``, ``addressWithCountry``, ``email``, ``linkedinUrl``,
            ``headline``, ``profilePic``, ``skills``, ``experiences``,
            ``about``). ``None`` is treated as empty.

    Raises:
        Exception: Any database error is re-raised after the rollback.
    """
    insert_script = '''
    INSERT INTO saral_data
    (name, location, email, linkedin_url, headline, skills, about, experience, profile_pic, is_complete, created_at)
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    '''
    try:
        with conn.cursor() as cur:
            for d in json_data or []:
                if not isinstance(d, dict):
                    continue

                name = d.get("fullName")
                location = d.get("addressWithCountry")
                email = d.get("email")
                linkedin_url = d.get("linkedinUrl")
                headline = d.get("headline")
                profile_pic = d.get("profilePic")
                about = d.get("about")

                # Skills arrive as a list of {"title": ...} dicts, or as a
                # JSON string of that list; keep only the titles.
                skills_raw = _parse_json_field(d.get("skills"))
                if not isinstance(skills_raw, list):
                    skills_raw = []
                skills_list = [s.get("title") for s in skills_raw if isinstance(s, dict)]
                skills = json.dumps(skills_list)

                experience_raw = _parse_json_field(d.get("experiences"))
                experience = json.dumps(experience_raw)

                success, message, is_complete = check_completeness(
                    cur, name, location, linkedin_url, headline, skills_list, experience_raw
                )
                print(message)

                if not is_complete:
                    continue

                created_at = datetime.now()
                cur.execute(
                    insert_script,
                    (
                        name, location, email, linkedin_url, headline,
                        skills, about, experience, profile_pic, is_complete, created_at,
                    ),
                )

        conn.commit()
    except Exception:
        conn.rollback()
        raise
113
+
114
+
115
+
116
def fetch_from_saral_data(serp_data, conn):
    """Split SERP LinkedIn links into cached profiles and links still to scrape.

    Args:
        serp_data: Decoded search response; LinkedIn profile links are read
            from the ``link`` field of its ``organic_results`` entries.
        conn: Open database connection used for the cache lookup.

    Returns:
        A ``(results, remaining)`` tuple. ``results`` holds one profile dict
        (scraper key names) per link that has a ``saral_data`` row created in
        the last 30 days; ``remaining`` lists the links with no such row.
        Both are empty when ``serp_data`` is missing or not a dict.
    """
    if not serp_data or not isinstance(serp_data, dict):
        print("⚠️ fetch_from_saral_data: serp_data is None or not a dict")
        return [], []  # return empty lists safely

    one_month_ago = datetime.now() - timedelta(days=30)

    links = []
    for result in serp_data.get("organic_results") or []:
        link = result.get("link") if isinstance(result, dict) else None
        # "in.linkedin.com/in/" already contains "linkedin.com/in/", so one
        # test covers both host variants.
        if link and "linkedin.com/in/" in link:
            links.append(link.replace("in.linkedin.com", "linkedin.com"))

    # Normalisation can turn two results into the same URL. De-duplicate
    # (keeping order) so a profile is not looked up, and later scraped, twice.
    links = list(dict.fromkeys(links))

    results = []
    remaining = []
    if not links:
        return results, remaining

    with conn.cursor() as cur:
        for link in links:
            cur.execute("""
                SELECT name, location, email, linkedin_url, headline, skills, about, experience, profile_pic, is_complete, created_at
                FROM saral_data
                WHERE linkedin_url = %s AND created_at >= %s

            """, (link, one_month_ago))

            row = cur.fetchone()
            if not row:
                remaining.append(link)
                continue

            results.append({
                "fullName": row[0] or "Unknown",
                "addressWithCountry": row[1] or "Unknown",
                "email": row[2] or "-",
                "linkedinUrl": row[3] or "-",
                "headline": row[4] or "-",
                "skills": row[5] or [],
                "about": row[6] or "",
                "experiences": row[7] or [],
                "profilePic": row[8] or None,
                "is_complete": row[9],
                "created_at": row[10],
            })

    return results, remaining
163
+
164
+
165
def store_prompt(conn, prompt: str, parsed_json: dict):
    """Record a recruiter prompt and its parsed fields in ``saral_prompts``.

    The insert is committed on success. On any error the error is printed
    and the transaction is rolled back; nothing is raised to the caller.

    Args:
        conn: Open database connection.
        prompt: The raw recruiter query.
        parsed_json: Parsed fields for that query; absent keys are stored
            as NULL.
    """
    raw_skills = parsed_json.get("skills", [])
    raw_location = parsed_json.get("location", [])

    values = (
        prompt,
        parsed_json.get("job_title"),
        json.dumps(raw_skills) if raw_skills else None,  # ensure proper type
        parsed_json.get("experience"),
        raw_location if raw_location else None,
        parsed_json.get("work_preference"),
        parsed_json.get("job_type"),
        datetime.now(),
        parsed_json.get("is_indian"),
    )

    statement = """
                INSERT INTO saral_prompts
                (prompt, job_title, skills, experience, location, work_preference, job_type, created_at,is_indian)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s,%s)
            """

    try:
        with conn.cursor() as cursor:
            cursor.execute(statement, values)
        conn.commit()
    except Exception as exc:
        print("Error inserting prompt:", exc)
        conn.rollback()
195
+
196
+
197
+
198
+
199
# Open the shared module-level connection and cursor that other modules
# import as `conn` and `cur`. They are deliberately left open for the life
# of the process, so there is no cleanup step here.
try:
    conn = psycopg2.connect(
        host=hostname,
        dbname=database,
        user=username,
        password=pwd,
        port=port_id,
    )
    cur = conn.cursor()

    # Schema of the profile table. The statement is kept for reference only:
    # its execution is disabled below.
    create_script = """
    CREATE TABLE IF NOT EXISTS saral_data (
    id SERIAL PRIMARY KEY,
    name TEXT,
    location TEXT,
    email TEXT,
    linkedin_url TEXT,
    headline TEXT,
    skills JSONB,
    about TEXT,
    experience JSONB,
    profile_pic TEXT,
    is_complete BOOLEAN,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );
    """

    # cur.execute(create_script)

    conn.commit()
except Exception as exc:
    # On failure `conn` and `cur` keep whatever value they had before.
    print(exc)
243
+
244
+
245
+
246
+
247
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ flask
2
+ streamlit
3
+ python-dotenv
4
+ openai
5
+ apify-client
6
+ psycopg2-binary
7
+ requests
8
+ gunicorn
saral-ai.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import re
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from openai import AzureOpenAI
6
+ import json
7
+ from nlp_parsed import parse_recruiter_query,prompt_enhancer
8
+ from SERP import query_making, serp_api_call
9
+ from apify import apify_call
10
+ from validate import validate_function, score_candidates
11
+ from postgres_db import fetch_from_saral_data, check_completeness, data_input, cur, conn, store_prompt
12
+
13
+
14
+ st.set_page_config(page_title="LinkedIn Recruiter Assistant", page_icon="🎯")
15
+
16
+
17
+ if "parsed_data" not in st.session_state:
18
+ st.session_state.parsed_data = {}
19
+
20
+
21
+ if "matched_results" not in st.session_state:
22
+ st.session_state.matched_results = []
23
+ if "unmatched_results" not in st.session_state:
24
+ st.session_state.unmatched_results = []
25
+
26
+
27
+ if "progress_placeholder" not in st.session_state:
28
+ st.session_state.progress_placeholder = None
29
+ if "progress" not in st.session_state:
30
+ st.session_state.progress = None
31
+
32
+
33
+ if "current_page" not in st.session_state:
34
+ st.session_state.current_page = 0
35
+ if "run_search" not in st.session_state:
36
+ st.session_state.run_search = False
37
+
38
+
39
+ if "user_input" not in st.session_state:
40
+ st.session_state.user_input = ""
41
+
42
+
43
+ st.header("Saral AI")
44
+
45
+ user_input = st.text_area(
46
+ "Enter your query here:",
47
+ placeholder="Enter your query here",
48
+ key="user_input_box",
49
+ value=st.session_state.user_input # always pull from session_state
50
+ )
51
+
52
+
53
+ st.session_state.user_input = user_input
54
+
55
+
56
# Show query parsing immediately (live preview).
# Start from the last stored result so `parsed_data` is always bound: the
# "Enter" button and the search step below read it even when the box is empty.
parsed_data = st.session_state.parsed_data

if user_input.strip():
    parsed_data = parse_recruiter_query(user_input)
    st.session_state.parsed_data = parsed_data

    if "error" in parsed_data:
        st.error(parsed_data["error"])
    elif parsed_data.get("is_indian") is False:
        # Show the notice in the UI; print() only reaches the server console.
        st.warning("Our platform is not allowing search for out of india")
    else:
        with st.expander("Query", expanded=True):
            col1, col2 = st.columns([1, 1])
            with col1:
                st.markdown(f'**Job Title:** {parsed_data.get("job_title", "None")}')
                st.markdown(f'**Skills:** {parsed_data.get("skills", "None")}')
                st.markdown(
                    f'**Experience:** {parsed_data.get("experience","None")} years of Experience'
                )
                st.markdown(f'is_indian :{parsed_data.get("is_indian","None")}')
            with col2:
                st.markdown(f'**Location:** {parsed_data.get("location", "None")}')
                st.markdown(
                    f'**Work Preference:** {parsed_data.get("work_preference", "None")}'
                )
                st.markdown(f'**Job Type:** {parsed_data.get("job_type", "None")}')
83
+
84
+
85
+
86
+ # Enhance prompt button
87
+ if st.button("Enhance Prompt", use_container_width=True):
88
+ enhanced = prompt_enhancer(st.session_state.user_input)
89
+
90
+ # Store only in your own session_state variable
91
+ st.session_state.user_input = enhanced
92
+
93
+ # force rerun so text_area shows updated text
94
+ # st.experimental_rerun()
95
+
96
+
97
+
98
+ # Only fetch SERP + Apify when button clicked
99
+ if st.button(
100
+ "Enter",
101
+ use_container_width=True,
102
+ disabled=(parsed_data.get("is_indian") is False) # disable only if explicitly False
103
+ ):
104
+ st.session_state.current_page = 0 # reset pagination
105
+ st.session_state.run_search = True
106
+
107
+
108
+ if st.session_state.run_search:
109
+ if not user_input.strip():
110
+ st.warning("Please enter a valid query.")
111
+ st.stop()
112
+
113
+
114
+
115
+ store_prompt(conn,user_input,parsed_data)
116
+
117
+ # Progress bar
118
+ st.session_state.progress_placeholder = st.empty()
119
+ st.session_state.progress = st.session_state.progress_placeholder.progress(0)
120
+ status = st.empty()
121
+
122
+ if user_input.strip() and "error" not in parsed_data:
123
+ query, location = query_making(parsed_data) # getting query like https:://linkedin.com --- AND location list
124
+
125
+ print(query)
126
+
127
+
128
+ ### pagination concept
129
+
130
+ if st.session_state.current_page >= 0 :
131
+ results_per_page = 10
132
+ start = st.session_state.current_page * results_per_page
133
+
134
+ serp_data = serp_api_call(
135
+ query,
136
+ start= start,
137
+ results_per_page=10
138
+ )
139
+
140
+
141
+ saral_data, remain_urls = fetch_from_saral_data(serp_data, conn)
142
+
143
+ print(remain_urls)
144
+
145
+
146
+ st.session_state.progress.progress(30)
147
+
148
+ serp_json = {}
149
+
150
+ apify_json = {}
151
+
152
+ if len(remain_urls) >= 1:
153
+ for idx, i in enumerate(remain_urls,start=1):
154
+ serp_json[idx] = i
155
+
156
+ apify_json = apify_call(serp_json)
157
+ st.session_state.progress.progress(70)
158
+
159
+
160
+ if apify_json:
161
+ total_candidates = saral_data + apify_json
162
+
163
+ else:
164
+ total_candidates = saral_data
165
+
166
+ data_input(total_candidates)
167
+
168
+ # Validate function (location)
169
+ matched, unmatched = validate_function(location, total_candidates)
170
+ st.session_state.progress.progress(70)
171
+
172
+
173
+
174
+ matched = score_candidates(parsed_data , matched)
175
+
176
+ st.session_state.matched_results = matched
177
+ st.session_state.unmatched_results = unmatched
178
+
179
+ st.session_state.progress.progress(100)
180
+ st.session_state.progress_placeholder.empty()
181
+ st.session_state.progress = None
182
+ st.session_state.progress_placeholder = None
183
+
184
+ else:
185
+ st.warning("Please enter a valid query.")
186
+
187
+
188
+ if st.session_state.matched_results:
189
+
190
+ # length of Matched and unmatched profiles
191
+ col1, col2 = st.columns([1, 1])
192
+ with col1:
193
+ st.success(f"Matched Profiles: {len(st.session_state.matched_results)}")
194
+ with col2:
195
+ st.warning(f"Unmatched Profiles: {len(st.session_state.unmatched_results)}")
196
+
197
+
198
+
199
+
200
+ col1, col2, col3 = st.columns([1,2,1])
201
+ with col1:
202
+ if st.button("< Previous") and st.session_state.current_page > 0:
203
+ st.session_state.current_page -= 1
204
+ st.session_state.run_search = True
205
+
206
+ with col2:
207
+ st.write(f'Current Page {st.session_state.current_page + 1}')
208
+
209
+ with col3:
210
+ if st.button("Next >"):
211
+ st.session_state.current_page += 1
212
+ st.session_state.run_search = True
213
+
214
+
215
+ st.subheader("Candidates Profiles")
216
+ for idx, profiles in enumerate(st.session_state.matched_results, start=1):
217
+ with st.expander(f"{idx}. {profiles.get('fullName', 'Unknown')}"):
218
+ st.json(profiles)
219
+
220
+ with st.expander(
221
+ f"{idx}. {profiles.get('fullName', 'Unknown')} • Score: {profiles.get('score','None')} ", expanded=True
222
+ ):
223
+ col1, col2 = st.columns([1, 2])
224
+ with col1:
225
+ image = profiles.get("profilePic")
226
+
227
+ temp_image = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRDVO09x_DXK3p4Mt1j08Ab0R875TdhsDcG2A&s"
228
+
229
+ if profiles.get("profilePic"):
230
+ st.image(profiles.get("profilePic"), width=150)
231
+ else:
232
+ st.image(temp_image, width=150)
233
+
234
+ st.markdown(f"**Location:** {profiles.get('addressWithCountry','-')}")
235
+ st.markdown(f"**Email:** {profiles.get('email','None')}")
236
+
237
+
238
+ experiences = profiles.get("experiences", [])
239
+ open_to_work = True # default
240
+
241
+ for exp in experiences:
242
+ caption = exp.get("caption", "")
243
+ if "Present" in caption: # if still working
244
+ open_to_work = False
245
+ break
246
+
247
+ st.markdown(f"**Open to Work:** {'False' if not open_to_work else 'True'}")
248
+
249
+ st.markdown(
250
+ f"**LinkedIn:** [LinkedIn]({profiles.get('linkedinUrl','')})"
251
+ )
252
+
253
+
254
+ with col2:
255
+ st.markdown(f"### {profiles.get('fullName')}")
256
+ if profiles.get("headline"):
257
+ st.markdown(f"*{profiles.get('headline')}*")
258
+
259
+ skills_raw = profiles.get("skills", [])
260
+ skill_titles = [
261
+ s.get("title")
262
+ for s in skills_raw
263
+ if isinstance(s, dict) and "title" in s
264
+ ]
265
+ if skill_titles:
266
+ st.markdown("**Skills:** " + " • ".join(skill_titles[:10]))
267
+
268
+ if profiles.get("about"):
269
+ about = profiles.get("about")
270
+ st.markdown(
271
+ "**About:** " + (about[:250] + "..." if len(about) > 250 else about)
272
+ )
273
+
274
+ if profiles.get("experiences"):
275
+ st.markdown("**Experience**")
276
+ for exp in profiles["experiences"]:
277
+ title = exp.get("title", "")
278
+ subtitle = exp.get("subtitle") or exp.get("metadata", "")
279
+ caption = exp.get("caption", "")
280
+
281
+ # Print main line
282
+ st.write(f"• {title} at {subtitle} — {caption}")
283
+
284
+ # Print description if available
285
+ if exp.get("description"):
286
+ for desc in exp["description"]:
287
+ if isinstance(desc, dict) and "text" in desc:
288
+ st.markdown(f" - {desc['text']}")
289
+
290
+ if profiles.get("is_complete"):
291
+ st.markdown(f'{profiles.get("is_complete")}')
292
+
293
+ if st.session_state.unmatched_results:
294
+ st.subheader("Unmatched Profiles")
295
+ for idx, profiles in enumerate(st.session_state.unmatched_results, start=1):
296
+
297
+ st.markdown(
298
+ f"{idx}, {profiles.get('fullName', 'Unknown')} - {profiles.get('addressWithCountry', 'Unknown')} [LINKEDIN]({profiles.get('linkedinUrl', 'Unknown')})"
299
+ )
300
+
301
+
saral_ai_api.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify, session
2
+ import re
3
+ import os
4
+ import json
5
+ import traceback
6
+ from dotenv import load_dotenv
7
+ from openai import AzureOpenAI
8
+
9
+ # Import your custom modules (make sure these are available)
10
+ try:
11
+ from nlp_parsed import parse_recruiter_query, prompt_enhancer
12
+ from SERP import query_making, serp_api_call
13
+ from apify import apify_call
14
+ from validate import validate_function, score_candidates
15
+ from postgres_db import fetch_from_saral_data, check_completeness, data_input, cur, conn, store_prompt
16
+ MODULES_AVAILABLE = True
17
+ except ImportError as e:
18
+ print(f"Warning: Some modules not available: {e}")
19
+ MODULES_AVAILABLE = False
20
+
21
+ load_dotenv()
22
+
23
+ app = Flask(__name__)
24
+ app.secret_key = os.getenv('SECRET_KEY', 'your-secret-key-here')
25
+
26
+ # Mock functions for when modules aren't available (for testing)
27
+ def mock_parse_recruiter_query(query):
28
+ return {
29
+ "job_title": "Software Engineer",
30
+ "skills": ["Python", "Flask"],
31
+ "experience": "3-5",
32
+ "location": "Mumbai",
33
+ "work_preference": "Remote",
34
+ "job_type": "Full-time",
35
+ "is_indian": True
36
+ }
37
+
38
+ def mock_prompt_enhancer(prompt):
39
+ return f"Enhanced: {prompt} - Looking for skilled professionals"
40
+
41
+ def mock_query_making(parsed_data):
42
+ return "https://linkedin.com/search", ["Mumbai", "Delhi"]
43
+
44
+ def mock_serp_api_call(query, start=0, results_per_page=10):
45
+ return [f"https://linkedin.com/in/user{i}" for i in range(start, start + results_per_page)]
46
+
47
+ def mock_fetch_from_saral_data(serp_data, conn):
48
+ return [], serp_data # Return empty saral_data, all URLs as remaining
49
+
50
+ def mock_apify_call(serp_json):
51
+ mock_profiles = []
52
+ for i in range(min(5, len(serp_json))):
53
+ mock_profiles.append({
54
+ "fullName": f"John Doe {i+1}",
55
+ "headline": "Software Engineer with 5+ years experience",
56
+ "addressWithCountry": "Mumbai, India",
57
+ "email": f"john{i+1}@example.com",
58
+ "linkedinUrl": f"https://linkedin.com/in/johndoe{i+1}",
59
+ "skills": [{"title": "Python"}, {"title": "Flask"}, {"title": "JavaScript"}],
60
+ "about": "Experienced software developer with expertise in web technologies...",
61
+ "experiences": [
62
+ {
63
+ "title": "Senior Software Engineer",
64
+ "subtitle": "Tech Company",
65
+ "caption": "Jan 2020 - Present",
66
+ "description": [{"text": "Developed web applications using Python and Flask"}]
67
+ }
68
+ ],
69
+ "profilePic": "https://via.placeholder.com/150",
70
+ "is_complete": "Complete Profile"
71
+ })
72
+ return mock_profiles
73
+
74
+ def mock_validate_function(location, candidates):
75
+ # Split candidates into matched and unmatched (80% matched, 20% unmatched)
76
+ split_point = int(len(candidates) * 0.8)
77
+ return candidates[:split_point], candidates[split_point:]
78
+
79
+ def mock_score_candidates(parsed_data, candidates):
80
+ for i, candidate in enumerate(candidates):
81
+ candidate['score'] = round(85 + (i % 15), 1) # Scores between 85-100
82
+ return candidates
83
+
84
+ def mock_data_input(candidates):
85
+ pass
86
+
87
+ def mock_store_prompt(conn, prompt, parsed_data):
88
+ pass
89
+
90
+ @app.route('/')
91
+ def index():
92
+ return render_template('index.html')
93
+
94
@app.route('/parse_query', methods=['POST'])
def parse_query():
    """Parse a recruiter's free-text query into structured fields.

    Expects a JSON object body with a string ``query`` key.

    Returns:
        A JSON response: ``{'success': True, 'parsed_data': ...}`` on
        success, otherwise ``{'error': <message>}``.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so the caller gets the validation message
        # below rather than an internal exception string.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_query = data.get('query', '')
        user_input = raw_query.strip() if isinstance(raw_query, str) else ''

        if not user_input:
            return jsonify({'error': 'Please enter a valid query'})

        # The real parser only exists when the optional project modules
        # imported successfully; otherwise fall back to the mock.
        if MODULES_AVAILABLE:
            parsed_data = parse_recruiter_query(user_input)
        else:
            parsed_data = mock_parse_recruiter_query(user_input)

        return jsonify({'success': True, 'parsed_data': parsed_data})

    except Exception as e:
        return jsonify({'error': f'Error parsing query: {str(e)}'})
112
+
113
@app.route('/enhance_prompt', methods=['POST'])
def enhance_prompt():
    """Rewrite a recruiter's query into an enhanced prompt.

    Expects a JSON object body with a string ``query`` key.

    Returns:
        A JSON response: ``{'success': True, 'enhanced_query': ...}`` on
        success, otherwise ``{'error': <message>}``.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so the caller gets the validation message
        # below rather than an internal exception string.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_query = data.get('query', '')
        user_input = raw_query.strip() if isinstance(raw_query, str) else ''

        if not user_input:
            return jsonify({'error': 'Please enter a valid query'})

        # The real enhancer only exists when the optional project modules
        # imported successfully; otherwise fall back to the mock.
        if MODULES_AVAILABLE:
            enhanced = prompt_enhancer(user_input)
        else:
            enhanced = mock_prompt_enhancer(user_input)

        return jsonify({'success': True, 'enhanced_query': enhanced})

    except Exception as e:
        return jsonify({'error': f'Error enhancing prompt: {str(e)}'})
131
+
132
@app.route('/search', methods=['POST'])
def search():
    """Run the candidate search pipeline for one page of results.

    Expects a JSON object body with a string ``query`` and an optional
    zero-based integer ``page`` (default 0). The steps are: parse the
    query, store the prompt, build the search query, fetch one page of
    SERP results, split them into already-stored profiles and remaining
    URLs, send the remaining URLs to Apify, store the combined
    candidates, then validate them against the requested location and
    score the matches.

    Returns:
        A JSON response with ``success``, ``parsed_data``,
        ``matched_results``, ``unmatched_results`` and ``current_page`` on
        success, otherwise ``{'error': <message>}``.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_query = data.get('query', '')
        user_input = raw_query.strip() if isinstance(raw_query, str) else ''
        if not user_input:
            return jsonify({'error': 'Please enter a valid query'})

        # Validate the page before any external call: a string page would
        # otherwise be "multiplied" into a repeated string below, and a
        # negative page would produce a negative SERP offset.
        try:
            current_page = int(data.get('page', 0))
        except (TypeError, ValueError):
            current_page = -1
        if current_page < 0:
            return jsonify({'error': 'Invalid page number'})

        # Parse query
        if MODULES_AVAILABLE:
            parsed_data = parse_recruiter_query(user_input)
        else:
            parsed_data = mock_parse_recruiter_query(user_input)
        print(parsed_data)

        if "error" in parsed_data:
            return jsonify({'error': parsed_data["error"]})

        # Only an explicit False blocks the search; a missing flag does not.
        if parsed_data.get("is_indian") is False:
            return jsonify({'error': 'Our platform is not allowing search outside of India'})

        # Store prompt
        if MODULES_AVAILABLE:
            store_prompt(conn, user_input, parsed_data)
        else:
            mock_store_prompt(None, user_input, parsed_data)

        # Get query and location
        if MODULES_AVAILABLE:
            query, location = query_making(parsed_data)
        else:
            query, location = mock_query_making(parsed_data)
        print(query)

        # Pagination
        results_per_page = 10
        start = current_page * results_per_page

        # Get SERP data, then split it into profiles already stored and
        # URLs that still need to be fetched.
        if MODULES_AVAILABLE:
            serp_data = serp_api_call(query, start=start, results_per_page=results_per_page)
            saral_data, remain_urls = fetch_from_saral_data(serp_data, conn)
        else:
            serp_data = mock_serp_api_call(query, start=start, results_per_page=results_per_page)
            saral_data, remain_urls = mock_fetch_from_saral_data(serp_data, None)

        # Process remaining URLs with Apify
        apify_json = []
        if len(remain_urls) >= 1:
            serp_json = {idx: url for idx, url in enumerate(remain_urls, start=1)}

            if MODULES_AVAILABLE:
                apify_json = apify_call(serp_json)
            else:
                apify_json = mock_apify_call(serp_json)

        # Combine data; an empty/None Apify result contributes nothing.
        if apify_json:
            total_candidates = saral_data + apify_json
        else:
            total_candidates = saral_data

        # Store data
        if MODULES_AVAILABLE:
            data_input(total_candidates)
        else:
            mock_data_input(total_candidates)

        # Validate and score
        if MODULES_AVAILABLE:
            matched, unmatched = validate_function(location, total_candidates)
            matched = score_candidates(parsed_data, matched)
        else:
            matched, unmatched = mock_validate_function(location, total_candidates)
            matched = mock_score_candidates(parsed_data, matched)

        return jsonify({
            'success': True,
            'parsed_data': parsed_data,
            'matched_results': matched,
            'unmatched_results': unmatched,
            'current_page': current_page
        })

    except Exception as e:
        print(f"Error in search: {traceback.format_exc()}")
        return jsonify({'error': f'Search error: {str(e)}'})
225
+
226
+ # if __name__ == '__main__':
227
+ # # Ensure templates directory exists
228
+ # if not os.path.exists('templates'):
229
+ # os.makedirs('templates')
230
+
231
+ # app.run(debug=True, host='0.0.0.0', port=5000)
templates/index.html ADDED
@@ -0,0 +1,673 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Saral AI - LinkedIn Recruiter Assistant</title>
7
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.2/css/bootstrap.min.css" rel="stylesheet">
8
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" rel="stylesheet">
9
+ <!-- <link rel="icon" type="image/x-icon" href="{{ url_for('static', filename='favicon.ico') }}"> -->
10
+ <style>
11
+ :root {
12
+ --primary-color: #0077b5;
13
+ --secondary-color: #00a0dc;
14
+ --success-color: #28a745;
15
+ --warning-color: #ffc107;
16
+ --error-color: #dc3545;
17
+ --dark-color: #2c3e50;
18
+ --light-bg: #f8f9fa;
19
+ }
20
+
21
+ body {
22
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
23
+ min-height: 100vh;
24
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
25
+ }
26
+
27
+ .main-container {
28
+ background: white;
29
+ margin: 20px auto;
30
+ border-radius: 20px;
31
+ box-shadow: 0 15px 35px rgba(0,0,0,0.1);
32
+ overflow: hidden;
33
+ }
34
+
35
+ .header {
36
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
37
+ color: white;
38
+ padding: 30px;
39
+ text-align: center;
40
+ }
41
+
42
+ .header h1 {
43
+ margin: 0;
44
+ font-size: 2.5rem;
45
+ font-weight: bold;
46
+ }
47
+
48
+ .content-area {
49
+ padding: 40px;
50
+ }
51
+
52
+ .query-input {
53
+ border: 2px solid #e9ecef;
54
+ border-radius: 10px;
55
+ padding: 15px;
56
+ font-size: 16px;
57
+ transition: all 0.3s ease;
58
+ min-height: 120px;
59
+ resize: vertical;
60
+ }
61
+
62
+ .query-input:focus {
63
+ border-color: var(--primary-color);
64
+ box-shadow: 0 0 0 0.2rem rgba(0,119,181,0.25);
65
+ }
66
+
67
+ .btn-custom {
68
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
69
+ border: none;
70
+ padding: 12px 30px;
71
+ border-radius: 25px;
72
+ color: white;
73
+ font-weight: 600;
74
+ transition: all 0.3s ease;
75
+ margin: 5px;
76
+ }
77
+
78
+ .btn-custom:hover {
79
+ transform: translateY(-2px);
80
+ box-shadow: 0 5px 15px rgba(0,119,181,0.4);
81
+ color: white;
82
+ }
83
+
84
+ .btn-secondary-custom {
85
+ background: linear-gradient(135deg, #6c757d, #495057);
86
+ border: none;
87
+ padding: 10px 25px;
88
+ border-radius: 20px;
89
+ color: white;
90
+ font-weight: 500;
91
+ transition: all 0.3s ease;
92
+ margin: 5px;
93
+ }
94
+
95
+ .btn-secondary-custom:hover {
96
+ transform: translateY(-2px);
97
+ box-shadow: 0 5px 15px rgba(108,117,125,0.4);
98
+ color: white;
99
+ }
100
+
101
+ .query-display {
102
+ background: var(--light-bg);
103
+ border-radius: 15px;
104
+ padding: 25px;
105
+ margin: 20px 0;
106
+ border-left: 5px solid var(--primary-color);
107
+ }
108
+
109
+ .profile-card {
110
+ background: white;
111
+ border-radius: 15px;
112
+ padding: 25px;
113
+ margin: 15px 0;
114
+ box-shadow: 0 5px 15px rgba(0,0,0,0.08);
115
+ border: 1px solid #e9ecef;
116
+ transition: all 0.3s ease;
117
+ }
118
+
119
+ .profile-card:hover {
120
+ transform: translateY(-5px);
121
+ box-shadow: 0 10px 25px rgba(0,0,0,0.15);
122
+ }
123
+
124
+ .profile-image {
125
+ width: 120px;
126
+ height: 120px;
127
+ border-radius: 50%;
128
+ object-fit: cover;
129
+ border: 4px solid var(--primary-color);
130
+ }
131
+
132
+ .skills-tag {
133
+ display: inline-block;
134
+ background: var(--primary-color);
135
+ color: white;
136
+ padding: 5px 12px;
137
+ margin: 3px;
138
+ border-radius: 15px;
139
+ font-size: 12px;
140
+ font-weight: 500;
141
+ }
142
+
143
+ .experience-item {
144
+ background: #f8f9fa;
145
+ padding: 15px;
146
+ margin: 10px 0;
147
+ border-radius: 10px;
148
+ border-left: 4px solid var(--secondary-color);
149
+ }
150
+
151
+ .score-badge {
152
+ background: linear-gradient(135deg, var(--success-color), #20c997);
153
+ color: white;
154
+ padding: 8px 16px;
155
+ border-radius: 20px;
156
+ font-weight: bold;
157
+ font-size: 14px;
158
+ }
159
+
160
+ .loading-spinner {
161
+ display: none;
162
+ text-align: center;
163
+ padding: 20px;
164
+ }
165
+
166
+ .stats-card {
167
+ background: linear-gradient(135deg, #28a745, #20c997);
168
+ color: white;
169
+ padding: 20px;
170
+ border-radius: 15px;
171
+ text-align: center;
172
+ margin: 10px 0;
173
+ }
174
+
175
+ .pagination-controls {
176
+ display: flex;
177
+ justify-content: center;
178
+ align-items: center;
179
+ margin: 30px 0;
180
+ gap: 15px;
181
+ }
182
+
183
+ .error-message {
184
+ background: #f8d7da;
185
+ border: 1px solid #f5c6cb;
186
+ color: #721c24;
187
+ padding: 15px;
188
+ border-radius: 10px;
189
+ margin: 20px 0;
190
+ }
191
+
192
+ .success-message {
193
+ background: #d4edda;
194
+ border: 1px solid #c3e6cb;
195
+ color: #155724;
196
+ padding: 15px;
197
+ border-radius: 10px;
198
+ margin: 20px 0;
199
+ }
200
+
201
+ .unmatched-list {
202
+ background: #fff3cd;
203
+ border: 1px solid #ffeaa7;
204
+ padding: 20px;
205
+ border-radius: 15px;
206
+ margin-top: 30px;
207
+ }
208
+
209
+ .progress-bar-custom {
210
+ background: linear-gradient(90deg, var(--primary-color), var(--secondary-color));
211
+ height: 20px;
212
+ border-radius: 10px;
213
+ transition: width 0.3s ease;
214
+ }
215
+
216
+ @media (max-width: 768px) {
217
+ .content-area {
218
+ padding: 20px;
219
+ }
220
+ .header h1 {
221
+ font-size: 2rem;
222
+ }
223
+ .profile-card {
224
+ padding: 15px;
225
+ }
226
+ }
227
+ </style>
228
+ </head>
229
+ <body>
230
+ <div class="container-fluid">
231
+ <div class="main-container">
232
+ <div class="header">
233
+ <h1><i class="fas fa-search"></i> Saral AI</h1>
234
+ <p class="mb-0">LinkedIn Recruiter Assistant</p>
235
+ </div>
236
+
237
+ <div class="content-area">
238
+ <div class="row">
239
+ <div class="col-12">
240
+ <div class="mb-4">
241
+ <label for="queryInput" class="form-label h5">Enter your recruitment query:</label>
242
+ <textarea
243
+ id="queryInput"
244
+ class="form-control query-input"
245
+ placeholder="e.g., Looking for Python developers with 3-5 years experience in Mumbai..."
246
+ rows="4"></textarea>
247
+ </div>
248
+
249
+ <div class="text-center mb-4">
250
+ <button class="btn btn-secondary-custom" onclick="enhancePrompt()">
251
+ <i class="fas fa-magic"></i> Enhance Prompt
252
+ </button>
253
+ <button class="btn btn-custom" onclick="searchCandidates()" id="searchBtn">
254
+ <i class="fas fa-play"></i> Enter
255
+ </button>
256
+ </div>
257
+
258
+ <div class="loading-spinner" id="loadingSpinner">
259
+ <div class="spinner-border text-primary" role="status">
260
+ <span class="visually-hidden">Loading...</span>
261
+ </div>
262
+ <p class="mt-2">Searching for candidates...</p>
263
+ <div class="progress mt-3" style="height: 20px;">
264
+ <div class="progress-bar progress-bar-custom" id="progressBar"
265
+ role="progressbar" style="width: 0%"></div>
266
+ </div>
267
+ </div>
268
+
269
+ <div id="errorMessage" class="error-message" style="display: none;"></div>
270
+ <div id="successMessage" class="success-message" style="display: none;"></div>
271
+
272
+ <div id="queryDisplay" class="query-display" style="display: none;">
273
+ <h5><i class="fas fa-info-circle"></i> Parsed Query Information</h5>
274
+ <div class="row" id="queryDetails"></div>
275
+ </div>
276
+
277
+ <div id="resultsStats" style="display: none;">
278
+ <div class="row">
279
+ <div class="col-md-6">
280
+ <div class="stats-card">
281
+ <h3 id="matchedCount">0</h3>
282
+ <p class="mb-0">Matched Profiles</p>
283
+ </div>
284
+ </div>
285
+ <div class="col-md-6">
286
+ <div class="stats-card" style="background: linear-gradient(135deg, #ffc107, #fd7e14);">
287
+ <h3 id="unmatchedCount">0</h3>
288
+ <p class="mb-0">Unmatched Profiles</p>
289
+ </div>
290
+ </div>
291
+ </div>
292
+ </div>
293
+
294
+ <div class="pagination-controls" id="paginationControls" style="display: none;">
295
+ <button class="btn btn-secondary-custom" onclick="previousPage()" id="prevBtn">
296
+ <i class="fas fa-chevron-left"></i> Previous
297
+ </button>
298
+ <span id="pageInfo" class="mx-3 fw-bold">Page 1</span>
299
+ <button class="btn btn-secondary-custom" onclick="nextPage()" id="nextBtn">
300
+ Next <i class="fas fa-chevron-right"></i>
301
+ </button>
302
+ </div>
303
+
304
+ <div id="candidateResults"></div>
305
+
306
+ <div id="unmatchedResults" class="unmatched-list" style="display: none;">
307
+ <h5><i class="fas fa-exclamation-triangle"></i> Unmatched Profiles</h5>
308
+ <div id="unmatchedList"></div>
309
+ </div>
310
+ </div>
311
+ </div>
312
+ </div>
313
+ </div>
314
+ </div>
315
+
316
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.2/js/bootstrap.bundle.min.js"></script>
317
+ <script>
318
+ let currentPage = 0;
319
+ let currentQuery = '';
320
+ let currentResults = {
321
+ matched: [],
322
+ unmatched: [],
323
+ parsed_data: {}
324
+ };
325
+
326
/**
 * Show an error banner and hide the success banner.
 *
 * The message is inserted as a text node, not as HTML: callers pass
 * server-provided error strings, which must not be interpreted as markup.
 *
 * @param {string} message - Text to display next to the warning icon.
 */
function showError(message) {
    const errorDiv = document.getElementById('errorMessage');

    const icon = document.createElement('i');
    icon.className = 'fas fa-exclamation-triangle';

    // Rebuild the banner from DOM nodes so the message is never parsed as HTML.
    errorDiv.textContent = '';
    errorDiv.appendChild(icon);
    errorDiv.appendChild(document.createTextNode(' ' + message));

    errorDiv.style.display = 'block';
    document.getElementById('successMessage').style.display = 'none';
}
332
+
333
/**
 * Show a success banner and hide the error banner.
 *
 * The message is inserted as a text node, not as HTML, so any text passed
 * in is displayed literally.
 *
 * @param {string} message - Text to display next to the check icon.
 */
function showSuccess(message) {
    const successDiv = document.getElementById('successMessage');

    const icon = document.createElement('i');
    icon.className = 'fas fa-check-circle';

    // Rebuild the banner from DOM nodes so the message is never parsed as HTML.
    successDiv.textContent = '';
    successDiv.appendChild(icon);
    successDiv.appendChild(document.createTextNode(' ' + message));

    successDiv.style.display = 'block';
    document.getElementById('errorMessage').style.display = 'none';
}
339
+
340
+ function hideMessages() {
341
+ document.getElementById('errorMessage').style.display = 'none';
342
+ document.getElementById('successMessage').style.display = 'none';
343
+ }
344
+
345
+ function showLoading() {
346
+ document.getElementById('loadingSpinner').style.display = 'block';
347
+ document.getElementById('searchBtn').disabled = true;
348
+ }
349
+
350
+ function hideLoading() {
351
+ document.getElementById('loadingSpinner').style.display = 'none';
352
+ document.getElementById('searchBtn').disabled = false;
353
+ }
354
+
355
+ function updateProgress(percentage) {
356
+ document.getElementById('progressBar').style.width = percentage + '%';
357
+ }
358
+
359
+ async function enhancePrompt() {
360
+ const query = document.getElementById('queryInput').value.trim();
361
+ if (!query) {
362
+ showError('Please enter a query first');
363
+ return;
364
+ }
365
+
366
+ try {
367
+ const response = await fetch('/enhance_prompt', {
368
+ method: 'POST',
369
+ headers: {
370
+ 'Content-Type': 'application/json'
371
+ },
372
+ body: JSON.stringify({ query })
373
+ });
374
+
375
+ const data = await response.json();
376
+ if (data.success) {
377
+ document.getElementById('queryInput').value = data.enhanced_query;
378
+ showSuccess('Prompt enhanced successfully!');
379
+ } else {
380
+ showError(data.error || 'Failed to enhance prompt');
381
+ }
382
+ } catch (error) {
383
+ showError('Error enhancing prompt: ' + error.message);
384
+ }
385
+ }
386
+
387
/**
 * Render the parsed query fields into the "Parsed Query Information" panel
 * and make the panel visible.
 *
 * @param {Object} parsedData - Parsed query as returned by /parse_query
 *     (job_title, skills, experience, is_indian, location,
 *     work_preference, job_type).
 */
function displayParsedQuery(parsedData) {
    const fields = parsedData || {};

    // Values originate from user text, so escape them before building HTML.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[ch]));

    // Arrays are joined for display; missing/empty values show as "None".
    const show = (value) => {
        const flat = Array.isArray(value) ? value.join(', ') : value;
        return (flat === null || flat === undefined || flat === '')
            ? 'None'
            : escapeHtml(flat);
    };

    // Only append the unit when there is a value, to avoid "None years".
    const experience = show(fields.experience);
    const experienceText = experience === 'None' ? 'None' : `${experience} years`;

    const queryDetails = document.getElementById('queryDetails');
    queryDetails.innerHTML = `
        <div class="col-md-6">
            <p><strong>Job Title:</strong> ${show(fields.job_title)}</p>
            <p><strong>Skills:</strong> ${show(fields.skills)}</p>
            <p><strong>Experience:</strong> ${experienceText}</p>
            <p><strong>Indian Candidate:</strong> ${fields.is_indian ? 'Yes' : 'No'}</p>
        </div>
        <div class="col-md-6">
            <p><strong>Location:</strong> ${show(fields.location)}</p>
            <p><strong>Work Preference:</strong> ${show(fields.work_preference)}</p>
            <p><strong>Job Type:</strong> ${show(fields.job_type)}</p>
        </div>
    `;
    document.getElementById('queryDisplay').style.display = 'block';
}
404
+
405
+ async function searchCandidates() {
406
+ const query = document.getElementById('queryInput').value.trim();
407
+ if (!query) {
408
+ showError('Please enter a query first');
409
+ return;
410
+ }
411
+
412
+ currentQuery = query;
413
+ currentPage = 0; // Reset to first page
414
+ showLoading();
415
+ hideMessages();
416
+ updateProgress(0);
417
+
418
+ try {
419
+ // First parse the query
420
+ updateProgress(10);
421
+ const parseResponse = await fetch('/parse_query', {
422
+ method: 'POST',
423
+ headers: {
424
+ 'Content-Type': 'application/json'
425
+ },
426
+ body: JSON.stringify({ query })
427
+ });
428
+
429
+ const parseData = await parseResponse.json();
430
+ if (!parseData.success) {
431
+ throw new Error(parseData.error || 'Failed to parse query');
432
+ }
433
+
434
+ // Check if query is for Indian candidates
435
+ if (parseData.parsed_data.is_indian === false) {
436
+ throw new Error('Our platform only supports searches for candidates in India');
437
+ }
438
+
439
+ displayParsedQuery(parseData.parsed_data);
440
+ updateProgress(30);
441
+
442
+ // Now search for candidates
443
+ const response = await fetch('/search', {
444
+ method: 'POST',
445
+ headers: {
446
+ 'Content-Type': 'application/json'
447
+ },
448
+ body: JSON.stringify({
449
+ query: query,
450
+ parsed_data: parseData.parsed_data,
451
+ page: currentPage
452
+ })
453
+ });
454
+
455
+ updateProgress(70);
456
+
457
+ const data = await response.json();
458
+ if (data.success) {
459
+ currentResults = data;
460
+ displayResults(data);
461
+ updateProgress(100);
462
+ showSuccess(`Search completed! Found ${data.matched_results.length} matched profiles.`);
463
+ } else {
464
+ throw new Error(data.error || 'Search failed');
465
+ }
466
+ } catch (error) {
467
+ showError('Error during search: ' + error.message);
468
+ } finally {
469
+ hideLoading();
470
+ }
471
+ }
472
+
473
+ function displayResults(data) {
474
+ // Display stats
475
+ document.getElementById('matchedCount').textContent = data.matched_results.length;
476
+ document.getElementById('unmatchedCount').textContent = data.unmatched_results.length;
477
+ document.getElementById('resultsStats').style.display = 'block';
478
+
479
+ // Display pagination
480
+ updatePagination();
481
+
482
+ // Display matched results
483
+ displayCandidates(data.matched_results);
484
+
485
+ // Display unmatched results
486
+ displayUnmatchedCandidates(data.unmatched_results);
487
+ }
488
+
489
+ function updatePagination() {
490
+ document.getElementById('pageInfo').textContent = `Page ${currentPage + 1}`;
491
+ document.getElementById('paginationControls').style.display = 'flex';
492
+ document.getElementById('prevBtn').disabled = currentPage === 0;
493
+ }
494
+
495
/**
 * Render the matched candidate profiles as cards inside #candidateResults.
 *
 * Profile fields come from scraped third-party data, so every value is
 * HTML-escaped and URLs are restricted to http(s) before being placed in
 * the markup.
 *
 * @param {Array<Object>} candidates - Matched profiles from /search.
 */
function displayCandidates(candidates) {
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[ch]));

    // Returns an escaped URL, or '' when the scheme is not http/https
    // (blocks javascript: and similar schemes in href/src attributes).
    const safeUrl = (value) => {
        const url = String(value || '').trim();
        return /^https?:\/\//i.test(url) ? escapeHtml(url) : '';
    };

    // Escaped text, or the fallback when the value is missing/empty.
    const textOr = (value, fallback) =>
        (value === null || value === undefined || value === '')
            ? fallback
            : escapeHtml(value);

    // Bullet list for one experience entry; tolerates a missing or
    // non-array description and null entries.
    const renderDescription = (exp) => {
        if (!Array.isArray(exp.description) || exp.description.length === 0) {
            return '';
        }
        const items = exp.description
            .map(desc => (desc && typeof desc === 'object' && desc.text)
                ? `<li>${escapeHtml(desc.text)}</li>`
                : '')
            .join('');
        return `<ul class="mt-2">${items}</ul>`;
    };

    const renderExperience = (exp) => `
        <div class="experience-item">
            <strong>${textOr(exp.title, '')}</strong> at <strong>${textOr(exp.subtitle || exp.metadata, '')}</strong>
            <small class="text-muted d-block">${textOr(exp.caption, '')}</small>
            ${renderDescription(exp)}
        </div>
    `;

    const resultsDiv = document.getElementById('candidateResults');
    if (!candidates || candidates.length === 0) {
        resultsDiv.innerHTML = '<div class="text-center"><h5>No matched candidates found</h5></div>';
        return;
    }

    const defaultImage = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRDVO09x_DXK3p4Mt1j08Ab0R875TdhsDcG2A&s";

    let html = '<h4><i class="fas fa-users"></i> Candidate Profiles</h4>';

    candidates.forEach((candidate) => {
        // Skills may be objects with a title or plain strings; drop empties.
        const skills = Array.isArray(candidate.skills)
            ? candidate.skills
                .map(s => (s && typeof s === 'object') ? s.title : s)
                .filter(s => s !== null && s !== undefined && s !== '')
                .slice(0, 10)
            : [];

        const experiences = Array.isArray(candidate.experiences)
            ? candidate.experiences.filter(exp => exp && typeof exp === 'object')
            : [];

        // A role whose caption contains "Present" means currently employed.
        const isOpenToWork = !experiences.some(exp =>
            typeof exp.caption === 'string' && exp.caption.includes('Present')
        );

        const about = typeof candidate.about === 'string' ? candidate.about : '';
        const aboutText = about.length > 250 ? about.substring(0, 250) + '...' : about;

        // Explicit null check so a legitimate score of 0 is still displayed.
        const score = (candidate.score !== null && candidate.score !== undefined)
            ? candidate.score
            : 'N/A';

        const profilePic = safeUrl(candidate.profilePic) || defaultImage;
        const linkedinUrl = safeUrl(candidate.linkedinUrl);

        html += `
            <div class="profile-card">
                <div class="row">
                    <div class="col-md-3 text-center">
                        <img src="${profilePic}"
                             alt="Profile" class="profile-image mb-3">
                        <div class="score-badge mb-2">
                            Score: ${escapeHtml(score)}
                        </div>
                        <p><strong>Location:</strong><br>${textOr(candidate.addressWithCountry, 'N/A')}</p>
                        <p><strong>Email:</strong><br>${textOr(candidate.email, 'None')}</p>
                        <p><strong>Open to Work:</strong><br>${isOpenToWork ? 'True' : 'False'}</p>
                        ${linkedinUrl ? `
                            <a href="${linkedinUrl}" target="_blank" rel="noopener noreferrer" class="btn btn-custom btn-sm">
                                <i class="fab fa-linkedin"></i> LinkedIn
                            </a>
                        ` : ''}
                    </div>
                    <div class="col-md-9">
                        <h4>${textOr(candidate.fullName, 'Unknown')}</h4>
                        ${candidate.headline ? `<p class="text-muted fst-italic">${escapeHtml(candidate.headline)}</p>` : ''}

                        ${skills.length > 0 ? `
                            <div class="mb-3">
                                <strong>Skills:</strong><br>
                                ${skills.map(skill => `<span class="skills-tag">${escapeHtml(skill)}</span>`).join('')}
                            </div>
                        ` : ''}

                        ${aboutText ? `
                            <div class="mb-3">
                                <strong>About:</strong>
                                <p>${escapeHtml(aboutText)}</p>
                            </div>
                        ` : ''}

                        ${experiences.length > 0 ? `
                            <div class="mb-3">
                                <strong>Experience:</strong>
                                ${experiences.map(renderExperience).join('')}
                            </div>
                        ` : ''}

                        ${candidate.is_complete ? `
                            <div class="text-success">
                                <i class="fas fa-check-circle"></i> ${escapeHtml(candidate.is_complete)}
                            </div>
                        ` : ''}
                    </div>
                </div>
            </div>
        `;
    });

    resultsDiv.innerHTML = html;
}
585
+
586
/**
 * Render the unmatched profiles as a numbered list, or hide the section
 * when there are none.
 *
 * Profile fields come from scraped third-party data, so names and
 * addresses are HTML-escaped and the LinkedIn link is only rendered for
 * http(s) URLs.
 *
 * @param {Array<Object>} unmatchedCandidates - Unmatched profiles from /search.
 */
function displayUnmatchedCandidates(unmatchedCandidates) {
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[ch]));

    // Returns an escaped URL, or '' when the scheme is not http/https.
    const safeUrl = (value) => {
        const url = String(value || '').trim();
        return /^https?:\/\//i.test(url) ? escapeHtml(url) : '';
    };

    const unmatchedDiv = document.getElementById('unmatchedResults');
    const unmatchedList = document.getElementById('unmatchedList');

    if (!unmatchedCandidates || unmatchedCandidates.length === 0) {
        unmatchedDiv.style.display = 'none';
        return;
    }

    const rows = unmatchedCandidates.map((candidate, index) => {
        const profile = candidate || {};
        const linkedinUrl = safeUrl(profile.linkedinUrl);
        return `
            <p>${index + 1}. ${escapeHtml(profile.fullName || 'Unknown')} -
            ${escapeHtml(profile.addressWithCountry || 'Unknown')}
            ${linkedinUrl ? `<a href="${linkedinUrl}" target="_blank" rel="noopener noreferrer">LINKEDIN</a>` : ''}</p>
        `;
    });

    unmatchedList.innerHTML = rows.join('');
    unmatchedDiv.style.display = 'block';
}
607
+
608
/**
 * Advance to the next results page and load it.
 * The previous page index is handed to searchPage so it can be restored
 * if the load fails.
 */
async function nextPage() {
    const previous = currentPage;
    currentPage++;
    await searchPage(previous);
}

/**
 * Go back one results page (no-op on the first page) and load it.
 */
async function previousPage() {
    if (currentPage > 0) {
        const previous = currentPage;
        currentPage--;
        await searchPage(previous);
    }
}

/**
 * Load the results for `currentPage` of `currentQuery` from /search and
 * render them.
 *
 * @param {number} [fallbackPage=currentPage] - Page index to restore in
 *     `currentPage` when the load fails or there is no active query.
 *     Passed explicitly by the callers because the global `event` object
 *     is not available after an `await`, so it cannot be used to work out
 *     which button triggered the load.
 */
async function searchPage(fallbackPage = currentPage) {
    if (!currentQuery) {
        currentPage = fallbackPage;
        return;
    }

    showLoading();
    hideMessages();
    updateProgress(0);

    try {
        updateProgress(30);
        const response = await fetch('/search', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                query: currentQuery,
                page: currentPage
            })
        });

        updateProgress(70);

        const data = await response.json();
        if (data.success) {
            currentResults = data;
            displayResults(data);
            updateProgress(100);
            showSuccess(`Page ${currentPage + 1} loaded successfully!`);
        } else {
            throw new Error(data.error || 'Failed to load page');
        }
    } catch (error) {
        showError('Error loading page: ' + error.message);
        // Revert so the page counter keeps matching the results on screen.
        currentPage = fallbackPage;
    } finally {
        hideLoading();
    }
}
659
+
660
+ // Auto-parse query as user types (optional feature)
661
+ let parseTimeout;
662
+ document.getElementById('queryInput').addEventListener('input', function() {
663
+ clearTimeout(parseTimeout);
664
+ parseTimeout = setTimeout(() => {
665
+ const query = this.value.trim();
666
+ if (query && query.length > 10) {
667
+ // You can add auto-parsing here if desired
668
+ }
669
+ }, 500);
670
+ });
671
+ </script>
672
+ </body>
673
+ </html>
validate.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def validate_function(location, apify_json):
    """Split scraped profiles into location matches and non-matches.

    A profile matches when its ``addressWithCountry`` mentions India and one
    of its comma-separated parts equals (case-insensitively, ignoring
    surrounding whitespace) one of the requested locations.

    Args:
        location: A single location string, an iterable of location strings,
            or a falsy value. Blank and non-string entries are ignored. When
            no usable location remains, ``"india"`` is used.
        apify_json: Iterable of profile dicts; ``None`` is treated as empty.

    Returns:
        A ``(match_list, unmatched_list)`` tuple. Profiles without an address
        always land in ``unmatched_list``.
    """
    # A bare string must be wrapped, otherwise iterating it yields single
    # characters and nothing can ever match.
    if isinstance(location, str):
        location = [location]

    locations = [
        loc.strip().lower()
        for loc in (location or [])
        if isinstance(loc, str) and loc.strip()
    ]
    if not locations:
        locations = ["india"]

    match_list = []
    unmatched_list = []

    for profile in apify_json or []:
        address = profile.get("addressWithCountry") or ""
        if not address:
            unmatched_list.append(profile)  # no address -> unmatched
            continue

        address_parts = [part.strip().lower() for part in address.split(",")]

        # Substring test on each part; it also covers an exact "india" part.
        in_india = any("india" in part for part in address_parts)
        if in_india and any(loc in address_parts for loc in locations):
            match_list.append(profile)
        else:
            unmatched_list.append(profile)

    return match_list, unmatched_list
32
+
33
+
34
+
35
def score_candidates(parsed_data, matched_list):
    """Score each profile against the parsed query and sort best-first.

    Every profile dict in ``matched_list`` is mutated in place: it gains a
    ``"score"`` (capped at 100) and a ``"score_breakdown"`` dict with the
    keys ``headline_match``, ``about_match`` and ``skills_match``.

    Scoring:
        * 15 points per occurrence of each job-title word in the headline.
        * 10 points per occurrence of each job-title word in the about text.
        * 10 points per profile skill whose title equals a required skill.

    All comparisons are case-insensitive. Missing or ``None`` values in
    either the query or a profile count as empty and score nothing.

    Args:
        parsed_data: Dict with optional ``"job_title"`` (str) and
            ``"skills"`` (list of str).
        matched_list: List of profile dicts; sorted in place by score,
            highest first.

    Returns:
        The same ``matched_list`` object, scored and sorted.
    """
    # `or` guards against keys that are present but hold None.
    job_title = (parsed_data.get("job_title") or "").lower()
    job_keywords = job_title.split()

    required_skills = [
        skill.lower()
        for skill in (parsed_data.get("skills") or [])
        if isinstance(skill, str)
    ]

    for profile in matched_list:
        breakdown = {}

        # Headline check (substring occurrences of each keyword, 15 each)
        headline = (profile.get("headline") or "").lower()
        headline_score = sum(headline.count(kw) for kw in job_keywords) * 15
        breakdown["headline_match"] = headline_score

        # About check (substring occurrences of each keyword, 10 each)
        about = (profile.get("about") or "").lower()
        about_score = sum(about.count(kw) for kw in job_keywords) * 10
        breakdown["about_match"] = about_score

        # Skills check (exact title match, 10 each)
        profile_skills = [
            (entry.get("title") or "").lower()
            for entry in (profile.get("skills") or [])
            if isinstance(entry, dict)
        ]
        skill_score = sum(
            profile_skills.count(required) for required in required_skills
        ) * 10
        breakdown["skills_match"] = skill_score

        # Cap score at 100
        total = headline_score + about_score + skill_score
        profile["score"] = min(total, 100)
        profile["score_breakdown"] = breakdown

    # Sort list in-place by score (highest first)
    matched_list.sort(key=lambda p: p.get("score", 0), reverse=True)

    return matched_list
83
+