# NOTE: This file was captured from a Hugging Face Space file viewer
# (Space status: Sleeping; file size: 16,926 bytes).
# The viewer's per-line commit gutter (commits 1b4bba1, f92d24c, d6b66d7,
# d11c1bc) and its line-number gutter (1-396) were page residue, not code.
import os
import warnings
# === CRITICAL: Set cache directories BEFORE any other imports ===
os.environ['HF_HOME'] = '/tmp/huggingface_cache'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
os.environ['HF_DATASETS_CACHE'] = '/tmp/datasets_cache'
warnings.filterwarnings("ignore")
import json
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from dotenv import load_dotenv
from google.adk.agents import Agent
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.genai import types
import re
import asyncio
from tools import (
db_tool,
tavily_tool,
predictor_tool,
mentor_tool,
add_query_to_sheet
)
# === LOAD ENV ===
# load_dotenv() runs first so that values from a .env file are visible to the
# environment lookups below.
load_dotenv()
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")  # None when the variable is unset
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")  # None when the variable is unset
# === PREPROCESSING DICTIONARY ===
def _abbreviation_spellings(abbr, full_name):
    """Return the eight spellings registered for a widely used abbreviation.

    Order: plain, dotted and spaced forms (each lower- then upper-case),
    then the full name in lower case and in title case with the connecting
    words "of" / "and" left in lower case.
    """
    dotted = ".".join(abbr)
    spaced = " ".join(abbr)
    titled = " ".join(
        word if word in ("of", "and") else word.capitalize()
        for word in full_name.split(" ")
    )
    return (
        abbr, abbr.upper(),
        dotted, dotted.upper(),
        spaced, spaced.upper(),
        full_name, titled,
    )


# Abbreviations that are registered in all eight spellings.
_MULTI_SPELLING_INSTITUTES = (
    ("iit", "indian institute of technology"),
    ("nit", "national institute of technology"),
    ("iiit", "indian institute of information technology"),
    ("bit", "birla institute of technology"),
    ("bits", "birla institute of technology and science"),
)

# Abbreviations that are registered only as lower-case and upper-case keys.
_LOWER_UPPER_INSTITUTES = (
    ("dtu", "delhi technological university"),
    ("nsut", "netaji subhas university of technology"),
    ("iiitd", "indraprastha institute of information technology delhi"),
    ("iiith", "international institute of information technology hyderabad"),
    ("iiitb", "international institute of information technology bangalore"),
    ("vit", "vellore institute of technology"),
    ("srm", "srm institute of science and technology"),
    ("mit", "manipal institute of technology"),
    ("pes", "pes university"),
    ("bms", "bms college of engineering"),
    ("rv", "rv college of engineering"),
    ("ramaiah", "ms ramaiah institute of technology"),
    ("thapar", "thapar institute of engineering and technology"),
    ("ism", "indian school of mines"),
    ("spa", "school of planning & architecture"),
    ("vnit", "visvesvaraya national institute of technology"),
    ("manit", "maulana azad national institute of technology"),
    ("svnit", "sardar vallabhbhai national institute of technology"),
    ("mnit", "malaviya national institute of technology"),
    ("mnnit", "motilal nehru national institute of technology"),
    ("sliet", "sant longowal institute of engineering and technology"),
    ("iiest", "indian institute of engineering science and technology"),
    ("cusat", "cochin university of science and technology"),
    ("niftem", "national institute of food technology entrepreneurship and management"),
    ("iiht", "indian institute of handloom technology"),
    ("ict", "institute of chemical technology"),
    ("iitram", "institute of infrastructure, technology, research and management"),
    ("csvtu", "chhattisgarh swami vivekanada technical university"),
    ("gkc", "ghani khan choudhary institute of engineering and technology"),
    ("nerist", "north eastern regional institute of science and technology"),
    ("cit", "central institute of technology"),
    ("tezu", "tezpur university"),
    ("nehu", "north-eastern hill university"),
    ("mizoram", "mizoram university"),
    ("assam", "assam university"),
    ("smvdu", "shri mata vaishno devi university"),
    ("pu", "puducherry university"),
    ("ptu", "puducherry technological university"),
    ("jnu", "jawaharlal nehru university"),
    ("uoh", "university of hyderabad"),
    ("ggu", "guru ghasidas vishwavidyalaya"),
    ("cu", "central university"),
    ("gsv", "gati shakti vishwavidyalaya"),
    ("gkv", "gurukula kangri vishwavidyalaya"),
)

# alias -> one-element list holding the canonical lower-case institute name.
# Every key gets its own list object; insertion order is significant because
# consumers iterate the keys.
INSTITUTE_MAPPING = {
    alias: [full_name]
    for abbr, full_name in _MULTI_SPELLING_INSTITUTES
    for alias in _abbreviation_spellings(abbr, full_name)
}
INSTITUTE_MAPPING.update(
    (alias, [full_name])
    for abbr, full_name in _LOWER_UPPER_INSTITUTES
    for alias in (abbr, abbr.upper())
)
# Branch shorthand -> full lower-case branch name, grouped by branch.
# The alias order inside each group is the dictionary's insertion order
# (note that "MECH" is registered before "mech").
_BRANCH_ALIAS_GROUPS = (
    (("cse", "CSE", "cs", "CS"), "computer science and engineering"),
    (("ece", "ECE"), "electronics and communication engineering"),
    (("ee", "EE"), "electrical engineering"),
    (("MECH", "mech"), "mechanical engineering"),
    (("ce", "CE"), "civil engineering"),
    (("che", "CHE"), "chemical engineering"),
    (("aero", "AERO"), "aerospace engineering"),
    (("bio", "BIO"), "biotechnology"),
)
BRANCH_MAPPING = {
    alias: branch_name
    for aliases, branch_name in _BRANCH_ALIAS_GROUPS
    for alias in aliases
}
# === PREPROCESSING FUNCTION ===
def _expand_aliases(text, alias_pairs):
    """Replace each whole-word alias in *text* with its expansion, in one pass.

    Args:
        text: The string to rewrite.
        alias_pairs: Iterable of ``(alias, expansion)`` tuples.

    Returns:
        *text* with every case-insensitive, word-bounded alias occurrence
        replaced by the matching expansion.
    """
    # Longest alias first so that e.g. "i i i t" wins over "i i t" when both
    # could start at the same position. sorted() is stable, so among aliases
    # of equal length the one listed first keeps priority.
    ordered = sorted(alias_pairs, key=lambda pair: len(pair[0]), reverse=True)
    if not ordered:
        return text
    # One capture group per alias: match.lastindex then identifies which alias
    # matched without having to re-normalise the matched text.
    alternatives = "|".join(f"({re.escape(alias)})" for alias, _ in ordered)
    pattern = re.compile(rf"\b(?:{alternatives})\b", flags=re.IGNORECASE)
    # A single sub() pass never re-scans text it has just inserted, so an
    # expansion that itself contains an alias ("srm" -> "srm institute of ...")
    # cannot be expanded a second time. The callable replacement also inserts
    # the expansion literally (no backslash/group-reference processing).
    return pattern.sub(lambda match: ordered[match.lastindex - 1][1], text)


def preprocess_query(query: str) -> str:
    """Expand institute and branch abbreviations in a user query.

    Institute aliases from ``INSTITUTE_MAPPING`` are expanded first (using the
    first entry of each value list), then branch aliases from
    ``BRANCH_MAPPING``. Matching is case-insensitive and restricted to whole
    words.

    Args:
        query: Raw question text.

    Returns:
        The query with recognised aliases replaced by their full names; text
        without aliases (including the empty string) is returned unchanged.
    """
    institute_pairs = [
        (alias, names[0]) for alias, names in INSTITUTE_MAPPING.items()
    ]
    query = _expand_aliases(query, institute_pairs)
    return _expand_aliases(query, BRANCH_MAPPING.items())
# === AGENT SETUP ===
async def create_agent_runner(user_id: str, session_id: str):
    """Build the counselor agent, a session and a runner for one request.

    A new in-memory session service is created on every call, so nothing is
    shared between calls through this function.

    Args:
        user_id: Identifier of the user the session belongs to.
        session_id: Identifier to give the session.

    Returns:
        A ``(runner, session)`` tuple. If session creation raises, the error
        is printed and ``session`` is a minimal stand-in object exposing only
        ``id``.
    """
    counselor_prompt = """
You are a highly experienced college counselor specializing in helping high school students choose the right engineering colleges. You have access to several tools to help answer student queries.
AVAILABLE TOOLS:
1. db_tool - Search local college database
2. tavily_tool - Search internet for college information
3. predictor_tool - Predict colleges based on rank and preferences
4. mentor_tool - Find mentors from specific colleges
WORKFLOW:
Step 1: Classify the user's query into one of these categories:
a. "Know college based on my rank"
b. "Know about a college"
c. "Talk to a college student/mentor"
d. "General query"
e. "JOSAA or CSAB queries"
f. "Unrelated query"
Step 2: Based on category, follow these steps:
For Category A (College predictions):
- Extract: userCrl (Common Rank List or category rank), userCategory, userGender, userHomeState
- Optional: collegeName, branchName preferences
- ALWAYS call the predictor_tool if userCrl is provided.
- After Extracting the params from the query pass the params inside predictor_tool.
- Display the output in bullet points format showing predicted colleges, branches, cutoffs, etc.
- Add this at end of the response 'For detailed and better filters please visit https://www.precollege.in/college-predictor'
For Category B (College information):
- Extract college name
- Call db_tool
- If db_tool fails to return meaningful output(Answer according to query), fallback to tavily_tool to fetch data from the internet.
- Present a brief, well-structured summary in natural language.
For Category C (Find mentors):
- Extract college name
- Call mentor_tool
- Provide the top 6 mentor details using mentor_tool(in name:link to mentor profile format) and include this at end of the respose 'For more mentor details: visit: https://precollege.in/mentors
For Category D (General queries):
- Answer using db_tool
- If db_tool doesn't provide enough, fallback to tavily_tool
- Summarize the answer clearly and informatively.
For Category E (JOSAA or CSAB queries):
- Answer using tavily_tool using JOSAA and CSAB offical websites.(https://josaa.nic.in/, https://csab.nic.in/)
- Answer clearly and informatively in points.
For Category F (Unrelated query):
- If the query is unrelated to JoSAA counseling, colleges, branches, or mentoring:
- Respond: "Sorry, this is beyond my capabilities."
IMPORTANT RULES:
- ALWAYS use the tools specified above based on the workflow.
- Never invent data. If something cannot be found, say so clearly.
- Use clean formatting and polite tone.
- Always give output in bullet points
- Do not skip steps or tools. Follow the workflow strictly.
"""
    # Short, near-deterministic generations.
    generation_config = types.GenerateContentConfig(
        max_output_tokens=1500,
        temperature=0.1,
    )
    counselor = Agent(
        name="college_info_agent",
        model="gemini-1.5-flash",
        instruction=counselor_prompt,
        tools=[db_tool, tavily_tool, predictor_tool, mentor_tool],
        generate_content_config=generation_config,
    )

    app_name = "college_agent_app"
    session_store = InMemorySessionService()
    try:
        pending = session_store.create_session(
            app_name=app_name,
            user_id=user_id,
            session_id=session_id,
        )
        # create_session may hand back either a session or a coroutine
        # producing one; await only in the latter case.
        session = (await pending) if asyncio.iscoroutine(pending) else pending
    except Exception as e:
        print(f"Session creation error: {e}")

        # Best-effort stand-in so callers can still read ``session.id``.
        class SimpleSession:
            def __init__(self, session_id):
                self.id = session_id

        session = SimpleSession(session_id)

    runner = Runner(
        agent=counselor,
        app_name=app_name,
        session_service=session_store,
    )
    return runner, session
# === FASTAPI SETUP ===
# Application metadata handed to the FastAPI constructor.
_APP_METADATA = {
    "title": "College Counselor Agent",
    "description": "AI Agent for college counseling and selection",
    "version": "1.0.0",
}
app = FastAPI(**_APP_METADATA)
# Request body accepted by POST /chat.
# (Documented with comments rather than a docstring so the generated schema
# is left untouched.)
class ChatRequest(BaseModel):
    user_id: str  # forwarded to the agent runner and to the query log
    session_id: str  # id used when creating the agent session
    question: str  # raw user question; abbreviations are expanded before use
# Response body returned by POST /chat.
# (Documented with comments rather than a docstring so the generated schema
# is left untouched.)
class ChatResponse(BaseModel):
    session_id: str  # id of the session that handled the question
    answer: str  # agent reply text, or an apology message on failure
# Landing route: confirms the service is running and points at the docs and
# health-check paths.
@app.get("/")
async def root():
    landing = {"message": "College Counselor Agent is running!"}
    landing["docs"] = "/docs"
    landing["health"] = "/healthz"
    return landing
def _final_reply_text(event):
    # Return the first non-empty text part of a final-response event, or "".
    try:
        for part in event.content.parts:
            text = getattr(part, 'text', None)
            if text:
                return text
    except (AttributeError, TypeError):
        # content or parts is missing/None: fall back to a plain ``text``
        # attribute if the event happens to carry one.
        return getattr(event, 'text', '') or ""
    return ""


async def _first_final_reply(events):
    # Consume the async event stream up to the first final response and
    # return its text ("" if the stream ends without one).
    try:
        async for event in events:
            if hasattr(event, 'is_final_response') and event.is_final_response():
                return _final_reply_text(event)
        return ""
    finally:
        # We may stop before the stream is exhausted; close the generator
        # explicitly instead of leaving its cleanup to garbage collection.
        await events.aclose()


# POST /chat: run one question through the counselor agent.
#
# Flow: build agent/runner/session -> expand abbreviations in the question ->
# stream agent events until the first final response -> log the exchange ->
# return the reply. Agent and logging failures are reported to the client as
# apology text in a normal response; anything else becomes an HTTP 500.
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(req: ChatRequest):
    try:
        runner, session = await create_agent_runner(req.user_id, req.session_id)
        processed_query = preprocess_query(req.question)
        user_msg = types.Content(role="user", parts=[types.Part(text=processed_query)])

        # Use the runner's async interface: iterating the synchronous
        # ``runner.run`` generator inside this coroutine would block the
        # event loop (and every other request) for the whole agent run.
        try:
            events = runner.run_async(
                user_id=req.user_id,
                session_id=session.id,
                new_message=user_msg,
            )
        except Exception as runner_error:
            print(f"Runner error: {runner_error}")
            return ChatResponse(
                session_id=session.id,
                answer="Sorry, I encountered an error processing your request. Please try again."
            )

        # Errors raised while the agent is running surface here, during
        # iteration of the event stream.
        try:
            reply_text = await _first_final_reply(events)
        except Exception as event_error:
            print(f"Event processing error: {event_error}")
            reply_text = "Sorry, I encountered an error while processing the response."

        # Fallback if no reply_text
        if not reply_text:
            reply_text = "I'm sorry, I couldn't generate a proper response. Please try rephrasing your question."

        # Logging is best-effort: a failure here must not fail the request.
        # NOTE(review): if add_query_to_sheet performs synchronous network
        # I/O it still blocks the event loop for its duration - confirm.
        try:
            add_query_to_sheet(req.user_id, processed_query, reply_text)
        except Exception as sheet_error:
            print(f"Sheet logging error: {sheet_error}")

        return ChatResponse(session_id=session.id, answer=reply_text)
    except Exception as e:
        print(f"Chat endpoint error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
# Health-check route: returns a constant status payload.
@app.get("/healthz")
def health_check():
    return dict(status="ok", service="college-counselor-agent")
# Add a simple test endpoint
# Reports the cache-directory environment variables so a deployment can be
# checked for the expected cache paths (None for a variable that is unset).
@app.get("/test")
async def test_endpoint():
    reported_vars = ("HF_HOME", "TRANSFORMERS_CACHE")
    return {
        "message": "Test endpoint working",
        "env_vars": {name: os.environ.get(name) for name in reported_vars},
    }