# Asura05's picture
# Update app.py
# d11c1bc verified
import os
import warnings
# === CRITICAL: export the cache locations BEFORE any other imports run ===
# All three cache variables are pointed at directories under /tmp.
os.environ.update(
    {
        'HF_HOME': '/tmp/huggingface_cache',
        'TRANSFORMERS_CACHE': '/tmp/transformers_cache',
        'HF_DATASETS_CACHE': '/tmp/datasets_cache',
    }
)
# Suppress every warning for the rest of the process.
warnings.filterwarnings(action="ignore")
import json
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from dotenv import load_dotenv
from google.adk.agents import Agent
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.genai import types
import re
import asyncio
from tools import (
db_tool,
tavily_tool,
predictor_tool,
mentor_tool,
add_query_to_sheet
)
# === LOAD ENV ===
# Pull variables from a .env file (if any) into the process environment first,
# then read the two credentials; each is None when the variable is not set.
load_dotenv()
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
# === PREPROCESSING DICTIONARY ===
# Abbreviation -> canonical institute name, consumed by preprocess_query().
# - Each value is a one-element list; only element [0] is read by preprocess_query().
# - preprocess_query() matches keys as whole words with re.IGNORECASE, so the
#   lower-/upper-case key pairs below behave the same when matching.
# - Insertion order matters for keys of equal length: preprocess_query() sorts the
#   keys by length with a stable sort.
INSTITUTE_MAPPING = {
# --- Institute families: abbreviation, dotted, spaced and spelled-out variants ---
"iit": ["indian institute of technology"],
"IIT": ["indian institute of technology"],
"i.i.t": ["indian institute of technology"],
"I.I.T": ["indian institute of technology"],
"i i t": ["indian institute of technology"],
"I I T": ["indian institute of technology"],
"indian institute of technology": ["indian institute of technology"],
"Indian Institute of Technology": ["indian institute of technology"],
"nit": ["national institute of technology"],
"NIT": ["national institute of technology"],
"n.i.t": ["national institute of technology"],
"N.I.T": ["national institute of technology"],
"n i t": ["national institute of technology"],
"N I T": ["national institute of technology"],
"national institute of technology": ["national institute of technology"],
"National Institute of Technology": ["national institute of technology"],
"iiit": ["indian institute of information technology"],
"IIIT": ["indian institute of information technology"],
"i.i.i.t": ["indian institute of information technology"],
"I.I.I.T": ["indian institute of information technology"],
"i i i t": ["indian institute of information technology"],
"I I I T": ["indian institute of information technology"],
"indian institute of information technology": ["indian institute of information technology"],
"Indian Institute of Information Technology": ["indian institute of information technology"],
"bit": ["birla institute of technology"],
"BIT": ["birla institute of technology"],
"b.i.t": ["birla institute of technology"],
"B.I.T": ["birla institute of technology"],
"b i t": ["birla institute of technology"],
"B I T": ["birla institute of technology"],
"birla institute of technology": ["birla institute of technology"],
"Birla Institute of Technology": ["birla institute of technology"],
"bits": ["birla institute of technology and science"],
"BITS": ["birla institute of technology and science"],
"b.i.t.s": ["birla institute of technology and science"],
"B.I.T.S": ["birla institute of technology and science"],
"b i t s": ["birla institute of technology and science"],
"B I T S": ["birla institute of technology and science"],
"birla institute of technology and science": ["birla institute of technology and science"],
"Birla Institute of Technology and Science": ["birla institute of technology and science"],
# --- Single-token abbreviations, each listed in lower and upper case ---
"dtu": ["delhi technological university"],
"DTU": ["delhi technological university"],
"nsut": ["netaji subhas university of technology"],
"NSUT": ["netaji subhas university of technology"],
"iiitd": ["indraprastha institute of information technology delhi"],
"IIITD": ["indraprastha institute of information technology delhi"],
"iiith": ["international institute of information technology hyderabad"],
"IIITH": ["international institute of information technology hyderabad"],
"iiitb": ["international institute of information technology bangalore"],
"IIITB": ["international institute of information technology bangalore"],
"vit": ["vellore institute of technology"],
"VIT": ["vellore institute of technology"],
# NOTE: from "srm" onward several expansions contain their own abbreviation
# ("srm ...", "pes ...", "bms ...", "rv ...", "... ramaiah ...", "thapar ...").
"srm": ["srm institute of science and technology"],
"SRM": ["srm institute of science and technology"],
"mit": ["manipal institute of technology"],
"MIT": ["manipal institute of technology"],
"pes": ["pes university"],
"PES": ["pes university"],
"bms": ["bms college of engineering"],
"BMS": ["bms college of engineering"],
"rv": ["rv college of engineering"],
"RV": ["rv college of engineering"],
"ramaiah": ["ms ramaiah institute of technology"],
"RAMAIAH": ["ms ramaiah institute of technology"],
"thapar": ["thapar institute of engineering and technology"],
"THAPAR": ["thapar institute of engineering and technology"],
"ism": ["indian school of mines"],
"ISM": ["indian school of mines"],
"spa": ["school of planning & architecture"],
"SPA": ["school of planning & architecture"],
"vnit": ["visvesvaraya national institute of technology"],
"VNIT": ["visvesvaraya national institute of technology"],
"manit": ["maulana azad national institute of technology"],
"MANIT": ["maulana azad national institute of technology"],
"svnit": ["sardar vallabhbhai national institute of technology"],
"SVNIT": ["sardar vallabhbhai national institute of technology"],
"mnit": ["malaviya national institute of technology"],
"MNIT": ["malaviya national institute of technology"],
"mnnit": ["motilal nehru national institute of technology"],
"MNNIT": ["motilal nehru national institute of technology"],
"sliet": ["sant longowal institute of engineering and technology"],
"SLIET": ["sant longowal institute of engineering and technology"],
"iiest": ["indian institute of engineering science and technology"],
"IIEST": ["indian institute of engineering science and technology"],
"cusat": ["cochin university of science and technology"],
"CUSAT": ["cochin university of science and technology"],
"niftem": ["national institute of food technology entrepreneurship and management"],
"NIFTEM": ["national institute of food technology entrepreneurship and management"],
"iiht": ["indian institute of handloom technology"],
"IIHT": ["indian institute of handloom technology"],
"ict": ["institute of chemical technology"],
"ICT": ["institute of chemical technology"],
"iitram": ["institute of infrastructure, technology, research and management"],
"IITRAM": ["institute of infrastructure, technology, research and management"],
"csvtu": ["chhattisgarh swami vivekanada technical university"],
"CSVTU": ["chhattisgarh swami vivekanada technical university"],
"gkc": ["ghani khan choudhary institute of engineering and technology"],
"GKC": ["ghani khan choudhary institute of engineering and technology"],
"nerist": ["north eastern regional institute of science and technology"],
"NERIST": ["north eastern regional institute of science and technology"],
"cit": ["central institute of technology"],
"CIT": ["central institute of technology"],
"tezu": ["tezpur university"],
"TEZU": ["tezpur university"],
"nehu": ["north-eastern hill university"],
"NEHU": ["north-eastern hill university"],
# NOTE: "mizoram" and "assam" are also contained in their own expansions.
"mizoram": ["mizoram university"],
"MIZORAM": ["mizoram university"],
"assam": ["assam university"],
"ASSAM": ["assam university"],
"smvdu": ["shri mata vaishno devi university"],
"SMVDU": ["shri mata vaishno devi university"],
"pu": ["puducherry university"],
"PU": ["puducherry university"],
"ptu": ["puducherry technological university"],
"PTU": ["puducherry technological university"],
"jnu": ["jawaharlal nehru university"],
"JNU": ["jawaharlal nehru university"],
"uoh": ["university of hyderabad"],
"UOH": ["university of hyderabad"],
"ggu": ["guru ghasidas vishwavidyalaya"],
"GGU": ["guru ghasidas vishwavidyalaya"],
"cu": ["central university"],
"CU": ["central university"],
"gsv": ["gati shakti vishwavidyalaya"],
"GSV": ["gati shakti vishwavidyalaya"],
"gkv": ["gurukula kangri vishwavidyalaya"],
"GKV": ["gurukula kangri vishwavidyalaya"]
}
# Branch abbreviation -> full branch name, consumed by preprocess_query().
# Unlike INSTITUTE_MAPPING, the values here are plain strings rather than lists.
# preprocess_query() matches these keys as whole words with re.IGNORECASE, so the
# lower-/upper-case key pairs behave the same when matching.
BRANCH_MAPPING = {
# "cse" and "cs" both map to the same full name.
"cse": "computer science and engineering",
"CSE": "computer science and engineering",
"cs": "computer science and engineering",
"CS": "computer science and engineering",
"ece": "electronics and communication engineering",
"ECE": "electronics and communication engineering",
"ee": "electrical engineering",
"EE": "electrical engineering",
"MECH": "mechanical engineering",
"mech": "mechanical engineering",
"ce": "civil engineering",
"CE": "civil engineering",
"che": "chemical engineering",
"CHE": "chemical engineering",
"aero": "aerospace engineering",
"AERO": "aerospace engineering",
"bio": "biotechnology",
"BIO": "biotechnology"
}
# === PREPROCESSING FUNCTION ===
def _expand_whole_words(text: str, expansions: dict) -> str:
    """Expand whole-word abbreviations in *text* with a single regex pass.

    Args:
        text: The string to rewrite.
        expansions: Maps an abbreviation to its canonical (expanded) form.
            Keys are matched case-insensitively and only on word boundaries.

    Returns:
        *text* with every matched abbreviation replaced by its expansion.

    Canonical names that are not themselves keys are matched as well and
    emitted exactly as typed. Consuming them whole keeps an abbreviation that
    appears inside an already spelled-out name (e.g. "assam" in
    "assam university") from being expanded a second time.
    """
    # Lower-cased alternative -> replacement. None means "leave as typed".
    targets = {}
    for short, canonical in expansions.items():
        targets.setdefault(short.lower(), canonical)
    for canonical in expansions.values():
        targets.setdefault(canonical.lower(), None)
    if not targets:
        return text

    # Longest alternatives first, so e.g. "bits" and "i i i t" are preferred
    # over "bit" and "i i t" when both could start at the same position.
    ordered = sorted(targets, key=len, reverse=True)
    # One capturing group per alternative: match.lastindex then identifies the
    # alternative without any case-folding of the matched text.
    pattern = re.compile(
        r"\b(?:" + "|".join(f"({re.escape(alt)})" for alt in ordered) + r")\b",
        re.IGNORECASE,
    )

    def _replacement(match):
        canonical = targets[ordered[match.lastindex - 1]]
        return match.group(0) if canonical is None else canonical

    # A callable replacement is inserted literally (no backslash-template
    # processing), and re.sub never rescans text it has already substituted.
    return pattern.sub(_replacement, text)


def preprocess_query(query: str) -> str:
    """Expand institute and branch abbreviations in a user query.

    Institute abbreviations from INSTITUTE_MAPPING are expanded first, then
    branch abbreviations from BRANCH_MAPPING. Matching is case-insensitive and
    restricted to whole words.

    Each mapping is applied in one pass, so an expansion that contains its own
    abbreviation (e.g. "srm" -> "srm institute of science and technology") is
    inserted exactly once. The previous key-by-key substitution expanded such
    names twice, because both the lower- and upper-case key matched.

    Args:
        query: Raw question text.

    Returns:
        The query with known abbreviations replaced by their full names.
    """
    # Institute values are one-element lists; the first element is the name.
    institute_names = {key: names[0] for key, names in INSTITUTE_MAPPING.items()}
    query = _expand_whole_words(query, institute_names)
    return _expand_whole_words(query, BRANCH_MAPPING)
# === AGENT SETUP ===
# System prompt handed to the agent; the text is runtime data and must not be edited casually.
_COUNSELOR_INSTRUCTION = """
You are a highly experienced college counselor specializing in helping high school students choose the right engineering colleges. You have access to several tools to help answer student queries.
AVAILABLE TOOLS:
1. db_tool - Search local college database
2. tavily_tool - Search internet for college information
3. predictor_tool - Predict colleges based on rank and preferences
4. mentor_tool - Find mentors from specific colleges
WORKFLOW:
Step 1: Classify the user's query into one of these categories:
a. "Know college based on my rank"
b. "Know about a college"
c. "Talk to a college student/mentor"
d. "General query"
e. "JOSAA or CSAB queries"
f. "Unrelated query"
Step 2: Based on category, follow these steps:
For Category A (College predictions):
- Extract: userCrl (Common Rank List or category rank), userCategory, userGender, userHomeState
- Optional: collegeName, branchName preferences
- ALWAYS call the predictor_tool if userCrl is provided.
- After Extracting the params from the query pass the params inside predictor_tool.
- Display the output in bullet points format showing predicted colleges, branches, cutoffs, etc.
- Add this at end of the response 'For detailed and better filters please visit https://www.precollege.in/college-predictor'
For Category B (College information):
- Extract college name
- Call db_tool
- If db_tool fails to return meaningful output(Answer according to query), fallback to tavily_tool to fetch data from the internet.
- Present a brief, well-structured summary in natural language.
For Category C (Find mentors):
- Extract college name
- Call mentor_tool
- Provide the top 6 mentor details using mentor_tool(in name:link to mentor profile format) and include this at end of the respose 'For more mentor details: visit: https://precollege.in/mentors
For Category D (General queries):
- Answer using db_tool
- If db_tool doesn't provide enough, fallback to tavily_tool
- Summarize the answer clearly and informatively.
For Category E (JOSAA or CSAB queries):
- Answer using tavily_tool using JOSAA and CSAB offical websites.(https://josaa.nic.in/, https://csab.nic.in/)
- Answer clearly and informatively in points.
For Category F (Unrelated query):
- If the query is unrelated to JoSAA counseling, colleges, branches, or mentoring:
- Respond: "Sorry, this is beyond my capabilities."
IMPORTANT RULES:
- ALWAYS use the tools specified above based on the workflow.
- Never invent data. If something cannot be found, say so clearly.
- Use clean formatting and polite tone.
- Always give output in bullet points
- Do not skip steps or tools. Follow the workflow strictly.
"""


async def create_agent_runner(user_id: str, session_id: str):
    """Build a fresh agent, session service, session and runner for one request.

    Args:
        user_id: Identifier passed to the session service when creating the session.
        session_id: Identifier of the session to create.

    Returns:
        A ``(runner, session)`` tuple. If session creation raises, ``session``
        is a minimal stand-in object exposing only ``id``.
    """
    app_name = "college_agent_app"

    generation_config = types.GenerateContentConfig(
        max_output_tokens=1500,
        temperature=0.1,
    )
    counselor = Agent(
        name="college_info_agent",
        model="gemini-1.5-flash",
        instruction=_COUNSELOR_INSTRUCTION,
        tools=[db_tool, tavily_tool, predictor_tool, mentor_tool],
        generate_content_config=generation_config,
    )

    # A new in-memory service is created on every call.
    session_service = InMemorySessionService()

    try:
        created = session_service.create_session(
            app_name=app_name,
            user_id=user_id,
            session_id=session_id,
        )
        # create_session may hand back a coroutine; resolve it if so.
        session = (await created) if asyncio.iscoroutine(created) else created
    except Exception as exc:
        print(f"Session creation error: {exc}")

        # Fallback: a bare object that only carries the session id.
        class SimpleSession:
            def __init__(self, session_id):
                self.id = session_id

        session = SimpleSession(session_id)

    runner = Runner(agent=counselor, app_name=app_name, session_service=session_service)
    return runner, session
# === FASTAPI SETUP ===
# Metadata passed to the FastAPI constructor.
_APP_METADATA = dict(
    title="College Counselor Agent",
    description="AI Agent for college counseling and selection",
    version="1.0.0",
)
app = FastAPI(**_APP_METADATA)
# Request body of POST /chat.
class ChatRequest(BaseModel):
    # Passed to create_agent_runner() and to the runner, and logged with the query.
    user_id: str
    # Session identifier used when creating the agent session.
    session_id: str
    # Raw user question; abbreviations are expanded by preprocess_query() before use.
    question: str
# Response body of POST /chat.
class ChatResponse(BaseModel):
    # Id of the session object the request was served with.
    session_id: str
    # Final reply text, or an apology/fallback message when none was produced.
    answer: str
@app.get("/")
async def root():
    # Landing payload: a status message plus the docs and health-check paths.
    return dict(
        message="College Counselor Agent is running!",
        docs="/docs",
        health="/healthz",
    )
def _final_reply_text(event) -> str:
    """Return the first non-empty text part of a final-response event.

    Falls back to the event's own ``text`` attribute (or "") when the event
    has no iterable ``content.parts``. Returns "" when no part carries text.
    """
    try:
        for part in event.content.parts:
            text = getattr(part, "text", None)
            if text:
                return text
    except (AttributeError, TypeError):
        # content or parts is missing/None, so use the event-level text if any.
        return getattr(event, "text", "") or ""
    return ""


# POST /chat: run one agent turn for the user's question and return the reply.
# - Agent failures are reported as an apology inside a normal 200 response.
# - Only unexpected errors outside the agent run become HTTP 500.
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(req: ChatRequest):
    try:
        runner, session = await create_agent_runner(req.user_id, req.session_id)
        processed_query = preprocess_query(req.question)
        user_msg = types.Content(role="user", parts=[types.Part(text=processed_query)])
        reply_text = ""

        # Use the runner's async API. Iterating the synchronous runner.run()
        # generator inside this coroutine would block the event loop (and
        # every other request) for the whole agent turn.
        try:
            events = runner.run_async(
                user_id=req.user_id,
                session_id=session.id,
                new_message=user_msg,
            )
        except Exception as runner_error:
            print(f"Runner error: {runner_error}")
            return ChatResponse(
                session_id=session.id,
                answer="Sorry, I encountered an error processing your request. Please try again."
            )

        # Stop at the first final response; earlier events are intermediate.
        try:
            async for ev in events:
                if hasattr(ev, 'is_final_response') and ev.is_final_response():
                    reply_text = _final_reply_text(ev)
                    break
        except Exception as event_error:
            print(f"Event processing error: {event_error}")
            reply_text = "Sorry, I encountered an error while processing the response."

        # Fallback if no reply_text
        if not reply_text:
            reply_text = "I'm sorry, I couldn't generate a proper response. Please try rephrasing your question."

        # Best-effort logging: a failure here must not fail the request.
        # NOTE(review): add_query_to_sheet is called inline; if it performs
        # blocking I/O, consider moving it off the event loop. Confirm in tools.
        try:
            add_query_to_sheet(req.user_id, processed_query, reply_text)
        except Exception as sheet_error:
            print(f"Sheet logging error: {sheet_error}")

        return ChatResponse(session_id=session.id, answer=reply_text)
    except Exception as e:
        print(f"Chat endpoint error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@app.get("/healthz")
def health_check():
    # Static liveness payload; nothing is probed.
    return dict(status="ok", service="college-counselor-agent")
# Add a simple test endpoint
@app.get("/test")
async def test_endpoint():
    # Report the two cache-related environment variables (None when unset).
    reported = ("HF_HOME", "TRANSFORMERS_CACHE")
    env_snapshot = {name: os.environ.get(name) for name in reported}
    return {
        "message": "Test endpoint working",
        "env_vars": env_snapshot,
    }