"""FastAPI service exposing a Gemini-backed college counselling agent.

A request to ``/chat`` is normalised (institute and branch abbreviations are
expanded to their full names), handed to a Google ADK agent equipped with the
project's database, web-search, predictor and mentor tools, and the agent's
final text answer is returned and logged to a sheet.
"""
import os
import warnings

# === CRITICAL: Set cache directories BEFORE any other imports ===
# NOTE(review): presumably the libraries imported below (via `tools`) read
# these variables at import time, so this must stay ahead of them - confirm.
os.environ['HF_HOME'] = '/tmp/huggingface_cache'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
os.environ['HF_DATASETS_CACHE'] = '/tmp/datasets_cache'
warnings.filterwarnings("ignore")

import asyncio
import inspect
import json
import logging
import re
from typing import Dict, Iterable, Tuple

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from dotenv import load_dotenv
from google.adk.agents import Agent
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.genai import types

from tools import (
    db_tool,
    tavily_tool,
    predictor_tool,
    mentor_tool,
    add_query_to_sheet
)

logger = logging.getLogger(__name__)

# === LOAD ENV ===
load_dotenv()
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# === PREPROCESSING DICTIONARY ===
# Abbreviation -> one-element list holding the canonical (lower-case) name.
# Matching is case-insensitive, so the upper/lower-case duplicates are
# redundant for preprocess_query; they are kept so the table's contents stay
# unchanged for any other module that reads it.
INSTITUTE_MAPPING = {
    "iit": ["indian institute of technology"],
    "IIT": ["indian institute of technology"],
    "i.i.t": ["indian institute of technology"],
    "I.I.T": ["indian institute of technology"],
    "i i t": ["indian institute of technology"],
    "I I T": ["indian institute of technology"],
    "indian institute of technology": ["indian institute of technology"],
    "Indian Institute of Technology": ["indian institute of technology"],
    "nit": ["national institute of technology"],
    "NIT": ["national institute of technology"],
    "n.i.t": ["national institute of technology"],
    "N.I.T": ["national institute of technology"],
    "n i t": ["national institute of technology"],
    "N I T": ["national institute of technology"],
    "national institute of technology": ["national institute of technology"],
    "National Institute of Technology": ["national institute of technology"],
    "iiit": ["indian institute of information technology"],
    "IIIT": ["indian institute of information technology"],
    "i.i.i.t": ["indian institute of information technology"],
    "I.I.I.T": ["indian institute of information technology"],
    "i i i t": ["indian institute of information technology"],
    "I I I T": ["indian institute of information technology"],
    "indian institute of information technology": ["indian institute of information technology"],
    "Indian Institute of Information Technology": ["indian institute of information technology"],
    "bit": ["birla institute of technology"],
    "BIT": ["birla institute of technology"],
    "b.i.t": ["birla institute of technology"],
    "B.I.T": ["birla institute of technology"],
    "b i t": ["birla institute of technology"],
    "B I T": ["birla institute of technology"],
    "birla institute of technology": ["birla institute of technology"],
    "Birla Institute of Technology": ["birla institute of technology"],
    "bits": ["birla institute of technology and science"],
    "BITS": ["birla institute of technology and science"],
    "b.i.t.s": ["birla institute of technology and science"],
    "B.I.T.S": ["birla institute of technology and science"],
    "b i t s": ["birla institute of technology and science"],
    "B I T S": ["birla institute of technology and science"],
    "birla institute of technology and science": ["birla institute of technology and science"],
    "Birla Institute of Technology and Science": ["birla institute of technology and science"],
    "dtu": ["delhi technological university"],
    "DTU": ["delhi technological university"],
    "nsut": ["netaji subhas university of technology"],
    "NSUT": ["netaji subhas university of technology"],
    "iiitd": ["indraprastha institute of information technology delhi"],
    "IIITD": ["indraprastha institute of information technology delhi"],
    "iiith": ["international institute of information technology hyderabad"],
    "IIITH": ["international institute of information technology hyderabad"],
    "iiitb": ["international institute of information technology bangalore"],
    "IIITB": ["international institute of information technology bangalore"],
    "vit": ["vellore institute of technology"],
    "VIT": ["vellore institute of technology"],
    "srm": ["srm institute of science and technology"],
    "SRM": ["srm institute of science and technology"],
    "mit": ["manipal institute of technology"],
    "MIT": ["manipal institute of technology"],
    "pes": ["pes university"],
    "PES": ["pes university"],
    "bms": ["bms college of engineering"],
    "BMS": ["bms college of engineering"],
    "rv": ["rv college of engineering"],
    "RV": ["rv college of engineering"],
    "ramaiah": ["ms ramaiah institute of technology"],
    "RAMAIAH": ["ms ramaiah institute of technology"],
    "thapar": ["thapar institute of engineering and technology"],
    "THAPAR": ["thapar institute of engineering and technology"],
    "ism": ["indian school of mines"],
    "ISM": ["indian school of mines"],
    "spa": ["school of planning & architecture"],
    "SPA": ["school of planning & architecture"],
    "vnit": ["visvesvaraya national institute of technology"],
    "VNIT": ["visvesvaraya national institute of technology"],
    "manit": ["maulana azad national institute of technology"],
    "MANIT": ["maulana azad national institute of technology"],
    "svnit": ["sardar vallabhbhai national institute of technology"],
    "SVNIT": ["sardar vallabhbhai national institute of technology"],
    "mnit": ["malaviya national institute of technology"],
    "MNIT": ["malaviya national institute of technology"],
    "mnnit": ["motilal nehru national institute of technology"],
    "MNNIT": ["motilal nehru national institute of technology"],
    "sliet": ["sant longowal institute of engineering and technology"],
    "SLIET": ["sant longowal institute of engineering and technology"],
    "iiest": ["indian institute of engineering science and technology"],
    "IIEST": ["indian institute of engineering science and technology"],
    "cusat": ["cochin university of science and technology"],
    "CUSAT": ["cochin university of science and technology"],
    "niftem": ["national institute of food technology entrepreneurship and management"],
    "NIFTEM": ["national institute of food technology entrepreneurship and management"],
    "iiht": ["indian institute of handloom technology"],
    "IIHT": ["indian institute of handloom technology"],
    "ict": ["institute of chemical technology"],
    "ICT": ["institute of chemical technology"],
    "iitram": ["institute of infrastructure, technology, research and management"],
    "IITRAM": ["institute of infrastructure, technology, research and management"],
    "csvtu": ["chhattisgarh swami vivekanada technical university"],
    "CSVTU": ["chhattisgarh swami vivekanada technical university"],
    "gkc": ["ghani khan choudhary institute of engineering and technology"],
    "GKC": ["ghani khan choudhary institute of engineering and technology"],
    "nerist": ["north eastern regional institute of science and technology"],
    "NERIST": ["north eastern regional institute of science and technology"],
    "cit": ["central institute of technology"],
    "CIT": ["central institute of technology"],
    "tezu": ["tezpur university"],
    "TEZU": ["tezpur university"],
    "nehu": ["north-eastern hill university"],
    "NEHU": ["north-eastern hill university"],
    "mizoram": ["mizoram university"],
    "MIZORAM": ["mizoram university"],
    "assam": ["assam university"],
    "ASSAM": ["assam university"],
    "smvdu": ["shri mata vaishno devi university"],
    "SMVDU": ["shri mata vaishno devi university"],
    "pu": ["puducherry university"],
    "PU": ["puducherry university"],
    "ptu": ["puducherry technological university"],
    "PTU": ["puducherry technological university"],
    "jnu": ["jawaharlal nehru university"],
    "JNU": ["jawaharlal nehru university"],
    "uoh": ["university of hyderabad"],
    "UOH": ["university of hyderabad"],
    "ggu": ["guru ghasidas vishwavidyalaya"],
    "GGU": ["guru ghasidas vishwavidyalaya"],
    "cu": ["central university"],
    "CU": ["central university"],
    "gsv": ["gati shakti vishwavidyalaya"],
    "GSV": ["gati shakti vishwavidyalaya"],
    "gkv": ["gurukula kangri vishwavidyalaya"],
    "GKV": ["gurukula kangri vishwavidyalaya"],
}

# Branch abbreviation -> full branch name.
BRANCH_MAPPING = {
    "cse": "computer science and engineering",
    "CSE": "computer science and engineering",
    "cs": "computer science and engineering",
    "CS": "computer science and engineering",
    "ece": "electronics and communication engineering",
    "ECE": "electronics and communication engineering",
    "ee": "electrical engineering",
    "EE": "electrical engineering",
    "MECH": "mechanical engineering",
    "mech": "mechanical engineering",
    "ce": "civil engineering",
    "CE": "civil engineering",
    "che": "chemical engineering",
    "CHE": "chemical engineering",
    "aero": "aerospace engineering",
    "AERO": "aerospace engineering",
    "bio": "biotechnology",
    "BIO": "biotechnology",
}


# === PREPROCESSING FUNCTION ===
def _expansion_table(pairs: Iterable[Tuple[str, str]]) -> Dict[str, str]:
    """Build a lower-cased lookup of abbreviation -> full name.

    Every full name is also registered as a key mapping to itself, so text
    that is already expanded is matched as a whole and left alone instead of
    having an abbreviation inside it expanded a second time.
    """
    table: Dict[str, str] = {}
    for abbreviation, full_name in pairs:
        table.setdefault(abbreviation.lower(), full_name)
    for full_name in list(table.values()):
        table.setdefault(full_name.lower(), full_name)
    return table


def _compile_expander(table: Dict[str, str]):
    """Compile one case-insensitive, whole-word alternation over *table*'s keys."""
    # Longest alternatives first so e.g. "bits" wins over "bit" and a full
    # name wins over the abbreviation it starts with.
    alternatives = sorted(table, key=len, reverse=True)
    joined = "|".join(re.escape(alt) for alt in alternatives)
    return re.compile(rf'\b(?:{joined})\b', re.IGNORECASE)


_INSTITUTE_TABLE = _expansion_table(
    (abbreviation, names[0]) for abbreviation, names in INSTITUTE_MAPPING.items()
)
_INSTITUTE_PATTERN = _compile_expander(_INSTITUTE_TABLE)
_BRANCH_TABLE = _expansion_table(BRANCH_MAPPING.items())
_BRANCH_PATTERN = _compile_expander(_BRANCH_TABLE)


def _expand(pattern, table: Dict[str, str], text: str) -> str:
    """Replace every match of *pattern* in *text* with its entry in *table*."""
    def replacement(match):
        found = match.group(0)
        # IGNORECASE can match a few non-ASCII look-alikes whose lower() is
        # not a table key; leave such text untouched rather than fail.
        return table.get(found.lower(), found)

    return pattern.sub(replacement, text)


def preprocess_query(query: str) -> str:
    """Expand institute and branch abbreviations in *query* to full names.

    Matching is case-insensitive and restricted to whole words. Each table is
    applied in a single pass, so inserted text is never re-scanned: "srm"
    becomes "srm institute of science and technology" exactly once, and a
    query that already says "Assam University" is not turned into
    "assam university university".

    NOTE(review): short keys that are also ordinary words ("bit", "ce",
    "assam", ...) are still expanded wherever they appear as whole words.

    Args:
        query: Raw user question.

    Returns:
        The question with recognised abbreviations replaced by the lower-case
        full names from INSTITUTE_MAPPING and BRANCH_MAPPING.
    """
    query = _expand(_INSTITUTE_PATTERN, _INSTITUTE_TABLE, query)
    return _expand(_BRANCH_PATTERN, _BRANCH_TABLE, query)


# === AGENT SETUP ===
APP_NAME = "college_agent_app"

# System prompt given to the agent; wording is intentionally left as authored.
_AGENT_INSTRUCTION = """
You are a highly experienced college counselor specializing in helping high school students choose the right engineering colleges. You have access to several tools to help answer student queries.

AVAILABLE TOOLS:
1. db_tool - Search local college database
2. tavily_tool - Search internet for college information
3. predictor_tool - Predict colleges based on rank and preferences
4. mentor_tool - Find mentors from specific colleges

WORKFLOW:
Step 1: Classify the user's query into one of these categories:
a. "Know college based on my rank"
b. "Know about a college"
c. "Talk to a college student/mentor"
d. "General query"
e. "JOSAA or CSAB queries"
f. "Unrelated query"

Step 2: Based on category, follow these steps:

For Category A (College predictions):
- Extract: userCrl (Common Rank List or category rank), userCategory, userGender, userHomeState
- Optional: collegeName, branchName preferences
- ALWAYS call the predictor_tool if userCrl is provided.
- After Extracting the params from the query pass the params inside predictor_tool.
- Display the output in bullet points format showing predicted colleges, branches, cutoffs, etc.
- Add this at end of the response 'For detailed and better filters please visit https://www.precollege.in/college-predictor'

For Category B (College information):
- Extract college name
- Call db_tool
- If db_tool fails to return meaningful output(Answer according to query), fallback to tavily_tool to fetch data from the internet.
- Present a brief, well-structured summary in natural language.

For Category C (Find mentors):
- Extract college name
- Call mentor_tool
- Provide the top 6 mentor details using mentor_tool(in name:link to mentor profile format) and include this at end of the respose 'For more mentor details: visit: https://precollege.in/mentors

For Category D (General queries):
- Answer using db_tool
- If db_tool doesn't provide enough, fallback to tavily_tool
- Summarize the answer clearly and informatively.

For Category E (JOSAA or CSAB queries):
- Answer using tavily_tool using JOSAA and CSAB offical websites.(https://josaa.nic.in/, https://csab.nic.in/)
- Answer clearly and informatively in points.

For Category F (Unrelated query):
- If the query is unrelated to JoSAA counseling, colleges, branches, or mentoring:
- Respond: "Sorry, this is beyond my capabilities."

IMPORTANT RULES:
- ALWAYS use the tools specified above based on the workflow.
- Never invent data. If something cannot be found, say so clearly.
- Use clean formatting and polite tone.
- Always give output in bullet points
- Do not skip steps or tools. Follow the workflow strictly.
"""

# One session service for the whole process: a fresh service per request
# would discard the history that session_id is meant to identify.
_SESSION_SERVICE = InMemorySessionService()
_RUNNER = None


async def _resolve(value):
    """Await *value* if it is awaitable, otherwise return it unchanged.

    The session-service methods are awaited only when they return an
    awaitable, mirroring the original code's handling of both sync and async
    variants of the API.
    """
    if inspect.isawaitable(value):
        return await value
    return value


def _get_runner():
    """Return the process-wide Runner, building the agent on first use."""
    global _RUNNER
    if _RUNNER is None:
        agent = Agent(
            name="college_info_agent",
            model="gemini-1.5-flash",
            instruction=_AGENT_INSTRUCTION,
            tools=[db_tool, tavily_tool, predictor_tool, mentor_tool],
            generate_content_config=types.GenerateContentConfig(
                max_output_tokens=1500,
                temperature=0.1,
            ),
        )
        _RUNNER = Runner(
            agent=agent,
            app_name=APP_NAME,
            session_service=_SESSION_SERVICE,
        )
    return _RUNNER


async def create_agent_runner(user_id: str, session_id: str):
    """Return the shared runner and the session for this user/session pair.

    An existing session is reused so earlier turns stay available to the
    agent; otherwise a new one is created under *session_id*.

    Args:
        user_id: Identifier of the calling user.
        session_id: Identifier of the conversation.

    Returns:
        A ``(runner, session)`` tuple.

    Raises:
        Exception: Whatever the session service raises. No placeholder
            session is substituted, because the runner could not use a
            session the service does not know about.
    """
    runner = _get_runner()
    session = await _resolve(_SESSION_SERVICE.get_session(
        app_name=APP_NAME, user_id=user_id, session_id=session_id
    ))
    if session is None:
        session = await _resolve(_SESSION_SERVICE.create_session(
            app_name=APP_NAME, user_id=user_id, session_id=session_id
        ))
    return runner, session


# === FASTAPI SETUP ===
app = FastAPI(
    title="College Counselor Agent",
    description="AI Agent for college counseling and selection",
    version="1.0.0"
)


class ChatRequest(BaseModel):
    """Body of a POST /chat request."""

    user_id: str
    session_id: str
    question: str


class ChatResponse(BaseModel):
    """Body of a POST /chat response."""

    session_id: str
    answer: str


@app.get("/")
async def root():
    """Return a liveness message with pointers to the docs and health routes."""
    return {
        "message": "College Counselor Agent is running!",
        "docs": "/docs",
        "health": "/healthz"
    }


def _extract_reply_text(event) -> str:
    """Return the first non-empty text part of *event*, or "" if there is none."""
    content = getattr(event, "content", None)
    for part in getattr(content, "parts", None) or []:
        text = getattr(part, "text", None)
        if text:
            return text
    return ""


@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(req: ChatRequest):
    """Answer one user question through the counselling agent.

    The question is normalised with preprocess_query, sent to the agent, and
    the first text part of the agent's first final response is returned. The
    exchange is then logged with add_query_to_sheet on a best-effort basis.

    Raises:
        HTTPException: 500 when session setup or request preparation fails.
            Failures while the agent runs are reported in the answer text.
    """
    try:
        runner, session = await create_agent_runner(req.user_id, req.session_id)
        processed_query = preprocess_query(req.question)
        user_msg = types.Content(role="user", parts=[types.Part(text=processed_query)])

        reply_text = ""
        try:
            # run_async is consumed with `async for` so the handler does not
            # iterate the synchronous Runner.run generator on the event loop.
            async for event in runner.run_async(
                user_id=req.user_id,
                session_id=session.id,
                new_message=user_msg,
            ):
                if event.is_final_response():
                    reply_text = _extract_reply_text(event)
                    break
        except Exception:
            logger.exception("Event processing error")
            reply_text = "Sorry, I encountered an error while processing the response."

        # Fallback if no reply_text
        if not reply_text:
            reply_text = "I'm sorry, I couldn't generate a proper response. Please try rephrasing your question."

        # Log to sheet; a logging failure must not fail the request.
        try:
            add_query_to_sheet(req.user_id, processed_query, reply_text)
        except Exception:
            logger.exception("Sheet logging error")

        return ChatResponse(session_id=session.id, answer=reply_text)

    except Exception as e:
        logger.exception("Chat endpoint error")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e


@app.get("/healthz")
def health_check():
    """Return a static health payload."""
    return {"status": "ok", "service": "college-counselor-agent"}


# Add a simple test endpoint
@app.get("/test")
async def test_endpoint():
    """Return a fixed message plus the cache directories set at import time."""
    return {
        "message": "Test endpoint working",
        "env_vars": {
            "HF_HOME": os.environ.get('HF_HOME'),
            "TRANSFORMERS_CACHE": os.environ.get('TRANSFORMERS_CACHE')
        }
    }