# Source: Hugging Face Space Asura05/Precollege-Fastapi (status: Sleeping; file size: 6,058 bytes)

import os

# === CRITICAL: Set cache directories BEFORE any other imports ===
# Redirect the Hugging Face, Transformers and Datasets caches to directories
# under /tmp. The assignments are placed ahead of the third-party imports below
# so the variables are already set when those libraries are loaded.
# NOTE(review): presumably /tmp is used because the default cache location is
# not writable in the deployment container — confirm.
os.environ['HF_HOME'] = '/tmp/huggingface_cache'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
os.environ['HF_DATASETS_CACHE'] = '/tmp/datasets_cache'

# Now import everything else
import json
import datetime
import requests
import gspread
from dotenv import load_dotenv
from huggingface_hub import login as hf_login
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer
from langchain_tavily import TavilySearch
from google.adk.tools import FunctionTool

# === LOAD ENV ===
# Load variables from a .env file (if present) into the environment, then read
# the settings this module uses. os.getenv returns None for any variable that
# is not set; nothing below validates that the values are present.
load_dotenv()
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # passed to hf_login below
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")  # not referenced again in this file
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")  # not referenced again in this file
SERVICE_ACCOUNT_JSON = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")  # JSON text of the service-account credentials
SHEET_KEY = os.getenv("SHEET_KEY")  # spreadsheet key used by add_query_to_sheet
PREDICTOR_API_URL = os.getenv("PREDICTOR_API_URL")  # endpoint POSTed to by college_predictor
PREDICTOR_API_KEY = os.getenv("PREDICTOR_API_KEY")  # sent as a Bearer token by college_predictor

# Authenticate with the Hugging Face Hub at import time.
# NOTE(review): if HUGGINGFACE_TOKEN is unset this is called with token=None —
# confirm that is acceptable in a non-interactive deployment.
hf_login(token=HF_TOKEN)

# === GOOGLE SHEET LOGGING ===
# Parse the service-account credentials from their JSON string form. A non-string
# value (i.e. None when the environment variable is unset) is passed through
# unchanged, so service_account_dict is None in that case.
service_account_dict = json.loads(SERVICE_ACCOUNT_JSON) if isinstance(SERVICE_ACCOUNT_JSON, str) else SERVICE_ACCOUNT_JSON

def add_query_to_sheet(user_id: str, query: str, response: str):
    """Append one interaction record to the "Sheet1" worksheet.

    A gspread client is created from ``service_account_dict`` on every call,
    and the spreadsheet is looked up by ``SHEET_KEY``. The appended row is
    ``[user_id, timestamp, query, response]``; the timestamp comes from the
    local clock (no timezone attached) in ``YYYY-MM-DD HH:MM:SS`` form.

    Args:
        user_id: Identifier written to the first column.
        query: The user's query text.
        response: The response text recorded alongside the query.
    """
    client = gspread.service_account_from_dict(service_account_dict)
    worksheet = client.open_by_key(SHEET_KEY).worksheet("Sheet1")
    logged_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    row = [user_id, logged_at, query, response]
    worksheet.append_row(row)

# === VECTOR STORE ===
def load_vector_store(data_dir: str):
    """Build an in-memory FAISS index from the Markdown files in *data_dir*.

    Every entry whose name ends in ``.md`` (case-insensitive) is read in full
    and indexed as a single text. Files are visited in sorted name order so the
    index is constructed the same way on every run. Each file is decoded as
    UTF-8 first and re-read as latin-1 if that fails.

    Embeddings are computed locally with the ``all-MiniLM-L6-v2``
    SentenceTransformer model.

    Args:
        data_dir: Directory to scan; subdirectories are not traversed.

    Returns:
        The vector store returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If *data_dir* contains no ``.md`` files.
        OSError: If *data_dir* cannot be listed or a file cannot be opened.
    """
    texts = []
    # os.listdir yields entries in arbitrary order; sort for a reproducible index.
    for fname in sorted(os.listdir(data_dir)):
        if not fname.lower().endswith(".md"):
            continue
        path = os.path.join(data_dir, fname)
        try:
            with open(path, "r", encoding="utf-8") as f:
                texts.append(f.read())
        except UnicodeDecodeError:
            # latin-1 maps every byte to a character, so this retry cannot fail
            # on decoding (the text may be mojibake, but the file is not lost).
            with open(path, "r", encoding="latin-1") as f:
                texts.append(f.read())

    if not texts:
        # Fail early with a clear message, before the embedding model is
        # loaded; an empty corpus would otherwise surface as an opaque error
        # from inside the index construction.
        raise ValueError(
            f"No .md files found in {data_dir!r}; cannot build the vector store."
        )

    st_model = SentenceTransformer("all-MiniLM-L6-v2")

    class LocalEmbeddings(Embeddings):
        """Adapter exposing the SentenceTransformer model through the LangChain interface."""

        def embed_documents(self, docs):
            return st_model.encode(docs).tolist()

        def embed_query(self, q):
            return st_model.encode([q])[0].tolist()

    return FAISS.from_texts(texts, LocalEmbeddings())

# Built once at import time and shared by db_search. The path is relative, so it
# is resolved against the process's current working directory.
vector_store = load_vector_store("College_markdown")

# === TOOL DEFINITIONS ===
def db_search(query: str) -> dict:
    """Return the stored texts most similar to *query*.

    Asks the module-level ``vector_store`` for up to six nearest documents.

    Args:
        query: Free-text search string.

    Returns:
        ``{"results": [...]}`` containing each match's ``page_content``; the
        list is empty when the search yields nothing.
    """
    hits = vector_store.similarity_search(query, k=6)
    contents = [hit.page_content for hit in hits] if hits else []
    return {"results": contents}

def tavily_search(query: str) -> dict:
    """Run a general-topic Tavily web search for *query*.

    A new ``TavilySearch`` tool is created per call, asking for up to six
    results with raw content included.

    Args:
        query: Free-text search string.

    Returns:
        ``{"results": [...]}`` where each entry is the ``content`` field of one
        search result (``None`` for a result that has no such field).
    """
    searcher = TavilySearch(max_results=6, topic="general", include_raw_content=True)
    payload = searcher.invoke({"query": query})
    contents = []
    for entry in payload.get('results', []):
        contents.append(entry.get('content'))
    return {"results": contents}

def college_predictor(
    userCrl: int,
    userCategory: str,
    userGender: str,
    userHomeState: str,
    limit: int = 6,
    counsellingName: str = "josaa",
    collegeName: str = "national institute of technology",
    branchName: str = "computer science and engineering"
) -> str:
    """Query the college-predictor API and format its predictions as text.

    Sends a JSON POST to ``PREDICTOR_API_URL`` authenticated with
    ``PREDICTOR_API_KEY`` as a Bearer token, then renders up to *limit*
    returned colleges as a numbered list.

    Args:
        userCrl: The user's rank; sent as ``userCrl`` and echoed in the
            heading of the result text.
        userCategory: Sent to the API unchanged as ``userCategory``.
        userGender: Sent to the API unchanged as ``userGender``.
        userHomeState: Sent to the API unchanged as ``userHomeState``.
        limit: Sent to the API and also used locally to cap how many colleges
            are listed.
        counsellingName: Sent to the API unchanged as ``counsellingName``.
        collegeName: Sent as ``collegeQuery``; omitted from the request when
            empty.
        branchName: Sent as ``branchQuery``; omitted from the request when
            empty.

    Returns:
        A human-readable string: the numbered predictions, a "no predictions"
        message when the response carries no usable college list, or an error
        message when the request fails or the body is not valid JSON. Request
        failures are reported in the return value and never raised.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {PREDICTOR_API_KEY}"
    }
    params = {
        "userCrl": userCrl,
        "userCategory": userCategory,
        "userGender": userGender,
        "userHomeState": userHomeState,
        "limit": limit,
        "counsellingName": counsellingName,
    }
    if collegeName:
        params["collegeQuery"] = collegeName
    if branchName:
        params["branchQuery"] = branchName

    try:
        response = requests.post(PREDICTOR_API_URL, json=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return f"Error fetching college predictions: {str(e)}"

    no_results = "No college predictions found with the given criteria."

    # Walk the expected {"data": {"colleges": [...]}} shape defensively: a null
    # or differently-typed level is treated as "no predictions", not a crash.
    inner = data.get('data') if isinstance(data, dict) else None
    raw_colleges = inner.get('colleges') if isinstance(inner, dict) else None
    if not isinstance(raw_colleges, list):
        return no_results

    # Ignore malformed (non-object) entries so one bad item cannot break the list.
    colleges = [c for c in raw_colleges if isinstance(c, dict)]
    if not colleges:
        return no_results

    results = []
    for i, college in enumerate(colleges[:limit], start=1):
        parts = [f"{i}. College: {college.get('Institute', 'N/A')}"]
        # Optional fields are appended only when present and non-empty.
        if college.get('Academic_Program_Name'):
            parts.append(f"Branch: {college['Academic_Program_Name']}")
        if college.get('Seat_Type'):
            parts.append(f"Category: {college['Seat_Type']}")
        if college.get('Max_ClosingRank'):
            parts.append(f"Closing Rank: {college['Max_ClosingRank']}")
        results.append(", ".join(parts))

    return f"Based on your rank {userCrl}, here are college predictions:\n\n" + "\n".join(results)

def mentor_search(college_query: str) -> str:
    """Search mentors by college name and return formatted links.

    Calls the mentor search endpoint with *college_query* as the ``q``
    parameter and renders each returned mentor as ``"<name>: <profile URL>"``.

    Args:
        college_query: College name (or fragment) to search for.

    Returns:
        A human-readable string: the list of mentors, a "no mentors" message
        when the response carries no usable mentor list, or an error message
        when the request fails or the body is not valid JSON. Request failures
        are reported in the return value and never raised.
    """
    # NOTE(review): this targets the "test.api" host while the profile links
    # below point at www.precollege.in — confirm this is the intended endpoint.
    url = "https://test.api.precollege.in/api/v1/mentor/search"
    try:
        # Pass the query through ``params`` so requests URL-encodes it; pasting
        # it into the URL lets characters such as '&' or '#' in a college name
        # truncate or corrupt the query string.
        response = requests.get(url, params={"q": college_query}, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return f"Failed to fetch mentors: {str(e)}"

    raw_mentors = data.get("data") if isinstance(data, dict) else None
    # Keep only well-formed (object) entries; anything else cannot be rendered.
    mentors = (
        [m for m in raw_mentors if isinstance(m, dict)]
        if isinstance(raw_mentors, list)
        else []
    )
    if not mentors:
        return f"No mentors found for '{college_query}'."

    lines = []
    for mentor in mentors:
        name = mentor.get("name", "Unknown")
        username = mentor.get("username", "")
        profile_url = f"https://www.precollege.in/mentor/{username}" if username else "No profile link"
        lines.append(f"{name}: {profile_url}")

    return f"Mentors for '{college_query}':\n\n" + "\n".join(lines)


# === FUNCTION TOOL WRAPPERS ===
# Wrap each plain function above in a google.adk FunctionTool.
# NOTE(review): presumably these objects are imported by the agent definition
# elsewhere — no consumer is visible in this file.
db_tool = FunctionTool(db_search)
tavily_tool = FunctionTool(tavily_search)
predictor_tool = FunctionTool(college_predictor)
mentor_tool = FunctionTool(mentor_search)