import os
# === CRITICAL: Set cache directories BEFORE any other imports ===
os.environ['HF_HOME'] = '/tmp/huggingface_cache'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
os.environ['HF_DATASETS_CACHE'] = '/tmp/datasets_cache'
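# Note: newer transformers releases derive their cache location from HF_HOME
# and treat TRANSFORMERS_CACHE as deprecated; setting all three keeps old and
# new versions writing to /tmp, which is writable on hosted Spaces.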
# Now import everything else
import json
import datetime
import requests
import gspread
from dotenv import load_dotenv
from huggingface_hub import login as hf_login
from langchain_community.vectorstores import FAISS
from langchain_core.embeddings import Embeddings
from sentence_transformers import SentenceTransformer
from langchain_tavily import TavilySearch
from google.adk.tools import FunctionTool
# === LOAD ENV ===
load_dotenv()
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
SERVICE_ACCOUNT_JSON = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
SHEET_KEY = os.getenv("SHEET_KEY")
PREDICTOR_API_URL = os.getenv("PREDICTOR_API_URL")
PREDICTOR_API_KEY = os.getenv("PREDICTOR_API_KEY")
# Only attempt login when a token is configured; hf_login(token=None) would
# fall back to an interactive prompt, which fails in a headless Space.
if HF_TOKEN:
    hf_login(token=HF_TOKEN)
# === GOOGLE SHEET LOGGING ===
service_account_dict = json.loads(SERVICE_ACCOUNT_JSON) if isinstance(SERVICE_ACCOUNT_JSON, str) else SERVICE_ACCOUNT_JSON
def add_query_to_sheet(user_id: str, query: str, response: str):
    """Append one (user, timestamp, query, response) row to the logging sheet."""
    gc = gspread.service_account_from_dict(service_account_dict)
    sh = gc.open_by_key(SHEET_KEY)
    ws = sh.worksheet("Sheet1")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    ws.append_row([user_id, timestamp, query, response])
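# Example (hypothetical values; the service account must have edit access to
# the sheet identified by SHEET_KEY):
#     add_query_to_sheet("user_42", "What is the JoSAA cutoff?", "The cutoff is ...")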
# === VECTOR STORE ===
def load_vector_store(data_dir: str):
    """Embed every markdown file in data_dir and build an in-memory FAISS index."""
    texts = []
    for fname in os.listdir(data_dir):
        if fname.lower().endswith(".md"):
            path = os.path.join(data_dir, fname)
            try:
                with open(path, "r", encoding="utf-8") as f:
                    texts.append(f.read())
            except UnicodeDecodeError:
                # Fall back to latin-1 for files that are not valid UTF-8.
                with open(path, "r", encoding="latin-1") as f:
                    texts.append(f.read())
    st_model = SentenceTransformer("all-MiniLM-L6-v2")

    class LocalEmbeddings(Embeddings):
        """Minimal LangChain Embeddings adapter around a local SentenceTransformer."""

        def embed_documents(self, docs):
            return st_model.encode(docs).tolist()

        def embed_query(self, q):
            return st_model.encode([q])[0].tolist()

    return FAISS.from_texts(texts, LocalEmbeddings())
vector_store = load_vector_store("College_markdown")
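# Optional persistence sketch (an assumption, not part of the original flow):
# LangChain's FAISS wrapper can cache the index on disk via save_local /
# load_local, so the Space does not re-embed every file on restart. This
# assumes LocalEmbeddings is lifted to module scope and uses a hypothetical
# /tmp cache path:
#
#     index_dir = "/tmp/faiss_index"
#     if os.path.isdir(index_dir):
#         vector_store = FAISS.load_local(
#             index_dir, LocalEmbeddings(), allow_dangerous_deserialization=True
#         )
#     else:
#         vector_store = load_vector_store("College_markdown")
#         vector_store.save_local(index_dir)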
# === TOOL DEFINITIONS ===
def db_search(query: str) -> dict:
    """Return the top-k most similar markdown chunks from the FAISS index."""
    docs = vector_store.similarity_search(query, k=6)
    return {"results": [d.page_content for d in docs]}
def tavily_search(query: str) -> dict:
    """Run a Tavily web search and return the text snippet of each hit."""
    tool = TavilySearch(max_results=6, topic="general", include_raw_content=True)
    result = tool.invoke({"query": query})
    snippets = [item.get("content") for item in result.get("results", [])]
    return {"results": snippets}
def college_predictor(
    userCrl: int,
    userCategory: str,
    userGender: str,
    userHomeState: str,
    limit: int = 6,
    counsellingName: str = "josaa",
    collegeName: str = "national institute of technology",
    branchName: str = "computer science and engineering",
) -> str:
    """Call the external predictor API and format the matching colleges."""
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {PREDICTOR_API_KEY}",
    }
    params = {
        "userCrl": userCrl,
        "userCategory": userCategory,
        "userGender": userGender,
        "userHomeState": userHomeState,
        "limit": limit,
        "counsellingName": counsellingName,
    }
    if collegeName:
        params["collegeQuery"] = collegeName
    if branchName:
        params["branchQuery"] = branchName
    try:
        response = requests.post(PREDICTOR_API_URL, json=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        if not data or "data" not in data or "colleges" not in data["data"]:
            return "No college predictions found with the given criteria."
        colleges = data["data"]["colleges"]
        if not colleges:
            return "No college predictions found with the given criteria."
        results = []
        for i, college in enumerate(colleges[:limit], start=1):
            parts = [f"{i}. College: {college.get('Institute', 'N/A')}"]
            if college.get("Academic_Program_Name"):
                parts.append(f"Branch: {college['Academic_Program_Name']}")
            if college.get("Seat_Type"):
                parts.append(f"Category: {college['Seat_Type']}")
            if college.get("Max_ClosingRank"):
                parts.append(f"Closing Rank: {college['Max_ClosingRank']}")
            results.append(", ".join(parts))
        return f"Based on your rank {userCrl}, here are college predictions:\n\n" + "\n".join(results)
    except requests.exceptions.RequestException as e:
        return f"Error fetching college predictions: {str(e)}"
def mentor_search(college_query: str) -> str:
    """Search mentors by college name and return formatted profile links."""
    url = "https://test.api.precollege.in/api/v1/mentor/search"
    try:
        # Pass the query via params so requests URL-encodes spaces and symbols.
        response = requests.get(url, params={"q": college_query}, timeout=10)
        response.raise_for_status()
        data = response.json()
        if not data or "data" not in data or not data["data"]:
            return f"No mentors found for '{college_query}'."
        mentors = data["data"]
        lines = []
        for mentor in mentors:
            name = mentor.get("name", "Unknown")
            username = mentor.get("username", "")
            profile_url = f"https://www.precollege.in/mentor/{username}" if username else "No profile link"
            lines.append(f"{name}: {profile_url}")
        return f"Mentors for '{college_query}':\n\n" + "\n".join(lines)
    except requests.exceptions.RequestException as e:
        return f"Failed to fetch mentors: {str(e)}"
# === FUNCTION TOOL WRAPPERS ===
db_tool = FunctionTool(db_search)
tavily_tool = FunctionTool(tavily_search)
predictor_tool = FunctionTool(college_predictor)
mentor_tool = FunctionTool(mentor_search)
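# ADK's FunctionTool derives each tool's name, description, and parameter
# schema from the wrapped function's signature, type hints, and docstring,
# so the docstrings above double as the model-facing tool descriptions.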