Spaces:
Sleeping
Sleeping
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -340,79 +340,34 @@ for name in enabled_tool_names:
|
|
| 340 |
|
| 341 |
|
| 342 |
# -------------------------------
|
| 343 |
-
#
|
| 344 |
-
# -------------------------------
|
| 345 |
-
from fastapi import FastAPI, Request
|
| 346 |
-
from langchain_core.documents import Document
|
| 347 |
-
import uuid
|
| 348 |
-
|
| 349 |
-
app = FastAPI()
|
| 350 |
-
|
| 351 |
-
@app.post("/start")
|
| 352 |
-
async def start_questions(request: Request):
|
| 353 |
-
data = await request.json()
|
| 354 |
-
questions = data.get("questions", [])
|
| 355 |
-
|
| 356 |
-
docs = []
|
| 357 |
-
for task in questions:
|
| 358 |
-
question_text = task.get("question", "").strip()
|
| 359 |
-
if not question_text:
|
| 360 |
-
continue
|
| 361 |
-
|
| 362 |
-
task["id"] = str(uuid.uuid4())
|
| 363 |
-
docs.append(Document(page_content=question_text, metadata=task))
|
| 364 |
-
|
| 365 |
-
return {"message": f"Loaded {len(docs)} questions", "docs": [doc.page_content for doc in docs]}
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
# -------------------------------
|
| 371 |
-
# Step 4: Set up BERT Embeddings and FAISS VectorStore
|
| 372 |
# -------------------------------
|
| 373 |
|
| 374 |
# -----------------------------
|
| 375 |
-
#
|
| 376 |
# -----------------------------
|
| 377 |
import torch
|
| 378 |
import torch.nn.functional as F
|
| 379 |
from transformers import BertTokenizer, BertModel
|
| 380 |
-
from langchain.embeddings import Embeddings
|
| 381 |
|
| 382 |
-
class BERTEmbeddings
|
| 383 |
-
def __init__(self, model_name='bert-base-uncased'
|
| 384 |
-
# Initialize the tokenizer and model
|
| 385 |
self.tokenizer = BertTokenizer.from_pretrained(model_name)
|
| 386 |
self.model = BertModel.from_pretrained(model_name)
|
| 387 |
-
self.model.eval() # Set
|
| 388 |
-
self.device = device
|
| 389 |
-
self.model.to(self.device) # Move model to the specified device (CPU or GPU)
|
| 390 |
|
| 391 |
def embed_documents(self, texts):
|
| 392 |
-
|
| 393 |
-
inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=512)
|
| 394 |
-
inputs = {key: value.to(self.device) for key, value in inputs.items()} # Move inputs to the specified device
|
| 395 |
-
|
| 396 |
with torch.no_grad():
|
| 397 |
outputs = self.model(**inputs)
|
| 398 |
-
|
| 399 |
-
# Get the embeddings by averaging the last hidden state across tokens
|
| 400 |
embeddings = outputs.last_hidden_state.mean(dim=1)
|
| 401 |
-
|
| 402 |
-
# Normalize embeddings for cosine similarity
|
| 403 |
-
embeddings = F.normalize(embeddings, p=2, dim=1)
|
| 404 |
-
|
| 405 |
-
# Return the embeddings as numpy array
|
| 406 |
return embeddings.cpu().numpy()
|
| 407 |
|
| 408 |
def embed_query(self, text):
|
| 409 |
-
# Embed a single query (text)
|
| 410 |
return self.embed_documents([text])[0]
|
| 411 |
|
| 412 |
|
| 413 |
-
# -----------------------------
|
| 414 |
-
# 2. Initialize Embedding Model
|
| 415 |
-
# -----------------------------
|
| 416 |
|
| 417 |
# -----------------------------
|
| 418 |
# Create FAISS Vector Store
|
|
@@ -434,7 +389,7 @@ class MyVectorStore:
|
|
| 434 |
return cls(index)
|
| 435 |
|
| 436 |
# -----------------------------
|
| 437 |
-
#
|
| 438 |
# -----------------------------
|
| 439 |
# Define the URL where the JSON file is hosted
|
| 440 |
url = "https://agents-course-unit4-scoring.hf.space/questions"
|
|
@@ -467,11 +422,8 @@ loaded_vector_store = MyVectorStore.load_local("faiss_index.index")
|
|
| 467 |
|
| 468 |
|
| 469 |
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
# -----------------------------
|
| 474 |
-
#
|
| 475 |
# -----------------------------
|
| 476 |
|
| 477 |
retriever = FAISS.load_local("faiss_index.index", embedding_model).as_retriever()
|
|
@@ -502,17 +454,6 @@ llm = HuggingFaceEndpoint(
|
|
| 502 |
)
|
| 503 |
|
| 504 |
|
| 505 |
-
# No longer required as Langgraph is replacing Langchain
|
| 506 |
-
# Initialize LangChain agent
|
| 507 |
-
#agent = initialize_agent(
|
| 508 |
-
# tools=tools,
|
| 509 |
-
# llm=llm,
|
| 510 |
-
# agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
| 511 |
-
# verbose=True
|
| 512 |
-
#)
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
|
| 517 |
# -------------------------------
|
| 518 |
# Step 8: Use the Planner, Classifier, and Decision Logic
|
|
|
|
| 340 |
|
| 341 |
|
| 342 |
# -------------------------------
|
| 343 |
+
# Set up BERT Embeddings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
# -------------------------------
|
| 345 |
|
| 346 |
# -----------------------------
|
| 347 |
+
# Define Custom BERT Embedding Model
|
| 348 |
# -----------------------------
|
| 349 |
import torch
|
| 350 |
import torch.nn.functional as F
|
| 351 |
from transformers import BertTokenizer, BertModel
|
|
|
|
| 352 |
|
| 353 |
+
class BERTEmbeddings:
|
| 354 |
+
def __init__(self, model_name='bert-base-uncased'):
|
|
|
|
| 355 |
self.tokenizer = BertTokenizer.from_pretrained(model_name)
|
| 356 |
self.model = BertModel.from_pretrained(model_name)
|
| 357 |
+
self.model.eval() # Set to evaluation mode
|
|
|
|
|
|
|
| 358 |
|
| 359 |
def embed_documents(self, texts):
|
| 360 |
+
inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
|
|
|
|
|
|
|
|
|
|
| 361 |
with torch.no_grad():
|
| 362 |
outputs = self.model(**inputs)
|
|
|
|
|
|
|
| 363 |
embeddings = outputs.last_hidden_state.mean(dim=1)
|
| 364 |
+
embeddings = F.normalize(embeddings, p=2, dim=1) # Normalize for cosine similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
return embeddings.cpu().numpy()
|
| 366 |
|
| 367 |
def embed_query(self, text):
|
|
|
|
| 368 |
return self.embed_documents([text])[0]
|
| 369 |
|
| 370 |
|
|
|
|
|
|
|
|
|
|
| 371 |
|
| 372 |
# -----------------------------
|
| 373 |
# Create FAISS Vector Store
|
|
|
|
| 389 |
return cls(index)
|
| 390 |
|
| 391 |
# -----------------------------
|
| 392 |
+
# Prepare Documents
|
| 393 |
# -----------------------------
|
| 394 |
# Define the URL where the JSON file is hosted
|
| 395 |
url = "https://agents-course-unit4-scoring.hf.space/questions"
|
|
|
|
| 422 |
|
| 423 |
|
| 424 |
|
|
|
|
|
|
|
|
|
|
| 425 |
# -----------------------------
|
| 426 |
+
# Create LangChain Retriever Tool
|
| 427 |
# -----------------------------
|
| 428 |
|
| 429 |
retriever = FAISS.load_local("faiss_index.index", embedding_model).as_retriever()
|
|
|
|
| 454 |
)
|
| 455 |
|
| 456 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
|
| 458 |
# -------------------------------
|
| 459 |
# Step 8: Use the Planner, Classifier, and Decision Logic
|