Final_Assignment_Project

Sleeping

App Files Community

wt002 committed on May 14

Commit

3df23ae

verified ·

1 Parent(s): 9ac015d

Update agent.py

Browse files

Files changed (1) hide show

agent.py +9 -68

agent.py CHANGED Viewed

@@ -340,79 +340,34 @@ for name in enabled_tool_names:
 # -------------------------------
-# Step 2: Load the JSON file or tasks (Replace this part if you're loading tasks dynamically)
-# -------------------------------
-from fastapi import FastAPI, Request
-from langchain_core.documents import Document
-import uuid
-app = FastAPI()
-@app.post("/start")
-async def start_questions(request: Request):
-    data = await request.json()
-    questions = data.get("questions", [])
-    docs = []
-    for task in questions:
-        question_text = task.get("question", "").strip()
-        if not question_text:
-            continue
-        task["id"] = str(uuid.uuid4())
-        docs.append(Document(page_content=question_text, metadata=task))
-    return {"message": f"Loaded {len(docs)} questions", "docs": [doc.page_content for doc in docs]}
-# -------------------------------
-# Step 4: Set up BERT Embeddings and FAISS VectorStore
 # -------------------------------
 # -----------------------------
-# 1. Define Custom BERT Embedding Model
 # -----------------------------
 import torch
 import torch.nn.functional as F
 from transformers import BertTokenizer, BertModel
-from langchain.embeddings import Embeddings
-class BERTEmbeddings(Embeddings):
-    def __init__(self, model_name='bert-base-uncased', device='cpu'):
-        # Initialize the tokenizer and model
         self.tokenizer = BertTokenizer.from_pretrained(model_name)
         self.model = BertModel.from_pretrained(model_name)
-        self.model.eval()  # Set model to eval mode
-        self.device = device
-        self.model.to(self.device)  # Move model to the specified device (CPU or GPU)
     def embed_documents(self, texts):
-        # Tokenize the input texts
-        inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=512)
-        inputs = {key: value.to(self.device) for key, value in inputs.items()}  # Move inputs to the specified device
         with torch.no_grad():
             outputs = self.model(**inputs)
-        # Get the embeddings by averaging the last hidden state across tokens
         embeddings = outputs.last_hidden_state.mean(dim=1)
-        # Normalize embeddings for cosine similarity
-        embeddings = F.normalize(embeddings, p=2, dim=1)
-        # Return the embeddings as numpy array
         return embeddings.cpu().numpy()
     def embed_query(self, text):
-        # Embed a single query (text)
         return self.embed_documents([text])[0]
-# -----------------------------
-# 2. Initialize Embedding Model
-# -----------------------------
 # -----------------------------
 # Create FAISS Vector Store
@@ -434,7 +389,7 @@ class MyVectorStore:
         return cls(index)
 # -----------------------------
-# 3. Prepare Documents
 # -----------------------------
 # Define the URL where the JSON file is hosted
 url = "https://agents-course-unit4-scoring.hf.space/questions"
@@ -467,11 +422,8 @@ loaded_vector_store = MyVectorStore.load_local("faiss_index.index")
 # -----------------------------
-# 6. Create LangChain Retriever Tool
 # -----------------------------
 retriever = FAISS.load_local("faiss_index.index", embedding_model).as_retriever()
@@ -502,17 +454,6 @@ llm = HuggingFaceEndpoint(
 )
-# No longer required as Langgraph is replacing Langchain
-# Initialize LangChain agent
-#agent = initialize_agent(
-#    tools=tools,
-#    llm=llm,
-#    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
-#    verbose=True
-#)
 # -------------------------------
 # Step 8: Use the Planner, Classifier, and Decision Logic

 # -------------------------------
+# Set up BERT Embeddings
 # -------------------------------
 # -----------------------------
+# Define Custom BERT Embedding Model
 # -----------------------------
 import torch
 import torch.nn.functional as F
 from transformers import BertTokenizer, BertModel
+class BERTEmbeddings:
+    def __init__(self, model_name='bert-base-uncased'):
         self.tokenizer = BertTokenizer.from_pretrained(model_name)
         self.model = BertModel.from_pretrained(model_name)
+        self.model.eval()  # Set to evaluation mode
     def embed_documents(self, texts):
+        inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
         with torch.no_grad():
             outputs = self.model(**inputs)
         embeddings = outputs.last_hidden_state.mean(dim=1)
+        embeddings = F.normalize(embeddings, p=2, dim=1)  # Normalize for cosine similarity
         return embeddings.cpu().numpy()
     def embed_query(self, text):
         return self.embed_documents([text])[0]
 # -----------------------------
 # Create FAISS Vector Store
         return cls(index)
 # -----------------------------
+# Prepare Documents
 # -----------------------------
 # Define the URL where the JSON file is hosted
 url = "https://agents-course-unit4-scoring.hf.space/questions"
 # -----------------------------
+# Create LangChain Retriever Tool
 # -----------------------------
 retriever = FAISS.load_local("faiss_index.index", embedding_model).as_retriever()
 )
 # -------------------------------
 # Step 8: Use the Planner, Classifier, and Decision Logic