Final_Assignment_Project

Sleeping

App Files Files Community

wt002 committed on May 11

Commit

cf02c0e

verified ·

1 Parent(s): 7df3234

Update agent.py

Browse files

Files changed (1) hide show

agent.py +60 -13

agent.py CHANGED Viewed

@@ -41,7 +41,7 @@ from io import StringIO
 from transformers import BertTokenizer, BertModel
 import torch
 load_dotenv()
@@ -361,28 +361,75 @@ class BERTEmbeddings(Embeddings):
 # Example usage of BERTEmbedding with LangChain
-embedding_model = BERTEmbeddings(model_name="bert-base-uncased")
-# Sample text (replace with your own text)
 docs = [
-    Document(page_content="Mercedes Sosa released many albums between 2000 and 2009."),
-    Document(page_content="She was a prominent Argentine folk singer."),
-    Document(page_content="Her album 'Al Despertar' was released in 1998."),
-    Document(page_content="She continued releasing music well into the 2000s.")
 ]
-# Get the embeddings for the documents
-vector_store = FAISS.from_documents(docs, embedding_model)
-# Now, you can use the embeddings with FAISS or other retrieval systems
-# For example, with FAISS:
-# Assuming 'docs' contains your list of documents and 'embedding_model' is the model you created
 vector_store = FAISS.from_documents(docs, embedding_model)
 vector_store.save_local("faiss_index")
 # -----------------------------
-# Step 4: Create Retriever Tool
 # -----------------------------
 retriever = vector_store.as_retriever()

 from transformers import BertTokenizer, BertModel
 import torch
+import torch.nn.functional as F
 load_dotenv()
 # Example usage of BERTEmbedding with LangChain
+# -----------------------------
+# 1. Define Custom BERT Embedding Model
+# -----------------------------
+class BERTEmbeddings(Embeddings):
+    """LangChain ``Embeddings`` adapter that yields L2-normalized, mean-pooled BERT vectors."""
+    def __init__(self, model_name='bert-base-uncased'):
+        """Load the tokenizer and encoder identified by ``model_name``."""
+        self.tokenizer = BertTokenizer.from_pretrained(model_name)
+        self.model = BertModel.from_pretrained(model_name)
+        self.model.eval()  # Set model to eval mode (inference only)
+    def embed_documents(self, texts):
+        """Return one unit-length embedding (a list of floats) per entry in ``texts``."""
+        texts = list(texts)
+        if not texts:
+            return []  # Nothing to encode; skip the tokenizer/model call
+        inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        hidden = outputs.last_hidden_state
+        # Average over real tokens only. Without the mask, padding positions are
+        # averaged in, so a text's vector would change with the batch it is in
+        # and batched documents would be pooled differently from a lone query.
+        mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
+        embeddings = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
+        embeddings = F.normalize(embeddings, p=2, dim=1)  # Normalize for cosine similarity
+        # The Embeddings interface specifies plain lists of floats, not arrays.
+        return embeddings.cpu().tolist()
+    def embed_query(self, text):
+        """Return the embedding of a single query string as a list of floats."""
+        return self.embed_documents([text])[0]
+# -----------------------------
+# 2. Initialize Embedding Model
+# -----------------------------
+embedding_model = BERTEmbeddings()
+# -----------------------------
+# 3. Prepare Documents
+# -----------------------------
 docs = [
+    Document(page_content="Mercedes Sosa released many albums between 2000 and 2009.", metadata={"id": 1}),
+    Document(page_content="She was a prominent Argentine folk singer.", metadata={"id": 2}),
+    Document(page_content="Her album 'Al Despertar' was released in 1998.", metadata={"id": 3}),
+    Document(page_content="She continued releasing music well into the 2000s.", metadata={"id": 4}),
 ]
+# -----------------------------
+# 4. Create FAISS Vector Store
+# -----------------------------
 vector_store = FAISS.from_documents(docs, embedding_model)
 vector_store.save_local("faiss_index")
+# -----------------------------
+# 5. Query & Filter Results (optional preview)
+# -----------------------------
+query = "How many albums did Mercedes Sosa release between 2000 and 2009?"
+results = vector_store.similarity_search_with_score(query, k=5)
+# Documents scoring below this value are accepted.
+# NOTE(review): assumes the score is a distance (lower = closer) - confirm
+# against the distance strategy the FAISS store was built with.
+threshold = 0.75
+print("\n📊 Retrieved Documents with Similarity Scores:")
+# Built in the loop below so every accept/reject decision is also printed.
+filtered = []
+for doc, score in results:
+    print(f"🔢 Score: {score:.4f}")
+    print(f"📄 Content: {doc.page_content}")
+    if score < threshold:
+        filtered.append(doc)
+        print("✅ Accepted")
+    else:
+        print("❌ Rejected")
+    print("-" * 80)
 # -----------------------------
+# 6. Create LangChain Retriever Tool
 # -----------------------------
 retriever = vector_store.as_retriever()