Final_Assignment_Project

Sleeping

wt002 committed on May 11

Commit

79e8949

verified ·

1 Parent(s): b1f6104

Update agent.py

Files changed (1) hide show

agent.py CHANGED Viewed

@@ -324,22 +324,21 @@ for task in tasks:
 class BERTEmbeddings(Embeddings):
     def __init__(self, model_name='bert-base-uncased'):
-        # Load the pre-trained BERT model and tokenizer
         self.tokenizer = BertTokenizer.from_pretrained(model_name)
         self.model = BertModel.from_pretrained(model_name)
-    def embed(self, texts):
-        # Tokenize and convert texts to input format for BERT
         inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
-        # Get the BERT embeddings (we use the last hidden state)
         with torch.no_grad():
             outputs = self.model(**inputs)
-        # Use the mean of the last layer hidden states as the embedding
-        embeddings = outputs.last_hidden_state.mean(dim=1)  # Shape: (batch_size, hidden_dim)
-        # Return the embeddings as a list of numpy arrays
         return embeddings.cpu().numpy().tolist()
 # Example usage of BERTEmbedding with LangChain

 class BERTEmbeddings(Embeddings):
     def __init__(self, model_name='bert-base-uncased'):
         self.tokenizer = BertTokenizer.from_pretrained(model_name)
         self.model = BertModel.from_pretrained(model_name)
+        self.model.eval()  # Set to evaluation mode
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return self._embed(texts)
+    def embed_query(self, text: str) -> List[float]:
+        return self._embed([text])[0]
+    def _embed(self, texts: List[str]) -> List[List[float]]:
         inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
         with torch.no_grad():
             outputs = self.model(**inputs)
+        embeddings = outputs.last_hidden_state.mean(dim=1)  # Mean pooling
         return embeddings.cpu().numpy().tolist()
 # Example usage of BERTEmbedding with LangChain