Spaces:
Sleeping
Sleeping
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -21,6 +21,14 @@ from langchain.embeddings.base import Embeddings
|
|
| 21 |
from typing import List
|
| 22 |
import numpy as np
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
load_dotenv()
|
| 25 |
|
| 26 |
@tool
|
|
@@ -125,52 +133,47 @@ with open("system_prompt.txt", "r", encoding="utf-8") as f:
|
|
| 125 |
# System message
|
| 126 |
sys_msg = SystemMessage(content=system_prompt)
|
| 127 |
|
| 128 |
-
# Custom embedding class
|
| 129 |
-
|
| 130 |
-
#from langchain_huggingface import HuggingFaceEmbeddings
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
# Initialize SentenceTransformer and set max_seq_length
|
| 135 |
-
sentence_transformer = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
|
| 136 |
-
sentence_transformer.max_seq_length = 512 # Set max sequence length
|
| 137 |
-
|
| 138 |
-
# Initialize embeddings with the model name (dim=768)
|
| 139 |
-
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
| 140 |
-
|
| 141 |
-
#from sentence_transformers import SentenceTransformer
|
| 142 |
|
| 143 |
-
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
|
| 144 |
-
query = "What is vector search?"
|
| 145 |
-
query_embedding = model.encode(query)
|
| 146 |
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
# Initialize Supabase client
|
| 153 |
-
supabase: Client = create_client(
|
| 154 |
-
os.environ.get("SUPABASE_URL"),
|
| 155 |
-
os.environ.get("SUPABASE_SERVICE_KEY")
|
| 156 |
-
)
|
| 157 |
-
|
| 158 |
-
# Initialize Supabase vector store
|
| 159 |
-
vector_store = SupabaseVectorStore(
|
| 160 |
-
client=supabase,
|
| 161 |
-
embedding=embeddings,
|
| 162 |
-
table_name="documents",
|
| 163 |
-
query_name="match_documents_langchain"
|
| 164 |
-
)
|
| 165 |
-
|
| 166 |
# Create retriever tool
|
| 167 |
-
|
| 168 |
-
retriever=
|
| 169 |
-
name="
|
| 170 |
-
description="
|
| 171 |
)
|
| 172 |
|
| 173 |
|
|
|
|
|
|
|
| 174 |
tools = [
|
| 175 |
multiply,
|
| 176 |
add,
|
|
|
|
| 21 |
from typing import List
|
| 22 |
import numpy as np
|
| 23 |
|
| 24 |
+
import json
|
| 25 |
+
import uuid
|
| 26 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 27 |
+
from langchain.vectorstores import FAISS
|
| 28 |
+
from langchain.schema import Document
|
| 29 |
+
from langchain.tools.retriever import create_retriever_tool
|
| 30 |
+
from sentence_transformers import SentenceTransformer
|
| 31 |
+
|
| 32 |
load_dotenv()
|
| 33 |
|
| 34 |
@tool
|
|
|
|
| 133 |
# System message
|
| 134 |
sys_msg = SystemMessage(content=system_prompt)
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
# -------------------------------
# Step 1: Load metadata.jsonl (first 165 lines at most)
# -------------------------------
# Each non-blank line is parsed as one JSON object. Its "content" field
# becomes the document text; every remaining key is kept as metadata.
docs = []
with open("metadata.jsonl", "r", encoding="utf-8") as f:
    for i, line in enumerate(f):
        if i >= 165:
            break  # the cap applies to lines read, not to documents kept
        if not line.strip():
            continue  # tolerate blank lines instead of failing in json.loads
        data = json.loads(line)
        # "content" may be missing, null, or not a string; treat all of these
        # as empty text rather than crashing on a .strip() call.
        content = data.pop("content", None)
        content = content.strip() if isinstance(content, str) else ""
        if not content:
            continue  # skip empty
        # Assign a fresh unique ID (this overwrites any "id" already present).
        data["id"] = str(uuid.uuid4())
        docs.append(Document(page_content=content, metadata=data))

# Fail early with a readable message: an index cannot be built from zero
# documents, and the error raised from inside the vector store is opaque.
if not docs:
    raise ValueError(
        "metadata.jsonl yielded no documents with non-empty content; "
        "cannot build the FAISS index"
    )

# -------------------------------
# Step 2: Set up Embeddings + FAISS
# -------------------------------
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vector_store = FAISS.from_documents(docs, embedding_model)

# Save FAISS index locally (optional)
vector_store.save_local("faiss_index")

# -------------------------------
# Step 3: Create Retriever Tool
# -------------------------------
# Expose the in-memory index through the retriever interface with default
# search settings; the tool wrapper is built from this object below.
retriever = vector_store.as_retriever()
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
# Create retriever tool
# Wraps `retriever` as a named tool; arguments are passed in the order
# (retriever, name, description).
question_retriever_tool = create_retriever_tool(
    retriever,
    "Question_Search",
    "Use this tool to retrieve documents related to a user's question.",
)
|
| 173 |
|
| 174 |
|
| 175 |
+
|
| 176 |
+
|
| 177 |
tools = [
|
| 178 |
multiply,
|
| 179 |
add,
|