Spaces:
Running
Running
Update agent.py
Browse files
agent.py
CHANGED
@@ -21,6 +21,14 @@ from langchain.embeddings.base import Embeddings
|
|
21 |
from typing import List
|
22 |
import numpy as np
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
load_dotenv()
|
25 |
|
26 |
@tool
|
@@ -125,52 +133,47 @@ with open("system_prompt.txt", "r", encoding="utf-8") as f:
|
|
125 |
# System message
|
126 |
sys_msg = SystemMessage(content=system_prompt)
|
127 |
|
128 |
-
# Custom embedding class
|
129 |
-
|
130 |
-
#from langchain_huggingface import HuggingFaceEmbeddings
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
# Initialize SentenceTransformer and set max_seq_length
|
135 |
-
sentence_transformer = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
|
136 |
-
sentence_transformer.max_seq_length = 512 # Set max sequence length
|
137 |
-
|
138 |
-
# Initialize embeddings with the model name (dim=768)
|
139 |
-
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
140 |
-
|
141 |
-
#from sentence_transformers import SentenceTransformer
|
142 |
|
143 |
-
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
|
144 |
-
query = "What is vector search?"
|
145 |
-
query_embedding = model.encode(query)
|
146 |
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
# Initialize Supabase client
|
153 |
-
supabase: Client = create_client(
|
154 |
-
os.environ.get("SUPABASE_URL"),
|
155 |
-
os.environ.get("SUPABASE_SERVICE_KEY")
|
156 |
-
)
|
157 |
-
|
158 |
-
# Initialize Supabase vector store
|
159 |
-
vector_store = SupabaseVectorStore(
|
160 |
-
client=supabase,
|
161 |
-
embedding=embeddings,
|
162 |
-
table_name="documents",
|
163 |
-
query_name="match_documents_langchain"
|
164 |
-
)
|
165 |
-
|
166 |
# Create retriever tool
|
167 |
-
|
168 |
-
retriever=
|
169 |
-
name="
|
170 |
-
description="
|
171 |
)
|
172 |
|
173 |
|
|
|
|
|
174 |
tools = [
|
175 |
multiply,
|
176 |
add,
|
|
|
21 |
from typing import List
|
22 |
import numpy as np
|
23 |
|
24 |
+
import json
|
25 |
+
import uuid
|
26 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
27 |
+
from langchain.vectorstores import FAISS
|
28 |
+
from langchain.schema import Document
|
29 |
+
from langchain.tools.retriever import create_retriever_tool
|
30 |
+
from sentence_transformers import SentenceTransformer
|
31 |
+
|
32 |
load_dotenv()
|
33 |
|
34 |
@tool
|
|
|
133 |
# System message
|
134 |
sys_msg = SystemMessage(content=system_prompt)
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
|
|
|
|
|
|
137 |
|
138 |
+
# -------------------------------
|
139 |
+
# Step 1: Load metadata.jsonl (max 165 docs)
|
140 |
+
# -------------------------------
|
141 |
+
docs = []
|
142 |
+
with open("metadata.jsonl", "r", encoding="utf-8") as f:
|
143 |
+
for i, line in enumerate(f):
|
144 |
+
if i >= 165:
|
145 |
+
break
|
146 |
+
data = json.loads(line)
|
147 |
+
content = data.pop("content", "").strip()
|
148 |
+
if not content:
|
149 |
+
continue # skip empty
|
150 |
+
data["id"] = str(uuid.uuid4()) # ensure each doc has unique ID
|
151 |
+
docs.append(Document(page_content=content, metadata=data))
|
152 |
+
|
153 |
+
# -------------------------------
|
154 |
+
# Step 2: Set up Embeddings + FAISS
|
155 |
+
# -------------------------------
|
156 |
+
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
157 |
+
vector_store = FAISS.from_documents(docs, embedding_model)
|
158 |
+
|
159 |
+
# Save FAISS index locally (optional)
|
160 |
+
vector_store.save_local("faiss_index")
|
161 |
+
|
162 |
+
# -------------------------------
|
163 |
+
# Step 3: Create Retriever Tool
|
164 |
+
# -------------------------------
|
165 |
+
retriever = vector_store.as_retriever()
|
166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
# Create retriever tool
|
168 |
+
question_retriever_tool = create_retriever_tool(
|
169 |
+
retriever=retriever,
|
170 |
+
name="Question_Search",
|
171 |
+
description="Use this tool to retrieve documents related to a user's question."
|
172 |
)
|
173 |
|
174 |
|
175 |
+
|
176 |
+
|
177 |
tools = [
|
178 |
multiply,
|
179 |
add,
|