wt002 committed on
Commit
9b810cb
·
verified ·
1 Parent(s): 70f8384

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +42 -39
agent.py CHANGED
@@ -21,6 +21,14 @@ from langchain.embeddings.base import Embeddings
21
  from typing import List
22
  import numpy as np
23
 
 
 
 
 
 
 
 
 
24
  load_dotenv()
25
 
26
  @tool
@@ -125,52 +133,47 @@ with open("system_prompt.txt", "r", encoding="utf-8") as f:
125
  # System message
126
  sys_msg = SystemMessage(content=system_prompt)
127
 
128
- # Custom embedding class
129
-
130
- #from langchain_huggingface import HuggingFaceEmbeddings
131
-
132
-
133
-
134
- # Initialize SentenceTransformer and set max_seq_length
135
- sentence_transformer = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
136
- sentence_transformer.max_seq_length = 512 # Set max sequence length
137
-
138
- # Initialize embeddings with the model name (dim=768)
139
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
140
-
141
- #from sentence_transformers import SentenceTransformer
142
 
143
- model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
144
- query = "What is vector search?"
145
- query_embedding = model.encode(query)
146
 
147
- print("Embedding Length:", len(query_embedding)) # Ensure it's 768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
-
150
-
151
-
152
- # Initialize Supabase client
153
- supabase: Client = create_client(
154
- os.environ.get("SUPABASE_URL"),
155
- os.environ.get("SUPABASE_SERVICE_KEY")
156
- )
157
-
158
- # Initialize Supabase vector store
159
- vector_store = SupabaseVectorStore(
160
- client=supabase,
161
- embedding=embeddings,
162
- table_name="documents",
163
- query_name="match_documents_langchain"
164
- )
165
-
166
  # Create retriever tool
167
- retriever_tool = create_retriever_tool(
168
- retriever=vector_store.as_retriever(),
169
- name="Question Search",
170
- description="A tool to retrieve similar questions from a vector store."
171
  )
172
 
173
 
 
 
174
  tools = [
175
  multiply,
176
  add,
 
21
  from typing import List
22
  import numpy as np
23
 
24
+ import json
25
+ import uuid
26
+ from langchain.embeddings import HuggingFaceEmbeddings
27
+ from langchain.vectorstores import FAISS
28
+ from langchain.schema import Document
29
+ from langchain.tools.retriever import create_retriever_tool
30
+ from sentence_transformers import SentenceTransformer
31
+
32
  load_dotenv()
33
 
34
  @tool
 
133
  # System message
134
  sys_msg = SystemMessage(content=system_prompt)
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
 
 
 
137
 
138
+ # -------------------------------
139
+ # Step 1: Load metadata.jsonl (max 165 docs)
140
+ # -------------------------------
141
+ docs = []
142
+ with open("metadata.jsonl", "r", encoding="utf-8") as f:
143
+ for i, line in enumerate(f):
144
+ if i >= 165:
145
+ break
146
+ data = json.loads(line)
147
+ content = data.pop("content", "").strip()
148
+ if not content:
149
+ continue # skip empty
150
+ data["id"] = str(uuid.uuid4()) # ensure each doc has unique ID
151
+ docs.append(Document(page_content=content, metadata=data))
152
+
153
+ # -------------------------------
154
+ # Step 2: Set up Embeddings + FAISS
155
+ # -------------------------------
156
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
157
+ vector_store = FAISS.from_documents(docs, embedding_model)
158
+
159
+ # Save FAISS index locally (optional)
160
+ vector_store.save_local("faiss_index")
161
+
162
+ # -------------------------------
163
+ # Step 3: Create Retriever Tool
164
+ # -------------------------------
165
+ retriever = vector_store.as_retriever()
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  # Create retriever tool
168
+ question_retriever_tool = create_retriever_tool(
169
+ retriever=retriever,
170
+ name="Question_Search",
171
+ description="Use this tool to retrieve documents related to a user's question."
172
  )
173
 
174
 
175
+
176
+
177
  tools = [
178
  multiply,
179
  add,