wt002 committed on
Commit
aaf11bc
·
verified ·
1 Parent(s): 9b810cb

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +34 -19
agent.py CHANGED
@@ -21,14 +21,15 @@ from langchain.embeddings.base import Embeddings
21
  from typing import List
22
  import numpy as np
23
 
24
- import json
25
  import uuid
26
  from langchain.embeddings import HuggingFaceEmbeddings
27
  from langchain.vectorstores import FAISS
28
  from langchain.schema import Document
29
- from langchain.tools.retriever import create_retriever_tool
30
  from sentence_transformers import SentenceTransformer
31
 
 
32
  load_dotenv()
33
 
34
  @tool
@@ -136,39 +137,53 @@ sys_msg = SystemMessage(content=system_prompt)
136
 
137
 
138
  # -------------------------------
139
- # Step 1: Load metadata.jsonl (max 165 docs)
140
  # -------------------------------
141
- docs = []
142
- with open("metadata.jsonl", "r", encoding="utf-8") as f:
143
- for i, line in enumerate(f):
144
- if i >= 165:
145
- break
146
- data = json.loads(line)
147
- content = data.pop("content", "").strip()
148
- if not content:
149
- continue # skip empty
150
- data["id"] = str(uuid.uuid4()) # ensure each doc has unique ID
151
- docs.append(Document(page_content=content, metadata=data))
 
 
 
 
 
 
 
 
 
152
 
153
  # -------------------------------
154
- # Step 2: Set up Embeddings + FAISS
155
  # -------------------------------
 
156
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
 
 
157
  vector_store = FAISS.from_documents(docs, embedding_model)
158
 
159
- # Save FAISS index locally (optional)
160
  vector_store.save_local("faiss_index")
161
 
 
 
162
  # -------------------------------
163
- # Step 3: Create Retriever Tool
164
  # -------------------------------
165
  retriever = vector_store.as_retriever()
166
 
167
- # Create retriever tool
168
  question_retriever_tool = create_retriever_tool(
169
  retriever=retriever,
170
  name="Question_Search",
171
- description="Use this tool to retrieve documents related to a user's question."
172
  )
173
 
174
 
 
21
  from typing import List
22
  import numpy as np
23
 
24
+ import pandas as pd
25
  import uuid
26
  from langchain.embeddings import HuggingFaceEmbeddings
27
  from langchain.vectorstores import FAISS
28
  from langchain.schema import Document
29
+ from langchain.agents import create_retriever_tool
30
  from sentence_transformers import SentenceTransformer
31
 
32
+
33
  load_dotenv()
34
 
35
  @tool
 
137
 
138
 
139
  # -------------------------------
140
+ # Step 1: Load documents from CSV file (max 165 rows)
141
  # -------------------------------
142
+ csv_file_path = "/home/wendy/Downloads/documents.csv" # Replace with your actual file path
143
+ df = pd.read_csv(csv_file_path).head(165)
144
+
145
+ # Check if 'content' column exists
146
+ assert 'content' in df.columns, "'content' column is required in the CSV file."
147
+
148
+ # Add 'id' and 'metadata' column
149
+ df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]
150
+ if 'metadata' not in df.columns:
151
+ df['metadata'] = [{} for _ in range(len(df))]
152
+ else:
153
+ # If metadata is a JSON string, convert it to dict
154
+ import json
155
+ df['metadata'] = df['metadata'].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
156
+
157
+ # Convert each row into a Document
158
+ docs = [
159
+ Document(page_content=row['content'], metadata={'id': row['id'], **row['metadata']})
160
+ for _, row in df.iterrows()
161
+ ]
162
 
163
  # -------------------------------
164
+ # Step 2: Set up HuggingFace Embeddings and FAISS VectorStore
165
  # -------------------------------
166
+ # Initialize HuggingFace Embedding model
167
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
168
+
169
+ # Create FAISS VectorStore from documents
170
  vector_store = FAISS.from_documents(docs, embedding_model)
171
 
172
+ # Save the FAISS index locally
173
  vector_store.save_local("faiss_index")
174
 
175
+ #print("✅ FAISS index created and saved locally.")
176
+
177
  # -------------------------------
178
+ # Step 3: Create Retriever Tool (for use in LangChain)
179
  # -------------------------------
180
  retriever = vector_store.as_retriever()
181
 
182
+ # Create the retriever tool
183
  question_retriever_tool = create_retriever_tool(
184
  retriever=retriever,
185
  name="Question_Search",
186
+ description="A tool to retrieve documents related to a user's question."
187
  )
188
 
189