Update vector_store.py
Browse files- vector_store.py +26 -25
vector_store.py
CHANGED
@@ -113,38 +113,39 @@ class LegalDocumentVectorStore:
|
|
113 |
print(f"β Error saving pre-computed embeddings: {e}")
|
114 |
return False
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
120 |
|
121 |
-
|
122 |
-
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
|
130 |
# More permissive search settings
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
|
136 |
-
|
137 |
-
|
138 |
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
|
149 |
|
150 |
# Global instance
|
|
|
113 |
print(f"β Error saving pre-computed embeddings: {e}")
|
114 |
return False
|
115 |
|
116 |
+
|
117 |
+
def get_retriever(self, clause_tagger, document_id: str = None, *, k: int = 10):
    """Build a similarity-search retriever over the Pinecone index for chat.

    Args:
        clause_tagger: Object exposing an ``embedding_model`` attribute. That
            model is passed to ``InLegalBERTEmbeddings`` and the result is
            used as the vector store's embedding.
        document_id: When truthy, a metadata filter
            ``{'document_id': document_id}`` is added to the search settings.
            ``None`` or an empty string adds no filter.
        k: Number of matches requested per query (keyword-only).
            Defaults to 10.

    Returns:
        The object returned by ``vectorstore.as_retriever(...)``, or ``None``
        if any step of the setup raised (the error is printed, not re-raised).
    """
    try:
        # Make sure ``self.pc`` is usable before asking it for the index.
        self._initialize_pinecone()

        legal_embeddings = InLegalBERTEmbeddings(clause_tagger.embedding_model)
        index = self.pc.Index(self.index_name)

        vectorstore = PineconeVectorStore(
            index=index,
            embedding=legal_embeddings,
            text_key="text",
        )

        # More permissive search settings
        search_kwargs = {
            'k': k,
            # NOTE(review): these kwargs are forwarded to the vector store's
            # search method at query time; confirm that it accepts
            # ``include_metadata`` in the installed library version.
            'include_metadata': True,
        }

        if document_id:
            search_kwargs['filter'] = {'document_id': document_id}

        # Plain similarity search: no score threshold is configured here.
        return vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs=search_kwargs,
        )

    except Exception as e:
        # Best-effort boundary: report the failure and signal it with None so
        # the caller can decide how to degrade.
        print(f"β Error creating retriever: {e}")
        return None
|
149 |
|
150 |
|
151 |
# Global instance
|