Update vector_store.py
Browse files- vector_store.py +33 -24
vector_store.py
CHANGED
@@ -113,30 +113,39 @@ class LegalDocumentVectorStore:
|
|
113 |
print(f"β Error saving pre-computed embeddings: {e}")
|
114 |
return False
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
141 |
# Global instance
|
142 |
vector_store = LegalDocumentVectorStore()
|
|
|
113 |
print(f"β Error saving pre-computed embeddings: {e}")
|
114 |
return False
|
115 |
|
116 |
+
def get_retriever(self, clause_tagger, document_id: str = None):
    """Build a similarity-search retriever over the Pinecone index for chat.

    Args:
        clause_tagger: Object exposing an ``embedding_model`` attribute; that
            model is wrapped in ``InLegalBERTEmbeddings`` and handed to the
            vector store as its embedding.
        document_id: When truthy, a ``document_id`` metadata filter is added
            to the search arguments; otherwise no filter is set.

    Returns:
        The object returned by ``PineconeVectorStore.as_retriever``, or
        ``None`` if any step raised (the error is printed, not re-raised).
    """
    try:
        self._initialize_pinecone()

        embedder = InLegalBERTEmbeddings(clause_tagger.embedding_model)
        store = PineconeVectorStore(
            index=self.pc.Index(self.index_name),
            embedding=embedder,
            text_key="text",
        )

        # Permissive search settings: ask for 10 hits and pass
        # include_metadata=True along with the query arguments.
        # NOTE(review): confirm the installed vector-store version accepts
        # 'include_metadata' as a search kwarg.
        retrieval_options = {'k': 10, 'include_metadata': True}
        if document_id:
            retrieval_options['filter'] = {'document_id': document_id}

        # Plain similarity search; no score threshold is configured here.
        return store.as_retriever(
            search_type="similarity",
            search_kwargs=retrieval_options,
        )
    except Exception as e:
        print(f"β Error creating retriever: {e}")
        return None
|
148 |
+
|
149 |
|
150 |
# Global instance
|
151 |
vector_store = LegalDocumentVectorStore()
|