sagar008 committed on
Commit
62169f2
·
verified ·
1 Parent(s): 6b496b9

Update vector_store.py

Browse files
Files changed (1) hide show
  1. vector_store.py +33 -24
vector_store.py CHANGED
@@ -113,30 +113,39 @@ class LegalDocumentVectorStore:
113
  print(f"❌ Error saving pre-computed embeddings: {e}")
114
  return False
115
 
116
- def get_retriever(self, clause_tagger, document_id: str = None):
117
- """Get retriever for chat functionality"""
118
- try:
119
- self._initialize_pinecone()
120
-
121
- legal_embeddings = InLegalBERTEmbeddings(clause_tagger.embedding_model)
122
- index = self.pc.Index(self.index_name)
123
-
124
- vectorstore = PineconeVectorStore(
125
- index=index,
126
- embedding=legal_embeddings,
127
- text_key="text"
128
- )
129
-
130
- # Create retriever with optional document filtering
131
- search_kwargs = {'k': 5}
132
- if document_id:
133
- search_kwargs['filter'] = {'document_id': document_id}
134
-
135
- return vectorstore.as_retriever(search_kwargs=search_kwargs)
136
-
137
- except Exception as e:
138
- print(f"❌ Error creating retriever: {e}")
139
- return None
 
 
 
 
 
 
 
 
 
140
 
141
  # Global instance
142
  vector_store = LegalDocumentVectorStore()
 
113
  print(f"❌ Error saving pre-computed embeddings: {e}")
114
  return False
115
 
116
def get_retriever(self, clause_tagger, document_id: str = None):
    """Build a similarity-search retriever over the Pinecone index for chat.

    Args:
        clause_tagger: Object exposing an ``embedding_model`` attribute,
            which is wrapped in ``InLegalBERTEmbeddings`` to embed queries.
        document_id: When truthy, results are restricted to vectors whose
            ``document_id`` metadata field equals this value.

    Returns:
        The retriever produced by ``vectorstore.as_retriever``, or ``None``
        if any step of the setup raises (the error is printed, not raised).
    """
    try:
        self._initialize_pinecone()

        legal_embeddings = InLegalBERTEmbeddings(clause_tagger.embedding_model)
        index = self.pc.Index(self.index_name)

        vectorstore = PineconeVectorStore(
            index=index,
            embedding=legal_embeddings,
            text_key="text",
        )

        # Only pass keys that the vector store's search methods accept.
        # 'include_metadata' is deliberately NOT set here: every entry in
        # search_kwargs is forwarded as a keyword argument on each query,
        # and an unsupported key would fail at query time, outside this
        # try/except. NOTE(review): the Pinecone store is expected to
        # request metadata on its own - confirm against the installed
        # langchain Pinecone integration.
        search_kwargs = {"k": 10}  # wider than the previous value of 5

        if document_id:
            search_kwargs["filter"] = {"document_id": document_id}

        # Plain similarity search, no score threshold.
        return vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs=search_kwargs,
        )

    except Exception as e:
        # Best-effort contract: report the failure and return None.
        print(f"❌ Error creating retriever: {e}")
        return None
148
+
149
 
150
  # Global instance
151
  vector_store = LegalDocumentVectorStore()