ppsingh committed on
Commit
0ddb79b
·
1 Parent(s): d854198

change filter and get_context for filter

Browse files
Files changed (2) hide show
  1. app.py +5 -14
  2. utils/retriever.py +22 -50
app.py CHANGED
@@ -17,10 +17,9 @@ except Exception as e:
17
 
18
  def retrieve(
19
  query: str,
20
- reports_filter: str = "",
21
- sources_filter: str = "",
22
- subtype_filter: str = "",
23
- year_filter: str = ""
24
  ) -> list:
25
  """
26
  Retrieve semantically similar documents from the vector database for MCP clients.
@@ -35,20 +34,12 @@ def retrieve(
35
  Returns:
36
  list: List of dictionaries containing document content, metadata, and scores
37
  """
38
- # Parse filter inputs (convert empty strings to None or lists)
39
- reports = [r.strip() for r in reports_filter.split(",") if r.strip()] if reports_filter else []
40
- sources = sources_filter.strip() if sources_filter else None
41
- subtype = subtype_filter.strip() if subtype_filter else None
42
- year = [y.strip() for y in year_filter.split(",") if y.strip()] if year_filter else None
43
 
44
  # Call retriever function and return raw results
45
  results = get_context(
46
  vectorstore=vectorstore,
47
  query=query,
48
- reports=reports,
49
- sources=sources,
50
- subtype=subtype,
51
- year=year
52
  )
53
 
54
  return results
@@ -105,7 +96,7 @@ with gr.Blocks() as ui:
105
  # UI event handler
106
  submit_btn.click(
107
  fn=retrieve,
108
- inputs=[query_input, reports_input, sources_input, subtype_input, year_input],
109
  outputs=output,
110
  api_name="retrieve"
111
  )
 
17
 
18
  def retrieve(
19
  query: str,
20
+ collection_name: str =None,
21
+ top_level_filter: str = None,
22
+ top_level_filter_value:str|list = None
 
23
  ) -> list:
24
  """
25
  Retrieve semantically similar documents from the vector database for MCP clients.
 
34
  Returns:
35
  list: List of dictionaries containing document content, metadata, and scores
36
  """
37
+
 
 
 
 
38
 
39
  # Call retriever function and return raw results
40
  results = get_context(
41
  vectorstore=vectorstore,
42
  query=query,
 
 
 
 
43
  )
44
 
45
  return results
 
96
  # UI event handler
97
  submit_btn.click(
98
  fn=retrieve,
99
+ inputs=[query_input],
100
  outputs=output,
101
  api_name="retrieve"
102
  )
utils/retriever.py CHANGED
@@ -70,10 +70,7 @@ def get_vectorstore() -> VectorStoreInterface:
70
  return vectorstore
71
 
72
  def create_filter(
73
- reports: List[str] = None,
74
- sources: str = None,
75
- subtype: str = None,
76
- year: List[str] = None
77
  ) -> Optional[rest.Filter]:
78
  """
79
  Create a Qdrant filter based on metadata criteria.
@@ -87,50 +84,27 @@ def create_filter(
87
  Returns:
88
  Qdrant Filter object or None if no filters specified
89
  """
90
- if not any([reports, sources, subtype, year]):
91
  return None
92
 
93
  conditions = []
94
-
95
- if reports and len(reports) > 0:
96
- logging.info(f"Defining filter for reports: {reports}")
97
- conditions.append(
98
- rest.FieldCondition(
99
- key="metadata.filename",
100
- match=rest.MatchAny(any=reports)
101
- )
102
- )
103
- else:
104
- if sources:
105
- logging.info(f"Defining filter for sources: {sources}")
106
- conditions.append(
107
- rest.FieldCondition(
108
- key="metadata.source",
109
- match=rest.MatchValue(value=sources)
110
- )
111
- )
112
-
113
- if subtype:
114
- logging.info(f"Defining filter for subtype: {subtype}")
115
- conditions.append(
116
- rest.FieldCondition(
117
- key="metadata.subtype",
118
- match=rest.MatchValue(value=subtype)
119
  )
120
  )
121
-
122
- if year and len(year) > 0:
123
- logging.info(f"Defining filter for years: {year}")
124
- conditions.append(
125
- rest.FieldCondition(
126
- key="metadata.year",
127
- match=rest.MatchAny(any=year)
128
- )
129
  )
130
-
131
- if conditions:
132
- return rest.Filter(must=conditions)
133
- return None
134
 
135
  def rerank_documents(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
136
  """
@@ -193,10 +167,8 @@ def rerank_documents(query: str, documents: List[Dict[str, Any]]) -> List[Dict[s
193
  def get_context(
194
  vectorstore: VectorStoreInterface,
195
  query: str,
196
- reports: List[str] = None,
197
- sources: str = None,
198
- subtype: str = None,
199
- year: List[str] = None
200
  ) -> List[Dict[str, Any]]:
201
  """
202
  Retrieve semantically similar documents from the vector database with optional reranking.
@@ -231,12 +203,12 @@ def get_context(
231
  # with_payload=True)
232
  # filter support for QdrantVectorStore
233
  #if isinstance(vectorstore, QdrantVectorStore):
234
- # filter_obj = create_filter(reports, sources, subtype, year)
235
- # if filter_obj:
236
- # search_kwargs["filter"] = filter_obj
237
 
238
  # Perform initial retrieval
239
- retrieved_docs = vectorstore.search(query, top_k)
240
 
241
  logging.info(f"Retrieved {len(retrieved_docs)} documents for query: {query[:50]}...")
242
 
 
70
  return vectorstore
71
 
72
  def create_filter(
73
+ filter_metadata:list[Dict] = None,
 
 
 
74
  ) -> Optional[rest.Filter]:
75
  """
76
  Create a Qdrant filter based on metadata criteria.
 
84
  Returns:
85
  Qdrant Filter object or None if no filters specified
86
  """
87
+ if filter_metadata == None:
88
  return None
89
 
90
  conditions = []
91
+ logging.info(f"Defining filters for {filter_metadata}")
92
+ for condition in filter_metadata:
93
+ for key, val in condition:
94
+ if isinstance(val, str):
95
+ conditions.append(rest.FieldCondition(
96
+ key=f"metadata.{key}",
97
+ match=rest.MatchValue(value=val)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  )
99
  )
100
+ else:
101
+ conditions.append(
102
+ rest.FieldCondition(
103
+ key=f"metadata.{key}",
104
+ match=rest.MatchAny(any=val)
 
 
 
105
  )
106
+ return conditions
107
+
 
 
108
 
109
  def rerank_documents(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
110
  """
 
167
  def get_context(
168
  vectorstore: VectorStoreInterface,
169
  query: str,
170
+ collection_name: str = None,
171
+ filter_metadata = None,
 
 
172
  ) -> List[Dict[str, Any]]:
173
  """
174
  Retrieve semantically similar documents from the vector database with optional reranking.
 
203
  # with_payload=True)
204
  # filter support for QdrantVectorStore
205
  #if isinstance(vectorstore, QdrantVectorStore):
206
+ filter_obj = create_filter(filter_metadata)
207
+ if filter_obj:
208
+ search_kwargs["filter"] = filter_obj
209
 
210
  # Perform initial retrieval
211
+ retrieved_docs = vectorstore.search(query, top_k, **search_kwargs)
212
 
213
  logging.info(f"Retrieved {len(retrieved_docs)} documents for query: {query[:50]}...")
214