ppsingh committed (verified)
Commit 137c471 · 1 Parent(s): e7709a4

Upload 6 files

Files changed (6):
  1. app.py +121 -0
  2. params.cfg +26 -0
  3. requirements.txt +121 -0
  4. utils/retriever.py +257 -0
  5. utils/utils.py +107 -0
  6. utils/vectorstore_interface.py +171 -0
app.py ADDED
@@ -0,0 +1,121 @@
+import gradio as gr
+from utils.retriever import get_context, get_vectorstore
+
+# Initialize the vector store at startup
+print("Initializing vector store connection...", flush=True)
+try:
+    vectorstore = get_vectorstore()
+    print("Vector store connection initialized successfully", flush=True)
+except Exception as e:
+    print(f"Failed to initialize vector store: {e}", flush=True)
+    raise
+
+# ---------------------------------------------------------------------
+# MCP - returns raw dictionary format
+# ---------------------------------------------------------------------
+
+def retrieve(
+    query: str,
+    reports_filter: str = "",
+    sources_filter: str = "",
+    subtype_filter: str = "",
+    year_filter: str = ""
+) -> list:
+    """
+    Retrieve semantically similar documents from the vector database for MCP clients.
+
+    Args:
+        query (str): The search query text
+        reports_filter (str): Comma-separated list of specific report filenames (optional)
+        sources_filter (str): Filter by document source type (optional)
+        subtype_filter (str): Filter by document subtype (optional)
+        year_filter (str): Comma-separated list of years to filter by (optional)
+
+    Returns:
+        list: List of dictionaries containing document content, metadata, and scores
+    """
+    # Parse filter inputs (convert empty strings to None or lists)
+    reports = [r.strip() for r in reports_filter.split(",") if r.strip()] if reports_filter else []
+    sources = sources_filter.strip() if sources_filter else None
+    subtype = subtype_filter.strip() if subtype_filter else None
+    year = [y.strip() for y in year_filter.split(",") if y.strip()] if year_filter else None
+
+    # Call the retriever and return the raw results
+    results = get_context(
+        vectorstore=vectorstore,
+        query=query,
+        reports=reports,
+        sources=sources,
+        subtype=subtype,
+        year=year
+    )
+
+    return results
+
+
+# Create the Gradio interface with Blocks to support both the UI and MCP
+with gr.Blocks() as ui:
+    gr.Markdown("# ChatFed Retrieval/Reranker Module")
+    gr.Markdown("Retrieves semantically similar documents from the vector database and reranks them. Intended for use in RAG pipelines as an MCP server alongside other ChatFed modules.")
+
+    with gr.Row():
+        with gr.Column():
+            query_input = gr.Textbox(
+                label="Query",
+                lines=2,
+                placeholder="Enter your search query here",
+                info="The query to search for in the vector database"
+            )
+            reports_input = gr.Textbox(
+                label="Reports Filter (optional)",
+                lines=1,
+                placeholder="report1.pdf, report2.pdf",
+                info="Comma-separated list of specific report filenames to search within (leave empty for all)"
+            )
+            sources_input = gr.Textbox(
+                label="Sources Filter (optional)",
+                lines=1,
+                placeholder="annual_report",
+                info="Filter by document source type (leave empty for all)"
+            )
+            subtype_input = gr.Textbox(
+                label="Subtype Filter (optional)",
+                lines=1,
+                placeholder="financial",
+                info="Filter by document subtype (leave empty for all)"
+            )
+            year_input = gr.Textbox(
+                label="Year Filter (optional)",
+                lines=1,
+                placeholder="2023, 2024",
+                info="Comma-separated list of years to filter by (leave empty for all)"
+            )
+
+            submit_btn = gr.Button("Submit", variant="primary")
+
+        # Output must be JSON-serializable so the endpoint can be added as a tool in HuggingChat
+        with gr.Column():
+            output = gr.Text(
+                label="Retrieved Context",
+                lines=10,
+                show_copy_button=True
+            )
+
+    # UI event handler
+    submit_btn.click(
+        fn=retrieve,
+        inputs=[query_input, reports_input, sources_input, subtype_input, year_input],
+        outputs=output,
+        api_name="retrieve"
+    )
+
+
+if __name__ == "__main__":
+    ui.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        # mcp_server=True,  # requires a newer Gradio release than the pinned 4.44.1
+        show_error=True
+    )
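Once the Space is running, the `retrieve` endpoint can be exercised from another process with gradio_client. A minimal sketch; the Space id "GIZ/chatfed-retriever" is a placeholder and not taken from this commit:

from gradio_client import Client

# Placeholder Space id; substitute the actual deployment
client = Client("GIZ/chatfed-retriever")

# Positional args mirror retrieve(): query, then the four optional filters
results = client.predict(
    "deforestation due diligence obligations",  # query
    "",            # reports_filter
    "",            # sources_filter
    "",            # subtype_filter
    "2023, 2024",  # year_filter
    api_name="/retrieve",
)
print(results)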
params.cfg ADDED
@@ -0,0 +1,26 @@
+[vectorstore]
+# huggingface_spaces usage:
+# PROVIDER = huggingface
+# URL = GIZ/audit_data
+# COLLECTION_NAME = docling
+
+# direct Qdrant usage:
+PROVIDER = qdrant
+URL = giz-chatfed-qdrantserver.hf.space
+COLLECTION_NAME = EUDR
+
+[embeddings]
+MODEL_NAME = BAAI/bge-m3
+# DEVICE = cpu
+
+[retriever]
+TOP_K = 10
+SCORE_THRESHOLD = 0.6
+
+[reranker]
+MODEL_NAME = cross-encoder/ms-marco-MiniLM-L-6-v2
+TOP_K = 5
+ENABLED = false
+# use this to scale the total docs retrieved prior to reranking (i.e. retriever TOP_K * TOP_K_SCALE_FACTOR)
+TOP_K_SCALE_FACTOR = 2
+
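For reference: with these values the retriever fetches TOP_K = 10 documents; if ENABLED were flipped to true, it would fetch 10 * 2 = 20 candidates and the reranker would keep its top 5. A small sketch of reading the file with the stdlib configparser, mirroring what utils/utils.py does:

import configparser

config = configparser.ConfigParser()
config.read("params.cfg")

top_k = config.getint("retriever", "TOP_K")                          # 10
enabled = config.getboolean("reranker", "ENABLED", fallback=False)   # False
scale = config.getint("reranker", "TOP_K_SCALE_FACTOR", fallback=2)  # 2

candidates = top_k * scale if enabled else top_k
print(candidates)  # 10 here, since ENABLED = false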
requirements.txt ADDED
@@ -0,0 +1,121 @@
+aiofiles==23.2.1
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.14
+aiosignal==1.4.0
+annotated-types==0.7.0
+anyio==4.9.0
+attrs==25.3.0
+certifi==2025.7.14
+charset-normalizer==3.4.2
+click==8.2.1
+contourpy==1.3.2
+cycler==0.12.1
+dataclasses-json==0.6.7
+fastapi==0.116.1
+ffmpy==0.6.1
+filelock==3.18.0
+fonttools==4.59.0
+frozenlist==1.7.0
+fsspec==2025.7.0
+gradio==4.44.1
+gradio_client==1.3.0
+greenlet==3.2.3
+grpcio==1.74.0
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+httpcore==1.0.9
+httpx==0.28.1
+httpx-sse==0.4.1
+huggingface-hub==0.34.0
+hyperframe==6.1.0
+idna==3.10
+importlib_resources==6.5.2
+Jinja2==3.1.6
+joblib==1.5.1
+jsonpatch==1.33
+jsonpointer==3.0.0
+kiwisolver==1.4.8
+langchain==0.3.26
+langchain-community==0.3.27
+langchain-core==0.3.71
+langchain-text-splitters==0.3.8
+langsmith==0.4.8
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.26.1
+matplotlib==3.10.3
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+mypy_extensions==1.1.0
+networkx==3.5
+numpy==2.3.2
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
+orjson==3.11.0
+packaging==25.0
+pandas==2.3.1
+pillow==10.4.0
+portalocker==3.2.0
+propcache==0.3.2
+protobuf==6.31.1
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pyparsing==3.2.3
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-multipart==0.0.20
+pytz==2025.2
+PyYAML==6.0.2
+qdrant-client==1.15.0
+regex==2024.11.6
+requests==2.32.4
+requests-toolbelt==1.0.0
+rich==14.1.0
+ruff==0.12.5
+safetensors==0.5.3
+scikit-learn==1.7.1
+scipy==1.16.0
+semantic-version==2.10.0
+sentence-transformers==5.0.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+SQLAlchemy==2.0.41
+starlette==0.47.2
+sympy==1.14.0
+tenacity==9.1.2
+threadpoolctl==3.6.0
+tokenizers==0.21.2
+tomlkit==0.12.0
+torch==2.7.1
+tqdm==4.67.1
+transformers==4.53.3
+triton==3.3.1
+typer==0.16.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+typing_extensions==4.14.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+websockets==12.0
+yarl==1.20.1
+zstandard==0.23.0
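Note that torch==2.7.1 pulls in the CUDA wheels (the nvidia-* pins and triton) even on CPU-only hosts, which dominates the install size. A small stdlib-only sanity check that the active environment matches these pins (a sketch; it assumes pin names match distribution names exactly):

from importlib.metadata import version, PackageNotFoundError

with open("requirements.txt") as f:
    for line in f:
        name, _, pinned = line.strip().partition("==")
        if not name or not pinned:
            continue
        try:
            installed = version(name)
        except PackageNotFoundError:
            print(f"{name}: not installed (expected {pinned})")
            continue
        if installed != pinned:
            print(f"{name}: {installed} != {pinned}")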
utils/retriever.py ADDED
@@ -0,0 +1,257 @@
+from typing import List, Dict, Any, Optional
+from qdrant_client.http import models as rest
+from langchain.schema import Document
+from langchain_community.cross_encoders import HuggingFaceCrossEncoder
+from langchain.retrievers.document_compressors import CrossEncoderReranker
+import logging
+import sys
+
+from .utils import getconfig
+from .vectorstore_interface import create_vectorstore, VectorStoreInterface, QdrantVectorStore
+
+# Configure logging to be more verbose
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+
+# Load configuration
+config = getconfig("params.cfg")
+
+# Retriever settings from config
+RETRIEVER_TOP_K = int(config.get("retriever", "TOP_K"))
+SCORE_THRESHOLD = float(config.get("retriever", "SCORE_THRESHOLD"))
+
+# Reranker settings from config
+RERANKER_ENABLED = config.getboolean("reranker", "ENABLED", fallback=False)
+RERANKER_MODEL = config.get("reranker", "MODEL_NAME", fallback="cross-encoder/ms-marco-MiniLM-L-6-v2")
+RERANKER_TOP_K = int(config.get("reranker", "TOP_K", fallback=5))
+RERANKER_TOP_K_SCALE_FACTOR = int(config.get("reranker", "TOP_K_SCALE_FACTOR", fallback=2))
+
+# Initialize the reranker if enabled
+reranker = None
+if RERANKER_ENABLED:
+    try:
+        print(f"Starting reranker initialization with model: {RERANKER_MODEL}", flush=True)
+        logging.info(f"Initializing reranker with model: {RERANKER_MODEL}")
+
+        print("Loading HuggingFace cross encoder model", flush=True)
+        # HuggingFaceCrossEncoder doesn't accept a cache_dir parameter;
+        # the underlying models use the default cache locations
+        cross_encoder_model = HuggingFaceCrossEncoder(model_name=RERANKER_MODEL)
+        print("Cross encoder model loaded successfully", flush=True)
+
+        print("Creating CrossEncoderReranker...", flush=True)
+        reranker = CrossEncoderReranker(model=cross_encoder_model, top_n=RERANKER_TOP_K)
+        print("Reranker initialized successfully", flush=True)
+        logging.info("Reranker initialized successfully")
+    except Exception as e:
+        print(f"Failed to initialize reranker: {str(e)}", flush=True)
+        logging.error(f"Failed to initialize reranker: {str(e)}")
+        reranker = None
+else:
+    print("Reranker is disabled", flush=True)
+
+
+def get_vectorstore() -> VectorStoreInterface:
+    """
+    Create and return a vector store connection.
+
+    Returns:
+        VectorStoreInterface instance
+    """
+    logging.info("Initializing vector store connection...")
+    vectorstore = create_vectorstore(config)
+    logging.info("Vector store connection initialized successfully")
+    return vectorstore
+
+
+def create_filter(
+    reports: List[str] = None,
+    sources: str = None,
+    subtype: str = None,
+    year: List[str] = None
+) -> Optional[rest.Filter]:
+    """
+    Create a Qdrant filter based on metadata criteria.
+
+    Args:
+        reports: List of specific report filenames to filter by
+        sources: Source type to filter by
+        subtype: Document subtype to filter by
+        year: List of years to filter by
+
+    Returns:
+        Qdrant Filter object, or None if no filters are specified
+    """
+    if not any([reports, sources, subtype, year]):
+        return None
+
+    conditions = []
+
+    if reports:
+        logging.info(f"Defining filter for reports: {reports}")
+        conditions.append(
+            rest.FieldCondition(
+                key="metadata.filename",
+                match=rest.MatchAny(any=reports)
+            )
+        )
+    else:
+        if sources:
+            logging.info(f"Defining filter for sources: {sources}")
+            conditions.append(
+                rest.FieldCondition(
+                    key="metadata.source",
+                    match=rest.MatchValue(value=sources)
+                )
+            )
+
+        if subtype:
+            logging.info(f"Defining filter for subtype: {subtype}")
+            conditions.append(
+                rest.FieldCondition(
+                    key="metadata.subtype",
+                    match=rest.MatchValue(value=subtype)
+                )
+            )
+
+        if year:
+            logging.info(f"Defining filter for years: {year}")
+            conditions.append(
+                rest.FieldCondition(
+                    key="metadata.year",
+                    match=rest.MatchAny(any=year)
+                )
+            )
+
+    if conditions:
+        return rest.Filter(must=conditions)
+    return None
+
+
+def rerank_documents(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Rerank documents using a cross-encoder (specified in params.cfg).
+
+    Args:
+        query: The search query
+        documents: List of documents to rerank
+
+    Returns:
+        Reranked list of documents in the original format
+    """
+    if not reranker or not documents:
+        return documents
+
+    try:
+        logging.info(f"Starting reranking of {len(documents)} documents")
+
+        # Convert to LangChain Document format using the expected keys
+        # (need to review this later for portability)
+        langchain_docs = []
+        for doc in documents:
+            # Use the keys produced by the vector store interface
+            content = doc.get('answer', '')
+            metadata = doc.get('answer_metadata', {})
+
+            if not content:
+                logging.warning(f"Document missing content: {doc}")
+                continue
+
+            langchain_doc = Document(
+                page_content=content,
+                metadata=metadata
+            )
+            langchain_docs.append(langchain_doc)
+
+        if not langchain_docs:
+            logging.warning("No valid documents found for reranking")
+            return documents
+
+        # Rerank documents
+        logging.info(f"Reranking {len(langchain_docs)} documents")
+        reranked_docs = reranker.compress_documents(langchain_docs, query)
+
+        # Convert back to the original format
+        # (the 'score' from the initial retrieval is dropped here)
+        result = []
+        for doc in reranked_docs:
+            result.append({
+                'answer': doc.page_content,
+                'answer_metadata': doc.metadata,
+            })
+
+        logging.info(f"Successfully reranked {len(documents)} documents to top {len(result)}")
+        return result
+
+    except Exception as e:
+        logging.error(f"Error during reranking: {str(e)}")
+        # Return the original documents if reranking fails
+        return documents
+
+
+def get_context(
+    vectorstore: VectorStoreInterface,
+    query: str,
+    reports: List[str] = None,
+    sources: str = None,
+    subtype: str = None,
+    year: List[str] = None
+) -> List[Dict[str, Any]]:
+    """
+    Retrieve semantically similar documents from the vector database with optional reranking.
+
+    Args:
+        vectorstore: The vector store interface to search
+        query: The search query
+        reports: List of specific report filenames to search within
+        sources: Source type to filter by
+        subtype: Document subtype to filter by
+        year: List of years to filter by
+
+    Returns:
+        List of dictionaries with 'answer', 'answer_metadata', and 'score' keys
+    """
+    try:
+        # Use a higher k for initial retrieval if reranking is enabled (more candidate docs)
+        top_k = RETRIEVER_TOP_K
+        if RERANKER_ENABLED and reranker:
+            top_k = top_k * RERANKER_TOP_K_SCALE_FACTOR
+            logging.info(f"Reranking enabled, retrieving {top_k} candidates")
+
+        search_kwargs = {
+            "model_name": config.get("embeddings", "MODEL_NAME")
+        }
+
+        # Metadata filter support is only available for QdrantVectorStore
+        if isinstance(vectorstore, QdrantVectorStore):
+            filter_obj = create_filter(reports, sources, subtype, year)
+            if filter_obj:
+                search_kwargs["filter"] = filter_obj
+
+        # Perform the initial retrieval
+        retrieved_docs = vectorstore.search(query, top_k, **search_kwargs)
+
+        logging.info(f"Retrieved {len(retrieved_docs)} documents for query: {query[:50]}...")
+
+        # Apply reranking if enabled
+        if RERANKER_ENABLED and reranker and retrieved_docs:
+            logging.info("Applying reranking...")
+            retrieved_docs = rerank_documents(query, retrieved_docs)
+
+            # Trim to the final desired k
+            retrieved_docs = retrieved_docs[:RERANKER_TOP_K]
+
+        logging.info(f"Returning {len(retrieved_docs)} final documents")
+        logging.debug(f"Retrieved results: {retrieved_docs}")
+        return retrieved_docs
+
+    except Exception as e:
+        logging.error(f"Error during retrieval: {str(e)}")
+        raise
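The filter builder above maps directly onto Qdrant's must-conditions; a year restriction, for instance, becomes a MatchAny over metadata.year. A short usage sketch, assuming params.cfg points at a reachable store and QDRANT_API_KEY is set:

from utils.retriever import create_filter, get_context, get_vectorstore

# A MatchAny condition on metadata.year wrapped in a must-filter
year_filter = create_filter(year=["2023", "2024"])
print(year_filter)

# End-to-end retrieval (filters apply only on the qdrant provider)
vs = get_vectorstore()
docs = get_context(vectorstore=vs, query="timber import due diligence", year=["2023"])
for doc in docs:
    # 'score' is present when reranking is disabled
    print(doc["score"], doc["answer"][:80])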
utils/utils.py ADDED
@@ -0,0 +1,107 @@
+import configparser
+import logging
+import os
+import ast
+import re
+from dotenv import load_dotenv
+
+# Local .env file
+load_dotenv()
+
+
+def getconfig(configfile_path: str):
+    """
+    Read the config file.
+
+    Params
+    ----------------
+    configfile_path: file path of the .cfg file
+    """
+    config = configparser.ConfigParser()
+    read_ok = config.read(configfile_path)
+    if not read_ok:
+        logging.warning(f"Config file not found: {configfile_path}")
+    return config
+
+
+def get_auth(provider: str) -> dict:
+    """Get the authentication configuration for different providers."""
+    auth_configs = {
+        "huggingface": {"api_key": os.getenv("HF_TOKEN")},
+        "qdrant": {"api_key": os.getenv("QDRANT_API_KEY")},
+    }
+
+    provider = provider.lower()  # Normalize to lowercase
+
+    if provider not in auth_configs:
+        raise ValueError(f"Unsupported provider: {provider}")
+
+    auth_config = auth_configs[provider]
+    api_key = auth_config.get("api_key")
+
+    if not api_key:
+        logging.warning(f"No API key found for provider '{provider}'. Please set the appropriate environment variable.")
+        auth_config["api_key"] = None
+
+    return auth_config
+
+
+def process_content(content: str) -> str:
+    """
+    Process and clean malformed content that may contain stringified nested lists.
+    The test DB on Qdrant came out slightly malformed during processing, but this guard is useful in general.
+
+    Args:
+        content: Raw content from the vector store
+
+    Returns:
+        Cleaned, readable text content
+    """
+    if not content:
+        return content
+
+    # Check whether the content looks like a stringified list/nested structure
+    content_stripped = content.strip()
+    if content_stripped.startswith('[') and content_stripped.endswith(']'):
+        try:
+            # Parse as a literal list structure
+            parsed_content = ast.literal_eval(content_stripped)
+
+            if isinstance(parsed_content, list):
+                # Flatten nested lists and extract meaningful text
+                def extract_text_from_nested(obj):
+                    if isinstance(obj, list):
+                        text_items = []
+                        for item in obj:
+                            extracted = extract_text_from_nested(item)
+                            if extracted and extracted.strip():
+                                text_items.append(extracted)
+                        return ' '.join(text_items)
+                    elif isinstance(obj, str) and obj.strip():
+                        return obj.strip()
+                    elif isinstance(obj, dict):
+                        # Handle dict structures if present
+                        text_items = []
+                        for key, value in obj.items():
+                            if isinstance(value, str) and value.strip():
+                                text_items.append(f"{key}: {value}")
+                        return ' '.join(text_items)
+                    else:
+                        return ''
+
+                extracted_text = extract_text_from_nested(parsed_content)
+
+                if extracted_text and len(extracted_text.strip()) > 0:
+                    # Clean up extra whitespace and format nicely
+                    cleaned_text = re.sub(r'\s+', ' ', extracted_text).strip()
+                    logging.debug(f"Successfully processed nested list content: {len(cleaned_text)} chars")
+                    return cleaned_text
+                else:
+                    logging.warning("Parsed list content but no meaningful text found")
+                    return content  # Return the original if no meaningful text was extracted
+
+        except (ValueError, SyntaxError) as e:
+            logging.debug(f"Content looks like a list but failed to parse: {e}")
+            # Fall through to return the original content
+
+    # For regular text content, just clean up whitespace
+    return re.sub(r'\s+', ' ', content).strip()
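To illustrate the repair path in process_content: a stringified nested list flattens to plain text, while ordinary strings only get their whitespace normalized:

from utils.utils import process_content

raw = "[['Article 3', ['Relevant products shall be deforestation-free']]]"
print(process_content(raw))
# -> Article 3 Relevant products shall be deforestation-free

print(process_content("  spaced   out\ntext "))
# -> spaced out text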
utils/vectorstore_interface.py ADDED
@@ -0,0 +1,171 @@
+from abc import ABC, abstractmethod
+from typing import List, Dict, Any, Optional
+from gradio_client import Client
+import logging
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+from .utils import get_auth, process_content
+
+load_dotenv()
+
+
+class VectorStoreInterface(ABC):
+    """Abstract interface for different vector store implementations."""
+
+    @abstractmethod
+    def search(self, query: str, top_k: int, **kwargs) -> List[Dict[str, Any]]:
+        """Search for similar documents."""
+        pass
+
+
+class HuggingFaceSpacesVectorStore(VectorStoreInterface):
+    """Vector store implementation for Hugging Face Spaces with MCP endpoints."""
+
+    def __init__(self, url: str, collection_name: str, api_key: Optional[str] = None):
+        repo_id = url
+
+        logging.info(f"Connecting to Hugging Face Space: {repo_id}")
+
+        if api_key:
+            self.client = Client(repo_id, hf_token=api_key)
+        else:
+            self.client = Client(repo_id)
+
+        self.collection_name = collection_name
+
+    def search(self, query: str, top_k: int, **kwargs) -> List[Dict[str, Any]]:
+        """Search using the Hugging Face Spaces MCP API."""
+        try:
+            # Use the /search_text endpoint as documented in the API
+            result = self.client.predict(
+                query=query,
+                collection_name=self.collection_name,
+                model_name=kwargs.get('model_name'),
+                top_k=top_k,
+                api_name="/search_text"
+            )
+
+            logging.info(f"Successfully retrieved {len(result) if result else 0} documents")
+            return result
+
+        except Exception as e:
+            logging.error(f"Error searching Hugging Face Spaces: {str(e)}")
+            raise
+
+
+class QdrantVectorStore(VectorStoreInterface):
+    """Vector store implementation for a direct Qdrant connection."""
+
+    def __init__(self, url: str, collection_name: str, api_key: Optional[str] = None):
+        from qdrant_client import QdrantClient
+
+        # The Python client must use port 443 with HTTPS for the hosted server
+        self.client = QdrantClient(
+            host=url,
+            port=443,
+            https=True,
+            api_key=api_key,
+            timeout=120,
+        )
+
+        self.collection_name = collection_name
+        # Initialize the embedding model as None - it will be loaded on first use
+        self._embedding_model = None
+        self._current_model_name = None
+
+    def _get_embedding_model(self, model_name: str = None):
+        """Lazy-load the embedding model to avoid loading it if not needed."""
+        if model_name is None:
+            model_name = "BAAI/bge-m3"  # Default from config
+
+        # Only reload if the model name changed
+        if self._embedding_model is None or self._current_model_name != model_name:
+            logging.info(f"Loading embedding model: {model_name}")
+            from sentence_transformers import SentenceTransformer
+
+            cache_folder = Path(os.getenv("HF_HUB_CACHE", "/tmp/hf_cache"))
+            cache_folder.mkdir(parents=True, exist_ok=True)
+
+            self._embedding_model = SentenceTransformer(
+                model_name,
+                cache_folder=str(cache_folder)
+            )
+            self._current_model_name = model_name
+            logging.info(f"Successfully loaded embedding model: {model_name}")
+
+        return self._embedding_model
+
+    def search(self, query: str, top_k: int, **kwargs) -> List[Dict[str, Any]]:
+        """Search using a direct Qdrant connection."""
+        try:
+            # Get the embedding model
+            model_name = kwargs.get('model_name')
+            embedding_model = self._get_embedding_model(model_name)
+
+            # Convert the query to an embedding
+            logging.info(f"Converting query to embedding using model: {self._current_model_name}")
+            query_embedding = embedding_model.encode(query).tolist()
+
+            # Get the filter from kwargs if provided
+            filter_obj = kwargs.get('filter', None)
+
+            # Perform the vector search
+            logging.info(f"Searching Qdrant collection '{self.collection_name}' for top {top_k} results")
+            search_result = self.client.search(
+                collection_name=self.collection_name,
+                query_vector=query_embedding,
+                query_filter=filter_obj,  # Filter support
+                limit=top_k,
+                with_payload=True,
+                with_vectors=False
+            )
+            logging.debug(search_result)
+
+            # Format results to match the expected output format
+            results = []
+            for hit in search_result:
+                raw_content = hit.payload.get('text', '')
+                # Process content to handle malformed nested list structures
+                processed_content = process_content(raw_content)
+
+                result_dict = {
+                    'answer': processed_content,
+                    'answer_metadata': hit.payload.get('metadata', {}),
+                    'score': hit.score
+                }
+                results.append(result_dict)
+
+            logging.info(f"Successfully retrieved {len(results)} documents from Qdrant")
+            return results
+
+        except Exception as e:
+            logging.error(f"Error searching Qdrant: {str(e)}")
+            raise
+
+
+def create_vectorstore(config: Any) -> VectorStoreInterface:
+    """Factory function to create the appropriate vector store based on configuration."""
+    vectorstore_type = config.get("vectorstore", "PROVIDER")
+
+    # Get the authentication config for the provider
+    auth_config = get_auth(vectorstore_type.lower())
+
+    if vectorstore_type.lower() == "huggingface":
+        url = config.get("vectorstore", "URL")
+        collection_name = config.get("vectorstore", "COLLECTION_NAME")
+        api_key = auth_config["api_key"]
+        return HuggingFaceSpacesVectorStore(url, collection_name, api_key)
+
+    elif vectorstore_type.lower() == "qdrant":
+        url = config.get("vectorstore", "URL")  # Bare hostname; the client connects over HTTPS on port 443
+        collection_name = config.get("vectorstore", "COLLECTION_NAME")
+        api_key = auth_config["api_key"]
+        return QdrantVectorStore(url, collection_name, api_key)
+
+    else:
+        raise ValueError(f"Unsupported vector store type: {vectorstore_type}")
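Because create_vectorstore keys off PROVIDER, adding a backend amounts to one VectorStoreInterface subclass plus a branch in the factory. A toy in-memory store, purely illustrative and not part of this commit, shows the shape of the contract:

from typing import Any, Dict, List
from utils.vectorstore_interface import VectorStoreInterface

class InMemoryVectorStore(VectorStoreInterface):
    """Toy backend doing substring matching; handy for unit tests."""

    def __init__(self, docs: List[Dict[str, Any]]):
        self.docs = docs

    def search(self, query: str, top_k: int, **kwargs) -> List[Dict[str, Any]]:
        # Return documents in the same {'answer', 'answer_metadata', 'score'} shape
        hits = [d for d in self.docs if query.lower() in d["answer"].lower()]
        return hits[:top_k]

store = InMemoryVectorStore([
    {"answer": "EUDR due diligence statement", "answer_metadata": {}, "score": 1.0},
])
print(store.search("due diligence", top_k=5))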