Spaces:

baderanas
/

rag-medical

Running

App Files Files Community

baderanas committed 25 days ago

Commit

deca715

verified ·

1 Parent(s): ffc324a

Update chroma_operations/retrieve.py

Browse files

Files changed (1) hide show

chroma_operations/retrieve.py +49 -49

chroma_operations/retrieve.py CHANGED Viewed

@@ -1,49 +1,49 @@
-import os
-import logging
-from typing import List, Optional
-import chromadb
-from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
-from dotenv import load_dotenv
-# Load environment variables
-load_dotenv()
-# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-def search_similar_chunks(
-    query_text: str,
-    document_name: str,
-    collection_name: str = "rag_collection",
-    top_k: int = 5,
-):
-    """Search for top-k chunks similar to query_text within a specific document (source_file)."""
-    try:
-        # Initialize embedding function and Chroma client
-        embedding_function = OpenAIEmbeddingFunction(
-            api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
-        )
-        client = chromadb.PersistentClient(path="./chroma_db")
-        # Load the collection
-        collection = client.get_collection(
-            name=collection_name, embedding_function=embedding_function
-        )
-        # Query similar documents filtered by document_name
-        results = collection.query(
-            query_texts=[query_text],
-            n_results=top_k,
-            where={"source_file": document_name},
-        )
-        documents = results.get("documents", [[]])[0]
-        metadatas = results.get("metadatas", [[]])[0]
-        return documents
-    except Exception as e:
-        logger.error(f"Similarity search failed: {str(e)}")
-        return []

+import os
+import logging
+from typing import List, Optional
+import chromadb
+from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def search_similar_chunks(
+    query_text: str,
+    document_names: List[str],
+    collection_name: str = "rag_collection",
+    top_k: int = 5,
+) -> List[str]:
+    """Return the top-k chunks most similar to ``query_text`` from the given documents.
+
+    Args:
+        query_text: Text to embed and search for.
+        document_names: ``source_file`` metadata values to restrict the search to.
+            A single name passed as a plain string is treated as a one-item list.
+        collection_name: Name of the Chroma collection to query.
+        top_k: Maximum number of chunks to return.
+
+    Returns:
+        The matching chunk texts, or an empty list when no document names are
+        given or when the search fails (the failure is logged, not raised).
+    """
+    # Accept a bare string so callers written against the earlier
+    # single-document signature keep working with the ``$in`` filter.
+    if isinstance(document_names, str):
+        document_names = [document_names]
+    else:
+        document_names = list(document_names or [])
+    # An empty ``$in`` operand is rejected by Chroma's filter validation, so
+    # there is nothing to search; skip creating the client altogether.
+    if not document_names:
+        return []
+    try:
+        # Initialize embedding function and Chroma client
+        embedding_function = OpenAIEmbeddingFunction(
+            api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
+        )
+        client = chromadb.PersistentClient(path="./chroma_db")
+        # Load the collection
+        collection = client.get_collection(
+            name=collection_name, embedding_function=embedding_function
+        )
+        # Query similar chunks whose source_file is one of document_names
+        results = collection.query(
+            query_texts=[query_text],
+            n_results=top_k,
+            where={"source_file": {"$in": document_names}},
+        )
+        # One query text was sent, so the first inner list holds its matches.
+        return results.get("documents", [[]])[0]
+    except Exception as e:
+        # Best-effort lookup: keep the traceback in the log and report no matches.
+        logger.exception("Similarity search failed: %s", e)
+        return []