Spaces:

mwitiderrick
/

medicalchatbot

Sleeping

App Files Files Community

mwitiderrick committed on Jun 12

Commit

875c838

verified ·

1 Parent(s): 38fec79

Update rag_dspy.py

Browse files

Files changed (1) hide show

rag_dspy.py +18 -5

rag_dspy.py CHANGED Viewed

@@ -3,6 +3,8 @@
 import dspy
 from dspy_qdrant import QdrantRM
 from qdrant_client import QdrantClient, models
 from dotenv import load_dotenv
 import os
@@ -10,7 +12,7 @@ load_dotenv()
 # DSPy setup
 lm = dspy.LM("gpt-4", max_tokens=512,api_key=os.environ.get("OPENAI_API_KEY"))
 client = QdrantClient(url=os.environ.get("QDRANT_CLOUD_URL"), api_key=os.environ.get("QDRANT_API_KEY"))
-collection_name = "medical_chat_bot"
 rm = QdrantRM(
     qdrant_collection_name=collection_name,
     qdrant_client=client,
@@ -22,7 +24,7 @@ dspy.settings.configure(lm=lm, rm=rm)
 # Manual reranker using ColBERT multivector field
 # Manual reranker using Qdrant’s native prefetch + ColBERT query
-def rerank_with_colbert(query_text):
     from fastembed import TextEmbedding, LateInteractionTextEmbedding
     # Encode query once with both models
@@ -42,7 +44,14 @@ def rerank_with_colbert(query_text):
         query=colbert_query,
         using="colbert",
         limit=5,
-        with_payload=True
     )
     points = results.points
@@ -56,6 +65,8 @@ def rerank_with_colbert(query_text):
 # DSPy Signature and Module
 class MedicalAnswer(dspy.Signature):
     question = dspy.InputField(desc="The medical question to answer")
     context = dspy.OutputField(desc="The answer to the medical question")
     final_answer = dspy.OutputField(desc="The answer to the medical question")
@@ -63,12 +74,14 @@ class MedicalRAG(dspy.Module):
     def __init__(self):
         super().__init__()
-    def forward(self, question):
-        reranked_docs = rerank_with_colbert(question)
         context_str = "\n".join(reranked_docs)
         return dspy.ChainOfThought(MedicalAnswer)(
             question=question,
             context=context_str
         )

 import dspy
 from dspy_qdrant import QdrantRM
 from qdrant_client import QdrantClient, models
+from qdrant_client.models import Filter, FieldCondition, MatchValue
 from dotenv import load_dotenv
 import os
 # DSPy setup
 lm = dspy.LM("gpt-4", max_tokens=512,api_key=os.environ.get("OPENAI_API_KEY"))
 client = QdrantClient(url=os.environ.get("QDRANT_CLOUD_URL"), api_key=os.environ.get("QDRANT_API_KEY"))
+collection_name = "indexed_medical_chat_bot"
 rm = QdrantRM(
     qdrant_collection_name=collection_name,
     qdrant_client=client,
 # Manual reranker using ColBERT multivector field
 # Manual reranker using Qdrant’s native prefetch + ColBERT query
+def rerank_with_colbert(query_text, year, specialty):
     from fastembed import TextEmbedding, LateInteractionTextEmbedding
     # Encode query once with both models
         query=colbert_query,
         using="colbert",
         limit=5,
+        with_payload=True,
+        query_filter=Filter(
+            must=[
+                FieldCondition(key="specialty", match=MatchValue(value=specialty)),
+                FieldCondition(key="year", match=MatchValue(value=year))
+                ]
+        )
     )
     points = results.points
 # DSPy Signature and Module
 class MedicalAnswer(dspy.Signature):
     question = dspy.InputField(desc="The medical question to answer")
+    year = dspy.InputField(desc="The year of the medical paper")
+    specialty = dspy.InputField(desc="The specialty of the medical paper")
     context = dspy.OutputField(desc="The answer to the medical question")
     final_answer = dspy.OutputField(desc="The answer to the medical question")
     def __init__(self):
         super().__init__()
+    def forward(self, question, year, specialty):
+        reranked_docs = rerank_with_colbert(question, year, specialty)
         context_str = "\n".join(reranked_docs)
         return dspy.ChainOfThought(MedicalAnswer)(
             question=question,
+            year=year,
+            specialty=specialty,
             context=context_str
         )