Spaces:

mwitiderrick
/

MedicalQA

Runtime error

App Files Files Community

mwitiderrick committed on Jun 11

Commit

9de97e7

verified ·

1 Parent(s): fbf94f8

Upload 5 files

Browse files

Files changed (5) hide show

Dockerfile +23 -0
app.py +17 -0
chainlit.md +43 -0
rag_dspy.py +74 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+# Hugging Face Spaces Chainlit template
+FROM python:3.11-slim
+# Install system dependencies (requires root, so do this before switching user)
+RUN apt-get update && apt-get install -y \
+    git curl build-essential && \
+    rm -rf /var/lib/apt/lists/*
+# Docker Spaces run the container as UID 1000. Create that user and give it a
+# working directory it owns, so the app can write files next to its sources
+# at runtime instead of failing with permission errors.
+RUN useradd -m -u 1000 user && \
+    mkdir -p /home/user/app && \
+    chown -R user:user /home/user/app
+USER user
+# pip installs into ~/.local for a non-root user; put its bin dir on PATH so
+# the `chainlit` entry point is found.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Set working directory
+WORKDIR /home/user/app
+# Install Python deps (copied first so this layer is cached across source edits)
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy source
+COPY --chown=user . .
+# Expose Chainlit port
+EXPOSE 7860
+# Launch Chainlit app
+CMD ["chainlit", "run", "app.py", "-h", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import os
+import chainlit as cl
+from dotenv import load_dotenv
+from rag_dspy import MedicalAnswer, rerank_with_colbert, MedicalRAG
+import dspy
+from dspy_qdrant import QdrantRM
+from qdrant_client import QdrantClient
+# Configure Chainlit
+rag_chain = MedicalRAG()
+@cl.on_message
+async def main(message: cl.Message):
+    user_question = message.content
+    result = rag_chain.forward(user_question)
+    result = result.final_answer
+    await cl.Message(content=result).send()

chainlit.md ADDED Viewed

	@@ -0,0 +1,43 @@

+#  Medical QA Chatbot
+This is a Chain-of-Thought powered medical chatbot that:
+- Retrieves answers from a Qdrant Cloud vector DB using dense + ColBERT multivectors
+- Uses Stanford DSPy to reason step-by-step with retrieved context
+- Supports traceable source highlighting in Chainlit
+- Deployable on Hugging Face Spaces via Docker
+---
+##  How to Deploy
+- Add `OPENAI_API_KEY`, `QDRANT_CLOUD_URL`, and `QDRANT_API_KEY` as secret environment variables in your Hugging Face Space settings
+- Make sure `QDRANT_CLOUD_URL` points to the Qdrant Cloud instance that hosts the `medical_chat_bot` collection queried by `rag_dspy.py`
+- Run the Space
+## Sample Questions
+### General Medical Knowledge
+- What are the most common symptoms of lupus?
+- How is type 2 diabetes usually managed in adults?
+- What is the difference between viral and bacterial pneumonia?
+### Treatment & Medication
+- What are the first-line medications for treating hypertension?
+- How does metformin work to lower blood sugar?
+### Diagnosis & Tests
+- What diagnostic tests are used to detect rheumatoid arthritis?
+- When is a colonoscopy recommended for cancer screening?
+### Hospital & Patient Care
+- What are the psychosocial challenges faced by cancer patients?
+- How do hospitals manage patients with multidrug-resistant infections?
+### Clinical Guidelines / Rare Topics
+- What is the recommended treatment for acute myocardial infarction in elderly patients?

rag_dspy.py ADDED Viewed

	@@ -0,0 +1,74 @@

+# rag_dspy.py
+import dspy
+from dspy_qdrant import QdrantRM
+from qdrant_client import QdrantClient, models
+from dotenv import load_dotenv
+import os
+load_dotenv()
+# DSPy setup
+lm = dspy.LM("gpt-4", max_tokens=512,api_key=os.environ.get("OPENAI_API_KEY"))
+client = QdrantClient(url=os.environ.get("QDRANT_CLOUD_URL"), api_key=os.environ.get("QDRANT_API_KEY"))
+collection_name = "medical_chat_bot"
+rm = QdrantRM(
+    qdrant_collection_name=collection_name,
+    qdrant_client=client,
+    vector_name="dense",                 # <-- MATCHES your vector field in upsert
+    document_field="passage_text",        # <-- MATCHES your payload field in upsert
+    k=20)
+dspy.settings.configure(lm=lm, rm=rm)
+# Manual reranker using ColBERT multivector field
+# Manual reranker using Qdrant’s native prefetch + ColBERT query
+def rerank_with_colbert(query_text):
+    from fastembed import TextEmbedding, LateInteractionTextEmbedding
+    # Encode query once with both models
+    dense_model = TextEmbedding("BAAI/bge-small-en")
+    colbert_model = LateInteractionTextEmbedding("colbert-ir/colbertv2.0")
+    dense_query = list(dense_model.embed(query_text))[0]
+    colbert_query = list(colbert_model.embed(query_text))[0]
+    # Combined query: retrieve with dense, rerank with ColBERT
+    results = client.query_points(
+        collection_name=collection_name,
+        prefetch=models.Prefetch(
+            query=dense_query,
+            using="dense"
+        ),
+        query=colbert_query,
+        using="colbert",
+        limit=5,
+        with_payload=True
+    )
+    points = results.points
+    docs = []
+    for point in points:
+        docs.append(point.payload['passage_text'])
+    return docs
+# DSPy Signature and Module
+class MedicalAnswer(dspy.Signature):
+    question = dspy.InputField(desc="The medical question to answer")
+    context = dspy.OutputField(desc="The answer to the medical question")
+    final_answer = dspy.OutputField(desc="The answer to the medical question")
+class MedicalRAG(dspy.Module):
+    def __init__(self):
+        super().__init__()
+    def forward(self, question):
+        reranked_docs = rerank_with_colbert(question)
+        context_str = "\n".join(reranked_docs)
+        return dspy.ChainOfThought(MedicalAnswer)(
+            question=question,
+            context=context_str
+        )

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+datasets==3.6.0
+chainlit
+git+https://github.com/stanfordnlp/dspy.git
+python-dotenv==1.1.0
+cachetools
+cloudpickle
+qdrant-client[fastembed]>=1.14.2
+dspy-qdrant