Spaces:
Running
Running
Commit
·
d999c28
1
Parent(s):
a0c9251
Upd hybrid history continuity approach with sem-search + SLM verifier on recent sessions
Browse files- app.py +9 -7
- chat-history.md +263 -0
- memory.py +113 -2
app.py
CHANGED
@@ -233,24 +233,26 @@ class RAGMedicalChatbot:
|
|
233 |
## b. Diagnosis RAG from symptom query
|
234 |
diagnosis_guides = retrieve_diagnosis_from_symptoms(user_query) # smart matcher
|
235 |
|
236 |
-
# 2.
|
237 |
-
|
238 |
|
239 |
# 3. Build prompt parts
|
240 |
parts = ["You are a medical chatbot, designed to answer medical questions."]
|
241 |
parts.append("Please format your answer using MarkDown.")
|
242 |
parts.append("**Bold for titles**, *italic for emphasis*, and clear headings.")
|
243 |
-
|
|
|
244 |
if image_diagnosis:
|
245 |
parts.append(
|
246 |
"A user medical image is diagnosed by our VLM agent:\n"
|
247 |
f"{image_diagnosis}\n\n"
|
248 |
"➡️ Please incorporate the above findings in your response if medically relevant.\n\n"
|
249 |
)
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
|
|
254 |
if knowledge_base:
|
255 |
parts.append(f"Example Q&A medical scenario knowledge-base: {knowledge_base}")
|
256 |
# Symptom-Diagnosis prediction RAG
|
|
|
233 |
## b. Diagnosis RAG from symptom query
|
234 |
diagnosis_guides = retrieve_diagnosis_from_symptoms(user_query) # smart matcher
|
235 |
|
236 |
+
# 2. Hybrid Context Retrieval: RAG + Recent History + Intelligent Selection
|
237 |
+
contextual_chunks = memory.get_contextual_chunks(user_id, user_query, lang)
|
238 |
|
239 |
# 3. Build prompt parts
|
240 |
parts = ["You are a medical chatbot, designed to answer medical questions."]
|
241 |
parts.append("Please format your answer using MarkDown.")
|
242 |
parts.append("**Bold for titles**, *italic for emphasis*, and clear headings.")
|
243 |
+
|
244 |
+
# 4. Append image diagnosis from VLM
|
245 |
if image_diagnosis:
|
246 |
parts.append(
|
247 |
"A user medical image is diagnosed by our VLM agent:\n"
|
248 |
f"{image_diagnosis}\n\n"
|
249 |
"➡️ Please incorporate the above findings in your response if medically relevant.\n\n"
|
250 |
)
|
251 |
+
|
252 |
+
# Append contextual chunks from hybrid approach
|
253 |
+
if contextual_chunks:
|
254 |
+
parts.append("Relevant context from conversation history:\n" + "\n".join(contextual_chunks))
|
255 |
+
# Load up guideline (RAG over medical knowledge base)
|
256 |
if knowledge_base:
|
257 |
parts.append(f"Example Q&A medical scenario knowledge-base: {knowledge_base}")
|
258 |
# Symptom-Diagnosis prediction RAG
|
chat-history.md
ADDED
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 🔄 Hybrid Context Retrieval System
|
2 |
+
|
3 |
+
## Overview
|
4 |
+
|
5 |
+
The Medical Chatbot now implements a **hybrid context retrieval system** that combines **semantic search (RAG)** with **recent chat history** to provide more intelligent and contextually aware responses. This addresses the limitation of pure RAG systems that can miss conversational context like "What's the diagnosis again?" or "Can you clarify that?"
|
6 |
+
|
7 |
+
## 🏗️ Architecture
|
8 |
+
|
9 |
+
### Before (Pure RAG)
|
10 |
+
```
|
11 |
+
User Query → Semantic Search → FAISS Index → Relevant Chunks → LLM Response
|
12 |
+
```
|
13 |
+
|
14 |
+
### After (Hybrid Approach)
|
15 |
+
```
|
16 |
+
User Query → Hybrid Context Retrieval → Intelligent Context Selection → LLM Response
|
17 |
+
↓
|
18 |
+
┌─────────────────┬─────────────────┐
|
19 |
+
│ RAG Search │ Recent History │
|
20 |
+
│ (Semantic) │ (Conversational)│
|
21 |
+
└─────────────────┴─────────────────┘
|
22 |
+
↓
|
23 |
+
Gemini Flash Lite Contextual Analysis
|
24 |
+
↓
|
25 |
+
Selected Relevant Context
|
26 |
+
```
|
27 |
+
|
28 |
+
## 🔧 Key Components
|
29 |
+
|
30 |
+
### 1. Memory Manager (`memory.py`)
|
31 |
+
|
32 |
+
#### New Method: `get_recent_chat_history()`
|
33 |
+
```python
|
34 |
+
def get_recent_chat_history(self, user_id: str, num_turns: int = 3) -> List[Dict]:
|
35 |
+
"""
|
36 |
+
Get the most recent chat history with both user questions and bot responses.
|
37 |
+
Returns: [{"user": "question", "bot": "response", "timestamp": time}, ...]
|
38 |
+
"""
|
39 |
+
```
|
40 |
+
|
41 |
+
**Features:**
|
42 |
+
- Stores last 3 conversations by default
|
43 |
+
- Maintains chronological order
|
44 |
+
- Includes both user questions and bot responses
|
45 |
+
- Accessible for conversational continuity
|
46 |
+
|
47 |
+
#### Existing Method: `get_relevant_chunks()`
|
48 |
+
- Semantic search using FAISS
|
49 |
+
- Cosine similarity-based retrieval
|
50 |
+
- Smart deduplication and scoring
|
51 |
+
|
52 |
+
### 2. Chatbot Class (`app.py`)
|
53 |
+
|
54 |
+
#### New Method: `_get_contextual_chunks()`
|
55 |
+
```python
|
56 |
+
def _get_contextual_chunks(self, user_id: str, current_query: str,
|
57 |
+
recent_history: List[Dict], rag_chunks: List[str],
|
58 |
+
lang: str) -> List[str]:
|
59 |
+
```
|
60 |
+
|
61 |
+
**Purpose:**
|
62 |
+
- Analyzes current query against available context
|
63 |
+
- Uses Gemini Flash Lite for intelligent context selection
|
64 |
+
- Combines RAG results with recent history
|
65 |
+
- Ensures conversational continuity
|
66 |
+
|
67 |
+
## 🚀 How It Works
|
68 |
+
|
69 |
+
### Step 1: Context Collection
|
70 |
+
```python
|
71 |
+
# Get both types of context
|
72 |
+
rag_context = memory.get_relevant_chunks(user_id, user_query, top_k=3)
|
73 |
+
recent_history = memory.get_recent_chat_history(user_id, num_turns=3)
|
74 |
+
```
|
75 |
+
|
76 |
+
### Step 2: Contextual Analysis
|
77 |
+
The system sends both context sources to Gemini Flash Lite with this prompt:
|
78 |
+
|
79 |
+
```
|
80 |
+
You are a medical assistant analyzing conversation context to provide relevant information.
|
81 |
+
|
82 |
+
Current user query: "{current_query}"
|
83 |
+
|
84 |
+
Available context information:
|
85 |
+
{recent_history + rag_chunks}
|
86 |
+
|
87 |
+
Task: Analyze the current query and determine which pieces of context are most relevant.
|
88 |
+
|
89 |
+
Consider:
|
90 |
+
1. Is the user asking for clarification about something mentioned before?
|
91 |
+
2. Is the user referencing a previous diagnosis or recommendation?
|
92 |
+
3. Are there any follow-up questions that build on previous responses?
|
93 |
+
4. Which chunks provide the most relevant medical information for the current query?
|
94 |
+
|
95 |
+
Output: Return only the most relevant context chunks that should be included in the response.
|
96 |
+
```
|
97 |
+
|
98 |
+
### Step 3: Intelligent Selection
|
99 |
+
Gemini Flash Lite analyzes the query and selects relevant context from:
|
100 |
+
- **Recent conversations** (for continuity)
|
101 |
+
- **Semantic chunks** (for topic relevance)
|
102 |
+
- **Combined insights** (for comprehensive understanding)
|
103 |
+
|
104 |
+
### Step 4: Context Integration
|
105 |
+
Selected context is integrated into the main LLM prompt, ensuring the response is both:
|
106 |
+
- **Semantically relevant** (from RAG)
|
107 |
+
- **Conversationally continuous** (from recent history)
|
108 |
+
|
109 |
+
## 📊 Benefits
|
110 |
+
|
111 |
+
### 1. **Conversational Continuity**
|
112 |
+
- Handles follow-up questions naturally
|
113 |
+
- Maintains context across multiple exchanges
|
114 |
+
- Understands references to previous responses
|
115 |
+
|
116 |
+
### 2. **Intelligent Context Selection**
|
117 |
+
- No more irrelevant context injection
|
118 |
+
- Gemini Flash Lite decides what's truly relevant
|
119 |
+
- Balances semantic relevance with conversational flow
|
120 |
+
|
121 |
+
### 3. **Fallback Mechanisms**
|
122 |
+
- If contextual analysis fails, falls back to RAG
|
123 |
+
- If RAG fails, falls back to recent history
|
124 |
+
- Ensures system reliability
|
125 |
+
|
126 |
+
### 4. **Performance Optimization**
|
127 |
+
- Uses lightweight Gemini Flash Lite for context analysis
|
128 |
+
- Maintains existing RAG performance
|
129 |
+
- Minimal additional latency
|
130 |
+
|
131 |
+
## 🧪 Example Scenarios
|
132 |
+
|
133 |
+
### Scenario 1: Follow-up Question
|
134 |
+
```
|
135 |
+
User: "I have a headache"
|
136 |
+
Bot: "This could be a tension headache. Try rest and hydration."
|
137 |
+
|
138 |
+
User: "What medication should I take?"
|
139 |
+
Bot: "For tension headaches, try acetaminophen or ibuprofen..."
|
140 |
+
|
141 |
+
User: "Can you clarify the dosage again?"
|
142 |
+
Bot: "For ibuprofen: 200-400mg every 4-6 hours, max 1200mg/day..."
|
143 |
+
```
|
144 |
+
**Result:** System retrieves ibuprofen dosage from recent conversation, not just semantic search.
|
145 |
+
|
146 |
+
### Scenario 2: Reference to Previous Diagnosis
|
147 |
+
```
|
148 |
+
User: "What was the diagnosis you mentioned?"
|
149 |
+
Bot: "I previously diagnosed this as a tension headache based on your symptoms..."
|
150 |
+
```
|
151 |
+
**Result:** System understands the reference and retrieves previous diagnosis.
|
152 |
+
|
153 |
+
### Scenario 3: Clarification Request
|
154 |
+
```
|
155 |
+
User: "I didn't understand the part about prevention"
|
156 |
+
Bot: "Let me clarify the prevention steps I mentioned earlier..."
|
157 |
+
```
|
158 |
+
**Result:** System identifies the clarification request and retrieves relevant previous response.
|
159 |
+
|
160 |
+
## ⚙️ Configuration
|
161 |
+
|
162 |
+
### Environment Variables
|
163 |
+
```bash
|
164 |
+
FlashAPI=your_gemini_api_key # For both main LLM and contextual analysis
|
165 |
+
```
|
166 |
+
|
167 |
+
### Memory Settings
|
168 |
+
```python
|
169 |
+
memory = MemoryManager(
|
170 |
+
max_users=1000, # Maximum users in memory
|
171 |
+
history_per_user=10, # Chat history per user
|
172 |
+
max_chunks=30 # Maximum chunks per user
|
173 |
+
)
|
174 |
+
```
|
175 |
+
|
176 |
+
### Context Parameters
|
177 |
+
```python
|
178 |
+
# Recent history retrieval
|
179 |
+
recent_history = memory.get_recent_chat_history(user_id, num_turns=3)
|
180 |
+
|
181 |
+
# RAG retrieval
|
182 |
+
rag_chunks = memory.get_relevant_chunks(user_id, query, top_k=3, min_sim=0.30)
|
183 |
+
|
184 |
+
# Contextual analysis
|
185 |
+
contextual_chunks = self._get_contextual_chunks(
|
186 |
+
user_id, current_query, recent_history, rag_chunks, lang
|
187 |
+
)
|
188 |
+
```
|
189 |
+
|
190 |
+
## 🔍 Monitoring & Debugging
|
191 |
+
|
192 |
+
### Logging
|
193 |
+
The system provides comprehensive logging:
|
194 |
+
```python
|
195 |
+
logger.info(f"[Contextual] Gemini selected {len(relevant_chunks)} relevant chunks")
|
196 |
+
logger.warning(f"[Contextual] Gemini contextual analysis failed: {e}")
|
197 |
+
```
|
198 |
+
|
199 |
+
### Performance Metrics
|
200 |
+
- Context retrieval time
|
201 |
+
- Number of relevant chunks selected
|
202 |
+
- Fallback usage statistics
|
203 |
+
|
204 |
+
## 🚨 Error Handling
|
205 |
+
|
206 |
+
### Fallback Strategy
|
207 |
+
1. **Primary:** Gemini Flash Lite contextual analysis
|
208 |
+
2. **Secondary:** RAG semantic search
|
209 |
+
3. **Tertiary:** Recent chat history
|
210 |
+
4. **Final:** No context (minimal response)
|
211 |
+
|
212 |
+
### Error Scenarios
|
213 |
+
- Gemini API failure → Fall back to RAG
|
214 |
+
- RAG failure → Fall back to recent history
|
215 |
+
- Memory corruption → Reset user session
|
216 |
+
|
217 |
+
## 🔮 Future Enhancements
|
218 |
+
|
219 |
+
### 1. **Context Scoring**
|
220 |
+
- Implement confidence scores for context relevance
|
221 |
+
- Weight recent history vs. semantic chunks
|
222 |
+
- Dynamic threshold adjustment
|
223 |
+
|
224 |
+
### 2. **Multi-turn Context**
|
225 |
+
- Extend beyond 3 recent turns
|
226 |
+
- Implement conversation threading
|
227 |
+
- Handle multiple conversation topics
|
228 |
+
|
229 |
+
### 3. **Context Compression**
|
230 |
+
- Summarize long conversation histories
|
231 |
+
- Implement context pruning strategies
|
232 |
+
- Optimize memory usage
|
233 |
+
|
234 |
+
### 4. **Language-specific Context**
|
235 |
+
- Enhance context analysis for different languages
|
236 |
+
- Implement language-aware context selection
|
237 |
+
- Cultural context considerations
|
238 |
+
|
239 |
+
## 📝 Testing
|
240 |
+
|
241 |
+
Run the test script to verify functionality:
|
242 |
+
```bash
|
243 |
+
cd Medical-Chatbot
|
244 |
+
python test_hybrid_context.py
|
245 |
+
```
|
246 |
+
|
247 |
+
This will demonstrate:
|
248 |
+
- Memory management
|
249 |
+
- Context retrieval
|
250 |
+
- Hybrid approach simulation
|
251 |
+
- Expected behavior examples
|
252 |
+
|
253 |
+
## 🎯 Summary
|
254 |
+
|
255 |
+
The hybrid context retrieval system transforms the Medical Chatbot from a simple RAG system to an intelligent, contextually aware assistant that:
|
256 |
+
|
257 |
+
✅ **Maintains conversational continuity**
|
258 |
+
✅ **Provides semantically relevant responses**
|
259 |
+
✅ **Handles follow-up questions naturally**
|
260 |
+
✅ **Uses AI for intelligent context selection**
|
261 |
+
✅ **Maintains performance and reliability**
|
262 |
+
|
263 |
+
This system addresses real-world conversational patterns that pure RAG systems miss, making the chatbot more human-like and useful in extended medical consultations.
|
memory.py
CHANGED
@@ -45,7 +45,7 @@ class MemoryManager:
|
|
45 |
continue # skip duplicate
|
46 |
vec = self._embed(chunk["text"])
|
47 |
self.chunk_index[user_id].add(np.array([vec]))
|
48 |
-
# Store each chunk
|
49 |
chunk_with_vec = {
|
50 |
**chunk,
|
51 |
"vec": vec,
|
@@ -81,11 +81,122 @@ class MemoryManager:
|
|
81 |
# logger.info(f"[Memory] RAG Retrieved Topic: {results}") # Inspect vector data
|
82 |
return [f"### Topic: {c['tag']}\n{c['text']}" for _, c in results]
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
def get_context(self, user_id: str, num_turns: int = 3) -> str:
|
86 |
history = list(self.text_cache.get(user_id, []))[-num_turns:]
|
87 |
return "\n".join(f"User: {q}\nBot: {r}" for q, r in history)
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
def reset(self, user_id: str):
|
90 |
self._drop_user(user_id)
|
91 |
|
@@ -108,7 +219,7 @@ class MemoryManager:
|
|
108 |
"""Trim chunk list + rebuild FAISS index for user."""
|
109 |
self.chunk_meta[user_id] = self.chunk_meta[user_id][-keep_last:]
|
110 |
index = self._new_index()
|
111 |
-
# Store each chunk
|
112 |
for chunk in self.chunk_meta[user_id]:
|
113 |
index.add(np.array([chunk["vec"]]))
|
114 |
self.chunk_index[user_id] = index
|
|
|
45 |
continue # skip duplicate
|
46 |
vec = self._embed(chunk["text"])
|
47 |
self.chunk_index[user_id].add(np.array([vec]))
|
48 |
+
# Store each chunk's vector once and reuse it
|
49 |
chunk_with_vec = {
|
50 |
**chunk,
|
51 |
"vec": vec,
|
|
|
81 |
# logger.info(f"[Memory] RAG Retrieved Topic: {results}") # Inspect vector data
|
82 |
return [f"### Topic: {c['tag']}\n{c['text']}" for _, c in results]
|
83 |
|
84 |
+
def get_recent_chat_history(self, user_id: str, num_turns: int = 3) -> List[Dict]:
|
85 |
+
"""
|
86 |
+
Get the most recent chat history with both user questions and bot responses.
|
87 |
+
Returns: [{"user": "question", "bot": "response", "timestamp": time}, ...]
|
88 |
+
"""
|
89 |
+
if user_id not in self.text_cache:
|
90 |
+
return []
|
91 |
+
# Get the most recent chat history
|
92 |
+
recent_history = list(self.text_cache[user_id])[-num_turns:]
|
93 |
+
formatted_history = []
|
94 |
+
# Format the history
|
95 |
+
for query, response in recent_history:
|
96 |
+
formatted_history.append({
|
97 |
+
"user": query,
|
98 |
+
"bot": response,
|
99 |
+
"timestamp": time.time() # We could store actual timestamps if needed
|
100 |
+
})
|
101 |
+
|
102 |
+
return formatted_history
|
103 |
|
104 |
def get_context(self, user_id: str, num_turns: int = 3) -> str:
|
105 |
history = list(self.text_cache.get(user_id, []))[-num_turns:]
|
106 |
return "\n".join(f"User: {q}\nBot: {r}" for q, r in history)
|
107 |
|
108 |
+
def get_contextual_chunks(self, user_id: str, current_query: str, lang: str = "EN") -> List[str]:
|
109 |
+
"""
|
110 |
+
Use Gemini Flash Lite to intelligently select relevant context from both recent history and RAG chunks.
|
111 |
+
This ensures conversational continuity while maintaining semantic relevance.
|
112 |
+
"""
|
113 |
+
# Get both types of context
|
114 |
+
recent_history = self.get_recent_chat_history(user_id, num_turns=3)
|
115 |
+
rag_chunks = self.get_relevant_chunks(user_id, current_query, top_k=3)
|
116 |
+
|
117 |
+
if not recent_history and not rag_chunks:
|
118 |
+
return []
|
119 |
+
|
120 |
+
# Prepare context for Gemini to analyze
|
121 |
+
context_parts = []
|
122 |
+
|
123 |
+
# Add recent chat history
|
124 |
+
if recent_history:
|
125 |
+
history_text = "\n".join([
|
126 |
+
f"User: {item['user']}\nBot: {item['bot']}"
|
127 |
+
for item in recent_history
|
128 |
+
])
|
129 |
+
context_parts.append(f"Recent conversation history:\n{history_text}")
|
130 |
+
|
131 |
+
# Add RAG chunks
|
132 |
+
if rag_chunks:
|
133 |
+
context_parts.append(f"Semantically relevant chunks:\n" + "\n".join(rag_chunks))
|
134 |
+
|
135 |
+
# Build contextual awareness prompt
|
136 |
+
contextual_prompt = f"""
|
137 |
+
You are a medical assistant analyzing conversation context to provide relevant information.
|
138 |
+
|
139 |
+
Current user query: "{current_query}"
|
140 |
+
|
141 |
+
Available context information:
|
142 |
+
{chr(10).join(context_parts)}
|
143 |
+
|
144 |
+
Task: Analyze the current query and determine which pieces of context are most relevant.
|
145 |
+
|
146 |
+
Consider:
|
147 |
+
1. Is the user asking for clarification about something mentioned before?
|
148 |
+
2. Is the user referencing a previous diagnosis or recommendation?
|
149 |
+
3. Are there any follow-up questions that build on previous responses?
|
150 |
+
4. Which chunks provide the most relevant medical information for the current query?
|
151 |
+
|
152 |
+
Output: Return only the most relevant context chunks that should be included in the response.
|
153 |
+
Format each chunk with a brief explanation of why it's relevant.
|
154 |
+
If no context is relevant, return "No relevant context found."
|
155 |
+
|
156 |
+
Language context: {lang}
|
157 |
+
"""
|
158 |
+
|
159 |
+
try:
|
160 |
+
# Use Gemini Flash Lite for contextual analysis
|
161 |
+
client = genai.Client(api_key=os.getenv("FlashAPI"))
|
162 |
+
result = client.models.generate_content(
|
163 |
+
model=_LLM_SMALL,
|
164 |
+
contents=contextual_prompt
|
165 |
+
)
|
166 |
+
contextual_response = result.text.strip()
|
167 |
+
|
168 |
+
# Parse the response to extract relevant chunks
|
169 |
+
if "No relevant context found" in contextual_response:
|
170 |
+
return []
|
171 |
+
|
172 |
+
# Extract relevant chunks from Gemini's analysis
|
173 |
+
relevant_chunks = []
|
174 |
+
lines = contextual_response.strip().split('\n')
|
175 |
+
current_chunk = ""
|
176 |
+
|
177 |
+
for line in lines:
|
178 |
+
if line.strip().startswith(('Chunk:', 'Context:', 'Relevant:')):
|
179 |
+
if current_chunk.strip():
|
180 |
+
relevant_chunks.append(current_chunk.strip())
|
181 |
+
current_chunk = line
|
182 |
+
else:
|
183 |
+
current_chunk += "\n" + line
|
184 |
+
|
185 |
+
if current_chunk.strip():
|
186 |
+
relevant_chunks.append(current_chunk.strip())
|
187 |
+
|
188 |
+
logger.info(f"[Contextual] Gemini selected {len(relevant_chunks)} relevant chunks")
|
189 |
+
return relevant_chunks
|
190 |
+
|
191 |
+
except Exception as e:
|
192 |
+
logger.warning(f"[Contextual] Gemini contextual analysis failed: {e}")
|
193 |
+
# Fallback: return RAG chunks if available, otherwise recent history
|
194 |
+
if rag_chunks:
|
195 |
+
return rag_chunks
|
196 |
+
elif recent_history:
|
197 |
+
return [f"Recent context: {item['user']} → {item['bot']}" for item in recent_history[-2:]]
|
198 |
+
return []
|
199 |
+
|
200 |
def reset(self, user_id: str):
|
201 |
self._drop_user(user_id)
|
202 |
|
|
|
219 |
"""Trim chunk list + rebuild FAISS index for user."""
|
220 |
self.chunk_meta[user_id] = self.chunk_meta[user_id][-keep_last:]
|
221 |
index = self._new_index()
|
222 |
+
# Store each chunk's vector once and reuse it.
|
223 |
for chunk in self.chunk_meta[user_id]:
|
224 |
index.add(np.array([chunk["vec"]]))
|
225 |
self.chunk_index[user_id] = index
|