Spaces:
Running
Running
Commit
·
7b77d36
1
Parent(s):
d999c28
Update hybrid RAG with summariser prompting
Browse files
memory.py
CHANGED
@@ -105,21 +105,24 @@ class MemoryManager:
|
|
105 |
history = list(self.text_cache.get(user_id, []))[-num_turns:]
|
106 |
return "\n".join(f"User: {q}\nBot: {r}" for q, r in history)
|
107 |
|
108 |
-
def get_contextual_chunks(self, user_id: str, current_query: str, lang: str = "EN") ->
|
109 |
"""
|
110 |
-
Use Gemini Flash Lite to
|
111 |
-
This ensures conversational continuity while
|
112 |
"""
|
113 |
# Get both types of context
|
114 |
recent_history = self.get_recent_chat_history(user_id, num_turns=3)
|
115 |
rag_chunks = self.get_relevant_chunks(user_id, current_query, top_k=3)
|
116 |
|
117 |
-
|
118 |
-
|
119 |
|
120 |
-
#
|
|
|
|
|
|
|
|
|
121 |
context_parts = []
|
122 |
-
|
123 |
# Add recent chat history
|
124 |
if recent_history:
|
125 |
history_text = "\n".join([
|
@@ -127,75 +130,69 @@ class MemoryManager:
|
|
127 |
for item in recent_history
|
128 |
])
|
129 |
context_parts.append(f"Recent conversation history:\n{history_text}")
|
130 |
-
|
131 |
# Add RAG chunks
|
132 |
if rag_chunks:
|
133 |
-
|
|
|
134 |
|
135 |
-
# Build
|
136 |
-
|
137 |
-
You are a medical assistant
|
138 |
|
139 |
Current user query: "{current_query}"
|
140 |
|
141 |
Available context information:
|
142 |
{chr(10).join(context_parts)}
|
143 |
|
144 |
-
Task:
|
145 |
|
146 |
-
|
147 |
-
1.
|
148 |
-
2.
|
149 |
-
3.
|
150 |
-
4.
|
|
|
151 |
|
152 |
-
Output:
|
153 |
-
|
154 |
-
If no context is relevant, return "No relevant context found."
|
155 |
|
156 |
Language context: {lang}
|
157 |
"""
|
158 |
|
|
|
|
|
159 |
try:
|
160 |
-
# Use Gemini Flash Lite for
|
161 |
client = genai.Client(api_key=os.getenv("FlashAPI"))
|
162 |
result = client.models.generate_content(
|
163 |
model=_LLM_SMALL,
|
164 |
-
contents=
|
165 |
)
|
166 |
-
|
|
|
|
|
|
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
return []
|
171 |
-
|
172 |
-
# Extract relevant chunks from Gemini's analysis
|
173 |
-
relevant_chunks = []
|
174 |
-
lines = contextual_response.strip().split('\n')
|
175 |
-
current_chunk = ""
|
176 |
-
|
177 |
-
for line in lines:
|
178 |
-
if line.strip().startswith(('Chunk:', 'Context:', 'Relevant:')):
|
179 |
-
if current_chunk.strip():
|
180 |
-
relevant_chunks.append(current_chunk.strip())
|
181 |
-
current_chunk = line
|
182 |
-
else:
|
183 |
-
current_chunk += "\n" + line
|
184 |
-
|
185 |
-
if current_chunk.strip():
|
186 |
-
relevant_chunks.append(current_chunk.strip())
|
187 |
-
|
188 |
-
logger.info(f"[Contextual] Gemini selected {len(relevant_chunks)} relevant chunks")
|
189 |
-
return relevant_chunks
|
190 |
|
191 |
except Exception as e:
|
192 |
-
logger.warning(f"[Contextual] Gemini
|
193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
if rag_chunks:
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
|
|
199 |
|
200 |
def reset(self, user_id: str):
|
201 |
self._drop_user(user_id)
|
|
|
105 |
history = list(self.text_cache.get(user_id, []))[-num_turns:]
|
106 |
return "\n".join(f"User: {q}\nBot: {r}" for q, r in history)
|
107 |
|
108 |
+
def get_contextual_chunks(self, user_id: str, current_query: str, lang: str = "EN") -> str:
|
109 |
"""
|
110 |
+
Use Gemini Flash Lite to summarize the relevant context from both recent history and RAG chunks.
|
111 |
+
This ensures conversational continuity while providing a concise summary for the main LLM.
|
112 |
"""
|
113 |
# Get both types of context
|
114 |
recent_history = self.get_recent_chat_history(user_id, num_turns=3)
|
115 |
rag_chunks = self.get_relevant_chunks(user_id, current_query, top_k=3)
|
116 |
|
117 |
+
logger.info(f"[Contextual] Retrieved {len(recent_history)} recent history items")
|
118 |
+
logger.info(f"[Contextual] Retrieved {len(rag_chunks)} RAG chunks")
|
119 |
|
120 |
+
# Return empty string if no context is found
|
121 |
+
if not recent_history and not rag_chunks:
|
122 |
+
logger.info(f"[Contextual] No context found, returning empty string")
|
123 |
+
return ""
|
124 |
+
# Prepare context for Gemini to summarize
|
125 |
context_parts = []
|
|
|
126 |
# Add recent chat history
|
127 |
if recent_history:
|
128 |
history_text = "\n".join([
|
|
|
130 |
for item in recent_history
|
131 |
])
|
132 |
context_parts.append(f"Recent conversation history:\n{history_text}")
|
|
|
133 |
# Add RAG chunks
|
134 |
if rag_chunks:
|
135 |
+
rag_text = "\n".join(rag_chunks)
|
136 |
+
context_parts.append(f"Semantically relevant medical information:\n{rag_text}")
|
137 |
|
138 |
+
# Build summarization prompt
|
139 |
+
summarization_prompt = f"""
|
140 |
+
You are a medical assistant creating a concise summary of conversation context for continuity.
|
141 |
|
142 |
Current user query: "{current_query}"
|
143 |
|
144 |
Available context information:
|
145 |
{chr(10).join(context_parts)}
|
146 |
|
147 |
+
Task: Create a brief, coherent summary that captures the key points from the conversation history and relevant medical information that are important for understanding the current query.
|
148 |
|
149 |
+
Guidelines:
|
150 |
+
1. Focus on medical symptoms, diagnoses, treatments, or recommendations mentioned
|
151 |
+
2. Include any patient concerns or questions that are still relevant
|
152 |
+
3. Highlight any follow-up needs or pending clarifications
|
153 |
+
4. Keep the summary concise but comprehensive enough for context
|
154 |
+
5. Maintain conversational flow and continuity
|
155 |
|
156 |
+
Output: Provide a single, well-structured summary paragraph that can be used as context for the main LLM to provide a coherent response.
|
157 |
+
If no relevant context exists, return "No relevant context found."
|
|
|
158 |
|
159 |
Language context: {lang}
|
160 |
"""
|
161 |
|
162 |
+
logger.debug(f"[Contextual] Full prompt: {summarization_prompt}")
|
163 |
+
# TODO: log the length of each prompt part (no such loop is implemented yet)
|
164 |
try:
|
165 |
+
# Use Gemini Flash Lite for summarization
|
166 |
client = genai.Client(api_key=os.getenv("FlashAPI"))
|
167 |
result = client.models.generate_content(
|
168 |
model=_LLM_SMALL,
|
169 |
+
contents=summarization_prompt
|
170 |
)
|
171 |
+
summary = result.text.strip()
|
172 |
+
if "No relevant context found" in summary:
|
173 |
+
logger.info(f"[Contextual] Gemini indicated no relevant context found")
|
174 |
+
return ""
|
175 |
|
176 |
+
logger.info(f"[Contextual] Gemini created summary: {summary[:100]}...")
|
177 |
+
return summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
|
179 |
except Exception as e:
|
180 |
+
logger.warning(f"[Contextual] Gemini summarization failed: {e}")
|
181 |
+
logger.info(f"[Contextual] Using fallback summarization method")
|
182 |
+
# Fallback: create a simple summary
|
183 |
+
fallback_summary = []
|
184 |
+
# Fallback: add recent history
|
185 |
+
if recent_history:
|
186 |
+
recent_summary = f"Recent conversation: User asked about {recent_history[-1]['user'][:50]}... and received a response about {recent_history[-1]['bot'][:50]}..."
|
187 |
+
fallback_summary.append(recent_summary)
|
188 |
+
logger.info(f"[Contextual] Fallback: Added recent history summary")
|
189 |
+
# Fallback: add RAG chunks
|
190 |
if rag_chunks:
|
191 |
+
rag_summary = f"Relevant medical information: {len(rag_chunks)} chunks found covering various medical topics."
|
192 |
+
fallback_summary.append(rag_summary)
|
193 |
+
logger.info(f"[Contextual] Fallback: Added RAG chunks summary")
|
194 |
+
final_fallback = " ".join(fallback_summary) if fallback_summary else ""
|
195 |
+
return final_fallback
|
196 |
|
197 |
def reset(self, user_id: str):
|
198 |
self._drop_user(user_id)
|