LiamKhoaLe committed on
Commit
7b77d36
·
1 Parent(s): d999c28

Update hybrid RAG with summariser prompting

Browse files
Files changed (1) hide show
  1. memory.py +49 -52
memory.py CHANGED
@@ -105,21 +105,24 @@ class MemoryManager:
105
  history = list(self.text_cache.get(user_id, []))[-num_turns:]
106
  return "\n".join(f"User: {q}\nBot: {r}" for q, r in history)
107
 
108
- def get_contextual_chunks(self, user_id: str, current_query: str, lang: str = "EN") -> List[str]:
109
  """
110
- Use Gemini Flash Lite to intelligently select relevant context from both recent history and RAG chunks.
111
- This ensures conversational continuity while maintaining semantic relevance.
112
  """
113
  # Get both types of context
114
  recent_history = self.get_recent_chat_history(user_id, num_turns=3)
115
  rag_chunks = self.get_relevant_chunks(user_id, current_query, top_k=3)
116
 
117
- if not recent_history and not rag_chunks:
118
- return []
119
 
120
- # Prepare context for Gemini to analyze
 
 
 
 
121
  context_parts = []
122
-
123
  # Add recent chat history
124
  if recent_history:
125
  history_text = "\n".join([
@@ -127,75 +130,69 @@ class MemoryManager:
127
  for item in recent_history
128
  ])
129
  context_parts.append(f"Recent conversation history:\n{history_text}")
130
-
131
  # Add RAG chunks
132
  if rag_chunks:
133
- context_parts.append(f"Semantically relevant chunks:\n" + "\n".join(rag_chunks))
 
134
 
135
- # Build contextual awareness prompt
136
- contextual_prompt = f"""
137
- You are a medical assistant analyzing conversation context to provide relevant information.
138
 
139
  Current user query: "{current_query}"
140
 
141
  Available context information:
142
  {chr(10).join(context_parts)}
143
 
144
- Task: Analyze the current query and determine which pieces of context are most relevant.
145
 
146
- Consider:
147
- 1. Is the user asking for clarification about something mentioned before?
148
- 2. Is the user referencing a previous diagnosis or recommendation?
149
- 3. Are there any follow-up questions that build on previous responses?
150
- 4. Which chunks provide the most relevant medical information for the current query?
 
151
 
152
- Output: Return only the most relevant context chunks that should be included in the response.
153
- Format each chunk with a brief explanation of why it's relevant.
154
- If no context is relevant, return "No relevant context found."
155
 
156
  Language context: {lang}
157
  """
158
 
 
 
159
  try:
160
- # Use Gemini Flash Lite for contextual analysis
161
  client = genai.Client(api_key=os.getenv("FlashAPI"))
162
  result = client.models.generate_content(
163
  model=_LLM_SMALL,
164
- contents=contextual_prompt
165
  )
166
- contextual_response = result.text.strip()
 
 
 
167
 
168
- # Parse the response to extract relevant chunks
169
- if "No relevant context found" in contextual_response:
170
- return []
171
-
172
- # Extract relevant chunks from Gemini's analysis
173
- relevant_chunks = []
174
- lines = contextual_response.strip().split('\n')
175
- current_chunk = ""
176
-
177
- for line in lines:
178
- if line.strip().startswith(('Chunk:', 'Context:', 'Relevant:')):
179
- if current_chunk.strip():
180
- relevant_chunks.append(current_chunk.strip())
181
- current_chunk = line
182
- else:
183
- current_chunk += "\n" + line
184
-
185
- if current_chunk.strip():
186
- relevant_chunks.append(current_chunk.strip())
187
-
188
- logger.info(f"[Contextual] Gemini selected {len(relevant_chunks)} relevant chunks")
189
- return relevant_chunks
190
 
191
  except Exception as e:
192
- logger.warning(f"[Contextual] Gemini contextual analysis failed: {e}")
193
- # Fallback: return RAG chunks if available, otherwise recent history
 
 
 
 
 
 
 
 
194
  if rag_chunks:
195
- return rag_chunks
196
- elif recent_history:
197
- return [f"Recent context: {item['user']} {item['bot']}" for item in recent_history[-2:]]
198
- return []
 
199
 
200
  def reset(self, user_id: str):
201
  self._drop_user(user_id)
 
105
  history = list(self.text_cache.get(user_id, []))[-num_turns:]
106
  return "\n".join(f"User: {q}\nBot: {r}" for q, r in history)
107
 
108
+ def get_contextual_chunks(self, user_id: str, current_query: str, lang: str = "EN") -> str:
109
  """
110
+ Use Gemini Flash Lite to create a summarization of relevant context from both recent history and RAG chunks.
111
+ This ensures conversational continuity while providing a concise summary for the main LLM.
112
  """
113
  # Get both types of context
114
  recent_history = self.get_recent_chat_history(user_id, num_turns=3)
115
  rag_chunks = self.get_relevant_chunks(user_id, current_query, top_k=3)
116
 
117
+ logger.info(f"[Contextual] Retrieved {len(recent_history)} recent history items")
118
+ logger.info(f"[Contextual] Retrieved {len(rag_chunks)} RAG chunks")
119
 
120
+ # Return empty string if no context is found
121
+ if not recent_history and not rag_chunks:
122
+ logger.info(f"[Contextual] No context found, returning empty string")
123
+ return ""
124
+ # Prepare context for Gemini to summarize
125
  context_parts = []
 
126
  # Add recent chat history
127
  if recent_history:
128
  history_text = "\n".join([
 
130
  for item in recent_history
131
  ])
132
  context_parts.append(f"Recent conversation history:\n{history_text}")
 
133
  # Add RAG chunks
134
  if rag_chunks:
135
+ rag_text = "\n".join(rag_chunks)
136
+ context_parts.append(f"Semantically relevant medical information:\n{rag_text}")
137
 
138
+ # Build summarization prompt
139
+ summarization_prompt = f"""
140
+ You are a medical assistant creating a concise summary of conversation context for continuity.
141
 
142
  Current user query: "{current_query}"
143
 
144
  Available context information:
145
  {chr(10).join(context_parts)}
146
 
147
+ Task: Create a brief, coherent summary that captures the key points from the conversation history and relevant medical information that are important for understanding the current query.
148
 
149
+ Guidelines:
150
+ 1. Focus on medical symptoms, diagnoses, treatments, or recommendations mentioned
151
+ 2. Include any patient concerns or questions that are still relevant
152
+ 3. Highlight any follow-up needs or pending clarifications
153
+ 4. Keep the summary concise but comprehensive enough for context
154
+ 5. Maintain conversational flow and continuity
155
 
156
+ Output: Provide a single, well-structured summary paragraph that can be used as context for the main LLM to provide a coherent response.
157
+ If no relevant context exists, return "No relevant context found."
 
158
 
159
  Language context: {lang}
160
  """
161
 
162
+ logger.debug(f"[Contextual] Full prompt: {summarization_prompt}")
163
+ # Loop through the prompt and log the length of each part
164
  try:
165
+ # Use Gemini Flash Lite for summarization
166
  client = genai.Client(api_key=os.getenv("FlashAPI"))
167
  result = client.models.generate_content(
168
  model=_LLM_SMALL,
169
+ contents=summarization_prompt
170
  )
171
+ summary = result.text.strip()
172
+ if "No relevant context found" in summary:
173
+ logger.info(f"[Contextual] Gemini indicated no relevant context found")
174
+ return ""
175
 
176
+ logger.info(f"[Contextual] Gemini created summary: {summary[:100]}...")
177
+ return summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  except Exception as e:
180
+ logger.warning(f"[Contextual] Gemini summarization failed: {e}")
181
+ logger.info(f"[Contextual] Using fallback summarization method")
182
+ # Fallback: create a simple summary
183
+ fallback_summary = []
184
+ # Fallback: add recent history
185
+ if recent_history:
186
+ recent_summary = f"Recent conversation: User asked about {recent_history[-1]['user'][:50]}... and received a response about {recent_history[-1]['bot'][:50]}..."
187
+ fallback_summary.append(recent_summary)
188
+ logger.info(f"[Contextual] Fallback: Added recent history summary")
189
+ # Fallback: add RAG chunks
190
  if rag_chunks:
191
+ rag_summary = f"Relevant medical information: {len(rag_chunks)} chunks found covering various medical topics."
192
+ fallback_summary.append(rag_summary)
193
+ logger.info(f"[Contextual] Fallback: Added RAG chunks summary")
194
+ final_fallback = " ".join(fallback_summary) if fallback_summary else ""
195
+ return final_fallback
196
 
197
  def reset(self, user_id: str):
198
  self._drop_user(user_id)