Spaces:

BinKhoaLe1812
/

Medical-Chatbot

Running

App Files Files Community

LiamKhoaLe committed on Jul 6

Commit

c29409a

1 Parent(s): 1ca4ee7

Migrate system to use Gemini Flash Lite for NLP tasks

Browse files

Files changed (1) hide show

memory.py +42 -42

memory.py CHANGED Viewed

@@ -5,20 +5,12 @@ import faiss
 from collections import defaultdict, deque
 from typing import List
 from sentence_transformers import SentenceTransformer
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
-# Embedding model (384d)
 embedding_model = SentenceTransformer("/app/model_cache", device="cpu").half()
-# English summarizer
-summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1)
-# Lightweight MarianMT translation models (VI → EN and ZH → EN)
-translation_models = {
-    "VI": pipeline("translation", model="Helsinki-NLP/opus-mt-vi-en", device=-1),
-    "ZH": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=-1)
-}
 class MemoryManager:
     def __init__(self, max_users=1000, history_per_user=10):
         self.text_cache = defaultdict(lambda: deque(maxlen=history_per_user))
@@ -32,10 +24,11 @@ class MemoryManager:
                 oldest = self.user_queue.popleft()
                 self._drop_user(oldest)
             self.user_queue.append(user_id)
-        # Normalize info
         self.text_cache[user_id].append((query.strip(), response.strip()))
         chunks = self.chunk_response(response, lang)
-        # Encode history
         for chunk in chunks:
             vec = embedding_model.encode(chunk, convert_to_numpy=True)
             self.chunk_index[user_id].add(np.array([vec]))
@@ -44,7 +37,7 @@ class MemoryManager:
     def get_relevant_chunks(self, user_id: str, query: str, top_k: int = 2):
         if user_id not in self.chunk_index or self.chunk_index[user_id].ntotal == 0:
             return []
-        # Encode query
         vec = embedding_model.encode(query, convert_to_numpy=True)
         D, I = self.chunk_index[user_id].search(np.array([vec]), k=top_k)
         return [self.chunk_texts[user_id][i] for i in I[0] if i < len(self.chunk_texts[user_id])]
@@ -65,32 +58,39 @@ class MemoryManager:
     def chunk_response(self, response: str, lang: str) -> List[str]:
         """
-        Smart multilingual chunking and summarization:
-        - Translate VI/ZH into English for processing.
-        - Chunk semantically.
-        - Summarize large parts.
         """
-        # Step 1: Translate if needed
-        if lang.upper() in translation_models:
-            try:
-                translated = translation_models[lang.upper()](response, max_length=512)[0]['translation_text']
-            except Exception:
-                translated = response  # fallback
-        else:
-            translated = response
-        # Step 2: Split into rough semantic blocks
-        raw_chunks = [c.strip() for c in re.split(
-            r'\n{2,}|\n(?=\*\*)|(?<=\.)\s+(?=[A-Z])', translated) if c.strip()]
-        # Step 3: Summarize long ones
-        summarized_chunks = []
-        for chunk in raw_chunks:
-            if len(chunk.split()) > 50:
-                try:
-                    summary = summarizer(chunk, max_length=60, min_length=10, do_sample=False)[0]['summary_text']
-                    summarized_chunks.append(summary.strip())
-                except Exception:
-                    summarized_chunks.append(chunk)
-            else:
-                summarized_chunks.append(chunk)
-        # Final
-        return summarized_chunks

 from collections import defaultdict, deque
 from typing import List
 from sentence_transformers import SentenceTransformer
+from google import genai  # must be configured in app.py and imported globally
+_LLM = "gemini-2.5-flash-lite-preview-06-17" # Small model for NLP simple tasks
+# Load embedding model
 embedding_model = SentenceTransformer("/app/model_cache", device="cpu").half()
 class MemoryManager:
     def __init__(self, max_users=1000, history_per_user=10):
         self.text_cache = defaultdict(lambda: deque(maxlen=history_per_user))
                 oldest = self.user_queue.popleft()
                 self._drop_user(oldest)
             self.user_queue.append(user_id)
         self.text_cache[user_id].append((query.strip(), response.strip()))
+        # Use Gemini to summarize and chunk smartly
         chunks = self.chunk_response(response, lang)
+        # Encode chunk
         for chunk in chunks:
             vec = embedding_model.encode(chunk, convert_to_numpy=True)
             self.chunk_index[user_id].add(np.array([vec]))
     def get_relevant_chunks(self, user_id: str, query: str, top_k: int = 2):
         """
         Return up to `top_k` stored chunks for `user_id` that are nearest to `query`.

         The query is embedded with the module-level `embedding_model` and searched
         against the user's vector index. An unknown user or an empty index yields
         an empty list.
         """
         if user_id not in self.chunk_index or self.chunk_index[user_id].ntotal == 0:
             return []
         # Encode user query
         vec = embedding_model.encode(query, convert_to_numpy=True)
         _, I = self.chunk_index[user_id].search(np.array([vec]), k=top_k)
         texts = self.chunk_texts[user_id]
         # FAISS pads the label row with -1 when the index holds fewer than `top_k`
         # vectors. A bare `i < len(texts)` check lets -1 through, and Python's
         # negative indexing would then return the last chunk as a bogus hit.
         return [texts[i] for i in I[0] if 0 <= i < len(texts)]
     def chunk_response(self, response: str, lang: str) -> List[str]:
         """
         Use Gemini to translate (if needed), summarize, and chunk the response.
         Assumes Gemini API is configured via google.genai globally in app.py.

         Args:
             response: The assistant response text to condense.
             lang: Language code of `response`; any value other than "EN"
                 (case-insensitive) adds a translate-to-English instruction.

         Returns:
             The non-empty summaries parsed from the model output (split on `---`).
             If the Gemini call fails or yields no usable text, a single-element
             list holding the stripped original response.
         """
         # Full instruction
         instructions = []
         # Only add translation if necessary
         if lang.upper() != "EN":
             instructions.append("- Translate the response to English.")
         instructions.append("- Break the translated (or original) text into semantically distinct parts, grouped by medical topic or symptom.")
         instructions.append("- For each part, generate a clear, concise summary. The summary may vary in length depending on the complexity of the topic — do not omit key clinical instructions.")
         instructions.append("- Separate each part using three dashes `---` on a new line.")
         # Grouped sub-instructions
         joined_instructions = "\n".join(instructions)
         # Prompting
         prompt = f"""
         You are a medical assistant helping organize and condense a clinical response.
         Below is the user-provided medical response written in `{lang}`:
         ------------------------
         {response}
         ------------------------
         Please perform the following tasks:
         {joined_instructions}
         Output only the structured summaries, separated by dashes.
         """
         try:
             client = genai.Client()
             result = client.models.generate_content(
                 model=_LLM,
                 contents=prompt,
                 # The google-genai client takes sampling options through `config`.
                 # The legacy `generation_config` keyword raises TypeError here,
                 # which the handler below would swallow on every call.
                 config={"temperature": 0.4},
             )
             # `text` can be None when the reply carries no text part.
             output = (result.text or "").strip()
             chunks = [part.strip() for part in output.split('---') if part.strip()]
             if chunks:
                 return chunks
             print("❌ Gemini chunking failed: empty model output")
         except Exception as e:
             # Best-effort: a summarization failure must not stop the exchange
             # from being stored, so report it and fall back to the raw text.
             print(f"❌ Gemini chunking failed: {e}")
         return [response.strip()]