Commit 4f5341e (parent a8b5cb5)

Lazy load models

Files changed:
- Dockerfile +5 -1
- download_model.py +24 -26
- memory.py +1 -3
- warmup.py +10 -1
Dockerfile
CHANGED
@@ -23,7 +23,11 @@ ENV MEDGEMMA_HOME="/home/user/.cache/huggingface/sentence-transformers"
 RUN mkdir -p /app/model_cache /home/user/.cache/huggingface/sentence-transformers && \
     chown -R user:user /app/model_cache /home/user/.cache/huggingface
 
-# Pre-load model in a separate script
+# Control preloading to avoid exhausting build disk on HF Spaces
+ENV PRELOAD_TRANSLATORS="0"
+ENV EMBEDDING_HALF="0"
+
+# Pre-load model in a separate script (translation preload disabled by default)
 RUN python /app/download_model.py && python /app/warmup.py
 
 # Ensure ownership and permissions remain intact
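For reference, a minimal sketch of the flag convention these two ENV lines establish; the build scripts below read the variables this way (the names come from this commit, the consolidated form is an assumption):

import os

# "1" enables the behavior; anything else, including the Dockerfile default "0", disables it.
PRELOAD_TRANSLATORS = os.getenv("PRELOAD_TRANSLATORS", "0") == "1"  # read by download_model.py
EMBEDDING_HALF = os.getenv("EMBEDDING_HALF", "1") == "1"            # read by warmup.py

Note that warmup.py's in-code default is "1", so the Dockerfile's EMBEDDING_HALF="0" is what actually disables half precision during the image build.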
download_model.py
CHANGED
@@ -7,34 +7,21 @@ from huggingface_hub import snapshot_download
 # Set up paths
 MODEL_REPO = "sentence-transformers/all-MiniLM-L6-v2"
 MODEL_CACHE_DIR = "/app/model_cache"
+HF_CACHE_DIR = os.getenv("HF_HOME", "/home/user/.cache/huggingface")
 
 print("⏳ Downloading the SentenceTransformer model...")
-model_path = snapshot_download(MODEL_REPO)
+# Download directly into /app/model_cache to avoid duplicating files from HF cache
+model_path = snapshot_download(
+    repo_id=MODEL_REPO,
+    cache_dir=HF_CACHE_DIR,       # Store HF cache in user cache dir
+    local_dir=MODEL_CACHE_DIR,    # Place usable model here
+    local_dir_use_symlinks=False  # Copy files into local_dir (no symlinks)
+)
 
 print("Model path: ", model_path)
-
-# Ensure the directory exists
 if not os.path.exists(MODEL_CACHE_DIR):
     os.makedirs(MODEL_CACHE_DIR)
 
-# Move all contents from the snapshot folder
-if os.path.exists(model_path):
-    print(f"📂 Moving model files from {model_path} to {MODEL_CACHE_DIR}...")
-
-    for item in os.listdir(model_path):
-        source = os.path.join(model_path, item)
-        destination = os.path.join(MODEL_CACHE_DIR, item)
-
-        if os.path.isdir(source):
-            shutil.copytree(source, destination, dirs_exist_ok=True)
-        else:
-            shutil.copy2(source, destination)
-
-    print(f"✅ Model extracted and flattened in {MODEL_CACHE_DIR}")
-else:
-    print("❌ No snapshot directory found!")
-    exit(1)
-
 # Verify structure after moving
 print("\n📂 LLM Model Structure (Build Level):")
 for root, dirs, files in os.walk(MODEL_CACHE_DIR):
@@ -44,8 +31,19 @@
 
 
 ### --- B. translation modules ---
-from transformers import pipeline
-print("⏬ Pre-downloading Vietnamese–English translator...")
-_ = pipeline("translation", model="VietAI/envit5-translation", src_lang="vi", tgt_lang="en", device=-1)
-print("⏬ Pre-downloading Chinese–English translator...")
-_ = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=-1)
+# Optional pre-download of translation models. These can be very large and
+# may exceed build storage limits on constrained environments (e.g., HF Spaces).
+# Control with env var PRELOAD_TRANSLATORS ("1" to enable; default: disabled).
+PRELOAD_TRANSLATORS = os.getenv("PRELOAD_TRANSLATORS", "0")
+if PRELOAD_TRANSLATORS == "1":
+    try:
+        from transformers import pipeline
+        print("⏬ Pre-downloading Vietnamese–English translator...")
+        _ = pipeline("translation", model="VietAI/envit5-translation", src_lang="vi", tgt_lang="en", device=-1)
+        print("⏬ Pre-downloading Chinese–English translator...")
+        _ = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=-1)
+        print("✅ Translators preloaded.")
+    except Exception as e:
+        print(f"⚠️ Skipping translator preload due to error: {e}")
+else:
+    print("ℹ️ Skipping translator pre-download (PRELOAD_TRANSLATORS != '1'). They will lazy-load at runtime.")
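The skip branch promises that the translators "will lazy-load at runtime", but that code is not part of this commit. A minimal sketch of what such a runtime loader could look like, assuming a hypothetical get_translator helper in the serving code:

from functools import lru_cache

@lru_cache(maxsize=None)
def get_translator(model_name, **pipeline_kwargs):
    # Deferred import: transformers is only pulled in on first use,
    # and each pipeline is downloaded/built once, then reused from the cache.
    from transformers import pipeline
    return pipeline("translation", model=model_name, device=-1, **pipeline_kwargs)

# First call triggers the download; subsequent calls return the cached pipeline.
# zh_en = get_translator("Helsinki-NLP/opus-mt-zh-en")

This trades a slower first request for a build that fits within Spaces disk limits, which is the point of the PRELOAD_TRANSLATORS="0" default.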
memory.py
CHANGED
@@ -421,6 +421,4 @@ class MemoryManager:
             first = " ".join(words[:16])
             # ensure capitalized
             return first.strip().rstrip(':')
-            return topic
-
-
+        return topic
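The moved line matters because a return placed directly after another return at the same depth is dead code. An illustrative reduction (hypothetical summarize_topic, not the actual method):

def summarize_topic(words, topic):
    if words:
        first = " ".join(words[:16])
        # ensure capitalized
        return first.strip().rstrip(':')
        # return topic  <- at this depth it could never execute
    return topic  # reachable fallback once dedented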
warmup.py
CHANGED
@@ -1,8 +1,17 @@
 from sentence_transformers import SentenceTransformer
 import torch
+import os
 
 print("🚀 Warming up model...")
 embedding_model = SentenceTransformer("/app/model_cache", device="cpu")
-embedding_model = embedding_model.half()
+
+# Some CPU backends on HF Spaces fail on .half(); make it configurable
+USE_HALF = os.getenv("EMBEDDING_HALF", "1") == "1"
+try:
+    if USE_HALF and torch.cuda.is_available():
+        embedding_model = embedding_model.half()
+except Exception as e:
+    print(f"⚠️ Skipping half precision due to: {e}")
+
 embedding_model.to(torch.device("cpu"))
 print("✅ Model warm-up complete!")
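A quick way to confirm the baked model still works after these changes; a minimal smoke test against the same cache path, not part of the commit:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("/app/model_cache", device="cpu")
vec = model.encode("warm-up sanity check")
print(vec.shape)  # all-MiniLM-L6-v2 produces 384-dimensional embeddings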