MoraxCheng committed
Commit a0e970b · 1 Parent(s): 86d5c5f

Implement model caching and preload functionality to optimize loading in Zero GPU spaces
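The change is driven by how Zero GPU works: a GPU is only attached while a function decorated with `@spaces.GPU` is running, so the model should be loaded (and cached) on CPU outside that function and only moved to CUDA inside it, then moved back afterwards so the cached copy survives for the next request. A minimal sketch of that flow, assuming the `spaces` package is available; the `score()` wrapper below is only illustrative (the real entry point in app.py is `score_and_create_matrix_all_singles_impl`), and `load_model_cached` is the helper added by this commit:

    import torch
    import spaces  # Hugging Face Zero GPU helper

    @spaces.GPU  # GPU is attached only while this function runs
    def score(sequence, model_type="Small"):
        model = load_model_cached(model_type)  # served from MODEL_CACHE after the first call
        try:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model.to(device)  # move to GPU inside the decorated function
            # ... tokenize `sequence` and run inference here ...
        finally:
            model.cpu()  # keep the cached copy resident on CPU
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

Because the model object is kept in the module-level cache rather than deleted, later requests skip the download and checkpoint load entirely.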

Files changed (1)
  1. app.py +77 -30
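The bulk of the +77 lines is a module-level MODEL_CACHE dict plus a `load_model_cached()` helper. Illustratively (this check is not part of the commit), repeated calls for the same model size return the same cached object instead of re-downloading:

    m1 = load_model_cached("Small")  # first call: downloads the checkpoint and stores it in MODEL_CACHE
    m2 = load_model_cached("Small")  # second call: returned straight from MODEL_CACHE
    assert m1 is m2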
app.py CHANGED
@@ -4,6 +4,11 @@ Tranception Design App - Hugging Face Spaces Version (Zero GPU Fixed)
 """
 import os
 import sys
+
+# Set up caching to avoid re-downloading models
+os.environ['HF_HOME'] = '/tmp/huggingface'
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface/transformers'
+os.environ['HF_DATASETS_CACHE'] = '/tmp/huggingface/datasets'
 import torch
 import transformers
 from transformers import PreTrainedTokenizerFast
@@ -63,15 +68,49 @@ if not os.path.exists("tranception"):
 import tranception
 from tranception import config, model_pytorch
 
-# Download model checkpoints if not present
-def download_model_from_hf(model_name):
-    """Download model from Hugging Face Hub if not present locally"""
-    model_path = f"./{model_name}"
-    if not os.path.exists(model_path):
-        print(f"Loading {model_name} model from Hugging Face Hub...")
-        # All models are available on HF Hub
-        return f"PascalNotin/{model_name}"
-    return model_path
+# Model loading configuration
+MODEL_CACHE = {}
+
+def get_model_path(model_name):
+    """Get model path - always use HF Hub for Zero GPU spaces"""
+    # In HF Spaces, models are cached automatically by the transformers library
+    # Always return the HF Hub path to leverage this caching
+    return f"PascalNotin/{model_name}"
+
+def load_model_cached(model_type):
+    """Load model with caching to avoid re-downloading"""
+    global MODEL_CACHE
+
+    # Check if model is already in cache
+    if model_type in MODEL_CACHE:
+        print(f"Using cached {model_type} model")
+        return MODEL_CACHE[model_type]
+
+    print(f"Loading {model_type} model...")
+    model_name = f"Tranception_{model_type}"
+    model_path = get_model_path(model_name)
+
+    try:
+        # Create cache directory if it doesn't exist
+        cache_dir = "/tmp/huggingface/transformers"
+        os.makedirs(cache_dir, exist_ok=True)
+
+        model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(
+            pretrained_model_name_or_path=model_path,
+            cache_dir=cache_dir,
+            local_files_only=False,  # Allow downloading if not cached
+            resume_download=True  # Resume incomplete downloads
+        )
+        MODEL_CACHE[model_type] = model
+        print(f"{model_type} model loaded and cached")
+        return model
+    except Exception as e:
+        print(f"Error loading {model_type} model: {e}")
+        # Fallback to Medium if requested model fails
+        if model_type != "Medium":
+            print("Falling back to Medium model...")
+            return load_model_cached("Medium")
+        raise
 
 AA_vocab = "ACDEFGHIKLMNPQRSTVWY"
 tokenizer = PreTrainedTokenizerFast(tokenizer_file="./tranception/utils/tokenizers/Basic_tokenizer",
@@ -265,22 +304,11 @@ def score_and_create_matrix_all_singles_impl(sequence,mutation_range_start=None,
     assert mutation_range_start <= mutation_range_end, "mutation range is invalid"
     assert mutation_range_end <= len(sequence), f"End position ({mutation_range_end}) exceeds sequence length ({len(sequence)})"
 
-    # Load model with HF Space compatibility
-    try:
-        if model_type=="Small":
-            model_path = download_model_from_hf("Tranception_Small")
-            model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path=model_path)
-        elif model_type=="Medium":
-            model_path = download_model_from_hf("Tranception_Medium")
-            model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path=model_path)
-        elif model_type=="Large":
-            model_path = download_model_from_hf("Tranception_Large")
-            model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path=model_path)
-    except Exception as e:
-        print(f"Error loading {model_type} model: {e}")
-        print("Falling back to Medium model...")
-        model_path = download_model_from_hf("Tranception_Medium")
-        model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path=model_path)
+    # Load model with caching
+    model = load_model_cached(model_type)
+
+    # Move model to appropriate device INSIDE the GPU decorated function
+    # This is crucial for Zero GPU - the model must be moved to GPU inside the decorated function
 
     # Device selection - Zero GPU will provide CUDA when decorated with @spaces.GPU
     print(f"GPU Available: {torch.cuda.is_available()}")
@@ -347,12 +375,13 @@ def score_and_create_matrix_all_singles_impl(sequence,mutation_range_start=None,
         return score_heatmaps, suggest_mutations(scores), csv_files
 
     finally:
-        # Always clean up model from memory
+        # Clean up GPU memory but keep model in cache
+        # Move model back to CPU to free GPU memory
        if 'model' in locals():
-            del model
-            gc.collect()
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
+            model.cpu()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            gc.collect()
 
 # Apply Zero GPU decorator if available
 if SPACES_AVAILABLE:
@@ -497,7 +526,25 @@ with tranception_design:
     gr.Markdown("<p><b>Tranception: Protein Fitness Prediction with Autoregressive Transformers and Inference-time Retrieval</b><br>Pascal Notin, Mafalda Dias, Jonathan Frazer, Javier Marchena-Hurtado, Aidan N. Gomez, Debora S. Marks<sup>*</sup>, Yarin Gal<sup>*</sup><br><sup>* equal senior authorship</sup></p>")
     gr.Markdown("Links: <a href='https://proceedings.mlr.press/v162/notin22a.html' target='_blank'>Paper</a> <a href='https://github.com/OATML-Markslab/Tranception' target='_blank'>Code</a> <a href='https://sites.google.com/view/proteingym/substitutions' target='_blank'>ProteinGym</a> <a href='https://igem.org/teams/5247' target='_blank'>BASIS-China iGEM Team</a>")
 
+# Preload models function
+def preload_models():
+    """Preload models at startup to avoid downloading during inference"""
+    print("Preloading models at startup...")
+    try:
+        # Try to load Small model (fastest)
+        load_model_cached("Small")
+        print("Small model preloaded successfully")
+    except Exception as e:
+        print(f"Could not preload Small model: {e}")
+
+    # Optionally preload other models
+    # load_model_cached("Medium")
+    # load_model_cached("Large")
+
 if __name__ == "__main__":
+    # Preload models before launching
+    preload_models()
+
     # Simple launch without queue to avoid Zero GPU conflicts
     tranception_design.launch(
         server_name="0.0.0.0",