MoraxCheng committed
Commit 042f856 · 1 Parent(s): e809d91

Add Zero GPU support for H200 GPU on Hugging Face Spaces


- Import spaces module for Zero GPU support
- Add @spaces.GPU decorator to inference function with 5-minute duration
- Fix device selection to properly use GPU when available
- Use model.to(device) instead of model.cuda() for consistency
- Print GPU name when running on GPU
- Increase batch size to 50 for GPU inference
- Add spaces>=0.19.0 to requirements.txt
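For readers unfamiliar with Zero GPU, the commit follows the usual pattern: the GPU-bound entry point is decorated with @spaces.GPU so that a device is attached only for the duration of each call. The sketch below is a minimal, illustrative version of that pattern, not the Space's actual code; the function name, its parameters, and the use of max() to raise the batch size on GPU are assumptions made for the example.

    import torch
    import spaces  # Hugging Face Zero GPU helper (spaces>=0.19.0)

    @spaces.GPU(duration=300)  # GPU is attached only while this call runs, for up to 5 minutes
    def run_inference(model, batch_size=20):
        # Inside the decorated call on a Zero GPU Space, CUDA is visible;
        # outside of it, torch.cuda.is_available() returns False.
        if torch.cuda.is_available():
            device = torch.device("cuda")
            print(f"Running on {torch.cuda.get_device_name(0)}")
            batch_size = max(batch_size, 50)  # larger batches are affordable on GPU
        else:
            device = torch.device("cpu")
            batch_size = min(batch_size, 10)  # keep CPU inference light
        model = model.to(device)  # .to(device) covers both branches, unlike .cuda()
        # ... run the scoring loop with `model` and `batch_size` ...
        return device, batch_size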

Files changed (2)
  1. app.py  +8 -4
  2. requirements.txt  +1 -0
app.py CHANGED

@@ -17,6 +17,7 @@ import zipfile
 import shutil
 import uuid
 import gc
+import spaces
 
 # Add current directory to path
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -224,6 +225,7 @@ def get_mutated_protein(sequence,mutant):
     mutated_sequence[int(mutant[1:-1])-1]=mutant[-1]
     return ''.join(mutated_sequence)
 
+@spaces.GPU(duration=300)  # Request GPU for up to 5 minutes
 def score_and_create_matrix_all_singles(sequence,mutation_range_start=None,mutation_range_end=None,model_type="Large",scoring_mirror=False,batch_size_inference=20,max_number_positions_per_heatmap=50,num_workers=0,AA_vocab=AA_vocab):
     # Clean up old files periodically
     cleanup_old_files()
@@ -259,14 +261,16 @@ def score_and_create_matrix_all_singles(sequence,mutation_range_start=None,mutat
         model_path = download_model_from_hf("Tranception_Medium")
         model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path=model_path)
 
-    # Device selection - for HF Spaces, typically CPU
+    # Device selection - Zero GPU will provide CUDA when decorated with @spaces.GPU
     if torch.cuda.is_available():
         device = torch.device("cuda")
-        model.cuda()
-        print("Inference will take place on NVIDIA GPU")
+        model = model.to(device)
+        print(f"Inference will take place on {torch.cuda.get_device_name(0)}")
+        # Increase batch size for GPU inference
+        batch_size_inference = min(batch_size_inference, 50)
     else:
         device = torch.device("cpu")
+        model = model.to(device)
         print("Inference will take place on CPU")
         # Reduce batch size for CPU inference
         batch_size_inference = min(batch_size_inference, 10)
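On a Zero GPU Space, the decorated function only holds a GPU while it is executing, so it is normally wired straight into a Gradio event handler. The wiring below is a hypothetical illustration under that assumption; the UI components, the gr.Plot output, and the import path are not taken from this Space's actual interface code.

    import gradio as gr
    from app import score_and_create_matrix_all_singles  # hypothetical import for illustration

    with gr.Blocks() as demo:
        sequence = gr.Textbox(label="Protein sequence")
        start = gr.Number(value=1, precision=0, label="Mutation range start")
        end = gr.Number(value=10, precision=0, label="Mutation range end")
        heatmap = gr.Plot(label="Single-mutant score heatmap")
        run = gr.Button("Score mutations")
        # Each click enters the @spaces.GPU context: CUDA is attached for up to
        # the declared 300 s, then released back to the Zero GPU pool.
        run.click(score_and_create_matrix_all_singles,
                  inputs=[sequence, start, end],
                  outputs=heatmap)

    demo.launch()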
requirements.txt CHANGED

@@ -1,6 +1,7 @@
 torch>=1.12.0
 transformers==4.17.0
 tokenizers==0.11.6
+spaces>=0.19.0
 numpy>=1.21.0,<1.24.0
 pandas>=1.3.0,<2.0.0
 scipy>=1.7.0,<1.11.0