Commit · 042f856
Parent(s): e809d91
Add Zero GPU support for H200 GPU on Hugging Face Spaces
- Import spaces module for Zero GPU support
- Add @spaces.GPU decorator to inference function with 5-minute duration (see the sketch after the file list below)
- Fix device selection to properly use GPU when available
- Use model.to(device) instead of model.cuda() for consistency
- Print GPU name when running on GPU
- Increase batch size to 50 for GPU inference
- Add spaces>=0.19.0 to requirements.txt
- app.py +8 -4
- requirements.txt +1 -0
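
Taken together, the bullets above follow the usual ZeroGPU pattern: import spaces, decorate the GPU-bound entry point with @spaces.GPU, and choose the device inside the decorated call. A minimal, self-contained sketch of that pattern is below; the model and function names are placeholders for illustration, not code from this repository.

import spaces
import torch

# Stand-in model; the real app loads a Tranception checkpoint instead.
toy_model = torch.nn.Linear(8, 1)

@spaces.GPU(duration=300)  # GPU is attached only while this call runs
def run_scoring(inputs, batch_size_inference=20):
    # Inside a @spaces.GPU-decorated call, CUDA is available on ZeroGPU hardware.
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print(f"Inference will take place on {torch.cuda.get_device_name(0)}")
        batch_size_inference = min(batch_size_inference, 50)  # never exceed 50 on GPU
    else:
        device = torch.device("cpu")
        print("Inference will take place on CPU")
        batch_size_inference = min(batch_size_inference, 10)  # keep CPU runs small
    model = toy_model.to(device)
    return model(inputs.to(device))

On a ZeroGPU Space the decorator requests a GPU slice for at most the given duration per call; outside that environment it is intended to have no effect, so the CPU branch still runs.
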
app.py CHANGED
@@ -17,6 +17,7 @@ import zipfile
 import shutil
 import uuid
 import gc
+import spaces
 
 # Add current directory to path
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -224,6 +225,7 @@ def get_mutated_protein(sequence,mutant):
     mutated_sequence[int(mutant[1:-1])-1]=mutant[-1]
     return ''.join(mutated_sequence)
 
+@spaces.GPU(duration=300)  # Request GPU for up to 5 minutes
 def score_and_create_matrix_all_singles(sequence,mutation_range_start=None,mutation_range_end=None,model_type="Large",scoring_mirror=False,batch_size_inference=20,max_number_positions_per_heatmap=50,num_workers=0,AA_vocab=AA_vocab):
     # Clean up old files periodically
     cleanup_old_files()
@@ -259,14 +261,16 @@ def score_and_create_matrix_all_singles(sequence,mutation_range_start=None,mutat
         model_path = download_model_from_hf("Tranception_Medium")
         model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path=model_path)
 
-    # Device selection -
+    # Device selection - Zero GPU will provide CUDA when decorated with @spaces.GPU
     if torch.cuda.is_available():
         device = torch.device("cuda")
-        model.cuda()
-        print("Inference will take place on GPU")
+        model = model.to(device)
+        print(f"Inference will take place on {torch.cuda.get_device_name(0)}")
+        # Increase batch size for GPU inference
+        batch_size_inference = min(batch_size_inference, 50)
     else:
         device = torch.device("cpu")
-        model.to(device)
+        model = model.to(device)
         print("Inference will take place on CPU")
         # Reduce batch size for CPU inference
         batch_size_inference = min(batch_size_inference, 10)
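
For context, ZeroGPU Spaces run on the Gradio SDK, and a @spaces.GPU-decorated function is used as an ordinary Gradio event handler. The following is a hypothetical wiring sketch only; score_fn and demo are illustrative names, not taken from this repository's app.py.

import gradio as gr
import spaces

@spaces.GPU(duration=300)
def score_fn(sequence):
    # Placeholder for the real call to score_and_create_matrix_all_singles.
    return f"received a sequence of length {len(sequence)}"

demo = gr.Interface(fn=score_fn, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()
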
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 torch>=1.12.0
 transformers==4.17.0
 tokenizers==0.11.6
+spaces>=0.19.0
 numpy>=1.21.0,<1.24.0
 pandas>=1.3.0,<2.0.0
 scipy>=1.7.0,<1.11.0