YuLu0713 committed on
Commit
f54abf9
·
verified ·
1 Parent(s): cdd30fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -5
app.py CHANGED
@@ -116,15 +116,13 @@ snapshot_download(
116
  allow_patterns=["*.json", "*.safetensors", "*.bin", "*.py", "*.md", "*.txt"],
117
  )
118
 
119
- @spaces.GPU(duration=120)
120
- def load_model(save_dir):
121
- model = LLM(model=save_dir,
122
  max_num_seqs=512,
123
  # tensor_parallel_size=8,
124
  enable_prefix_caching=True,
125
- gpu_memory_utilization=0.95)
126
 
127
- model = load_model(save_dir)
128
  decoding_params = BeamSearchParams(beam_width=4,
129
  max_tokens=512)
130
 
 
116
  allow_patterns=["*.json", "*.safetensors", "*.bin", "*.py", "*.md", "*.txt"],
117
  )
118
 
119
+ device = "cuda"
120
+ model = LLM(model=save_dir,
 
121
  max_num_seqs=512,
122
  # tensor_parallel_size=8,
123
  enable_prefix_caching=True,
124
+ gpu_memory_utilization=0.95).to(device)
125
 
 
126
  decoding_params = BeamSearchParams(beam_width=4,
127
  max_tokens=512)
128