Update app.py
Browse files
app.py
CHANGED
@@ -116,15 +116,13 @@ snapshot_download(
|
|
116 |
allow_patterns=["*.json", "*.safetensors", "*.bin", "*.py", "*.md", "*.txt"],
|
117 |
)
|
118 |
|
119 |
-
|
120 |
-
|
121 |
-
model = LLM(model=save_dir,
|
122 |
max_num_seqs=512,
|
123 |
# tensor_parallel_size=8,
|
124 |
enable_prefix_caching=True,
|
125 |
-
gpu_memory_utilization=0.95)
|
126 |
|
127 |
-
model = load_model(save_dir)
|
128 |
decoding_params = BeamSearchParams(beam_width=4,
|
129 |
max_tokens=512)
|
130 |
|
|
|
116 |
allow_patterns=["*.json", "*.safetensors", "*.bin", "*.py", "*.md", "*.txt"],
|
117 |
)
|
118 |
|
119 |
+
device = "cuda"
|
120 |
+
model = LLM(model=save_dir,
|
|
|
121 |
max_num_seqs=512,
|
122 |
# tensor_parallel_size=8,
|
123 |
enable_prefix_caching=True,
|
124 |
+
gpu_memory_utilization=0.95).to(device)
|
125 |
|
|
|
126 |
decoding_params = BeamSearchParams(beam_width=4,
|
127 |
max_tokens=512)
|
128 |
|