Update app.py
Browse files
app.py
CHANGED
@@ -116,12 +116,15 @@ snapshot_download(
|
|
116 |
allow_patterns=["*.json", "*.safetensors", "*.bin", "*.py", "*.md", "*.txt"],
|
117 |
)
|
118 |
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
|
|
|
|
124 |
|
|
|
125 |
decoding_params = BeamSearchParams(beam_width=4,
|
126 |
max_tokens=512)
|
127 |
|
|
|
116 |
allow_patterns=["*.json", "*.safetensors", "*.bin", "*.py", "*.md", "*.txt"],
|
117 |
)
|
118 |
|
119 |
+
@spaces.GPU(duration=120)
|
120 |
+
def load_model(save_dir):
|
121 |
+
model = LLM(model=save_dir,
|
122 |
+
max_num_seqs=512,
|
123 |
+
# tensor_parallel_size=8,
|
124 |
+
enable_prefix_caching=True,
|
125 |
+
gpu_memory_utilization=0.95)
|
126 |
|
127 |
+
model = load_model(save_dir)
|
128 |
decoding_params = BeamSearchParams(beam_width=4,
|
129 |
max_tokens=512)
|
130 |
|