fix: cap max_new_tokens at 4096 to avoid timeouts
Browse files
app.py
CHANGED
@@ -143,6 +143,8 @@ def initialize_model():
|
|
143 |
tokenizer, model, spt = load_model(MODEL_PATH, SPT_CONFIG_PATH, SPT_CHECKPOINT_PATH)
|
144 |
spt = spt.to(device)
|
145 |
model = model.to(device)
|
|
|
|
|
146 |
print("Model initialization completed!")
|
147 |
|
148 |
return tokenizer, model, spt, device
|
|
|
143 |
tokenizer, model, spt = load_model(MODEL_PATH, SPT_CONFIG_PATH, SPT_CHECKPOINT_PATH)
|
144 |
spt = spt.to(device)
|
145 |
model = model.to(device)
|
146 |
+
# limit max new tokens to avoid timeouts
|
147 |
+
model.generation_config.max_new_tokens = 4096
|
148 |
print("Model initialization completed!")
|
149 |
|
150 |
return tokenizer, model, spt, device
|