Spaces:

1inkusFace
/

qwen2.5-32b-instruct

Paused

1inkusFace committed on Apr 25

Commit

a0a145a

verified ·

1 Parent(s): 61e9845

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,18 +4,21 @@ import torch
 import gradio as gr
 import os
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
 torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
 torch.backends.cudnn.benchmark = False
-#torch.backends.cuda.preferred_blas_library="cublas"
-# torch.backends.cuda.preferred_linalg_library="cusolver"
 torch.set_float32_matmul_precision("highest")
-os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1")
-os.environ["SAFETENSORS_FAST_GPU"] = "1"
 model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"

 import gradio as gr
 import os
+os.putenv('TORCH_LINALG_PREFER_CUSOLVER','1')
+os.putenv('PYTORCH_CUDA_ALLOC_CONF','max_split_size_mb:128')
+os.environ["SAFETENSORS_FAST_GPU"] = "1"
+os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1')
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
 torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
 torch.backends.cudnn.benchmark = False
+torch.backends.cuda.preferred_blas_library="cublas"
+torch.backends.cuda.preferred_linalg_library="cusolver"
 torch.set_float32_matmul_precision("highest")
 model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"