Update app.py
app.py CHANGED
@@ -16,13 +16,13 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 import gradio as gr
 
-torch.backends.cuda.matmul.allow_tf32 =
-torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction =
-torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction =
-torch.backends.cudnn.allow_tf32 =
-torch.backends.cudnn.deterministic =
-torch.backends.cudnn.benchmark =
-torch.set_float32_matmul_precision("
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = True
+torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
+torch.backends.cudnn.allow_tf32 = True
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = True
+torch.set_float32_matmul_precision("high")
 
 # --- Model and Tokenizer Configuration ---
 model_name = "FelixChao/vicuna-33b-coder"
@@ -35,7 +35,7 @@ quantization_config_4bit = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_use_double_quant=True,
     bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.
+    bnb_4bit_compute_dtype=torch.float16
 )
 
 print(f"Loading model: {model_name} with quantization")
@@ -119,7 +119,7 @@ def generate_code(prompt: str) -> str:
     with torch.no_grad():
         generated_ids = model.generate(
             **model_inputs,  # Pass tokenized inputs
-            max_new_tokens=
+            max_new_tokens=768,
             min_new_tokens=256,
             do_sample=True,
             temperature=0.7,