VisoLearn committed on
Commit
692b14d
·
verified ·
1 Parent(s): 13a516c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -11,12 +11,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
11
 
12
  # === GPTQ 2-bit QUANTIZATION CONFIG ===
13
  quantize_config = BaseQuantizeConfig(
14
- load_in_4bit=False,
15
- load_in_8bit=False,
16
- quantization_bit=2,
17
- compute_dtype=torch.float16,
18
- use_double_quant=True,
19
- quant_type="nf4"
20
  )
21
 
22
  # === LOAD GPTQ-QUANTIZED MODEL ===
@@ -132,4 +129,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
132
 
133
  clear_button.click(lambda: ([], []), None, [chatbot, history_state])
134
 
135
- demo.launch(ssr_mode=False)
 
 
 
 
 
11
 
12
  # === GPTQ 2-bit QUANTIZATION CONFIG ===
13
  quantize_config = BaseQuantizeConfig(
14
+ bits=2, # 2-bit quantization
15
+ group_size=128, # grouping size
16
+ desc_act=False # disable act-order (quantizing columns in order of decreasing activation size) for faster inference
 
 
 
17
  )
18
 
19
  # === LOAD GPTQ-QUANTIZED MODEL ===
 
129
 
130
  clear_button.click(lambda: ([], []), None, [chatbot, history_state])
131
 
132
+ demo.launch(ssr_mode=False)
133
+
134
+ # Note:
135
+ # To get AutoGPTQ's compiled CUDA kernels back, reinstall AutoGPTQ with CUDA support (note: nf4 and double quant are bitsandbytes options, not AutoGPTQ BaseQuantizeConfig settings):
136
+ # pip install git+https://github.com/PanQiWei/AutoGPTQ.git#egg=auto-gptq[cuda]