Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,12 +11,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
11 |
|
12 |
# === GPTQ 2-bit QUANTIZATION CONFIG ===
|
13 |
quantize_config = BaseQuantizeConfig(
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
compute_dtype=torch.float16,
|
18 |
-
use_double_quant=True,
|
19 |
-
quant_type="nf4"
|
20 |
)
|
21 |
|
22 |
# === LOAD GPTQ-QUANTIZED MODEL ===
|
@@ -132,4 +129,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
132 |
|
133 |
clear_button.click(lambda: ([], []), None, [chatbot, history_state])
|
134 |
|
135 |
-
demo.launch(ssr_mode=False)
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# === GPTQ 2-bit QUANTIZATION CONFIG ===
|
13 |
quantize_config = BaseQuantizeConfig(
|
14 |
+
bits=2, # 2-bit quantization
|
15 |
+
group_size=128, # grouping size
|
16 |
+
desc_act=False # disable activation-order ("act-order") quantization; faster inference, possibly slightly lower accuracy
|
|
|
|
|
|
|
17 |
)
|
18 |
|
19 |
# === LOAD GPTQ-QUANTIZED MODEL ===
|
|
|
129 |
|
130 |
clear_button.click(lambda: ([], []), None, [chatbot, history_state])
|
131 |
|
132 |
+
demo.launch(ssr_mode=False)
|
133 |
+
|
134 |
+
# Note:
|
135 |
+
# To get the AutoGPTQ CUDA kernels back, reinstall AutoGPTQ with CUDA support (note: nf4 and double quantization are bitsandbytes options, not AutoGPTQ features):
|
136 |
+
# pip install git+https://github.com/PanQiWei/AutoGPTQ.git#egg=auto-gptq[cuda]
|