david-thrower committed on
Commit
2bb4d5a
·
verified ·
1 Parent(s): 9442ab6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import bitsandbytes
 
5
 
6
  MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
7
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -12,8 +13,12 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
12
 
13
  model =\
14
  AutoModelForCausalLM\
15
- .from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)\
16
- .to(DEVICE)
 
 
 
 
17
 
18
  #########
19
 
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, HqqConfig
4
+
5
+ quant_config = HqqConfig(nbits=8, group_size=64)
6
 
7
  MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
8
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
13
 
14
  model =\
15
  AutoModelForCausalLM\
16
+ .from_pretrained(
17
+ MODEL_ID,
18
+ torch_dtype=torch.float16,
19
+ # device_map="cuda",
20
+ quantization_config=quant_config
21
+ ).to(DEVICE)
22
 
23
  #########
24