david-thrower committed on
Commit
8c33b5c
·
verified ·
1 Parent(s): 4c1dfd9

Update app.py

Browse files

1. Corrected model variable name
2. Added garbage collection after quantization (hoping it reduces RAM usage)

Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -37,13 +37,13 @@ quantization_config = TorchAoConfig(quant_type=quant_config)
37
  MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
38
 
39
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
40
- quantized_model = AutoModelForCausalLM.from_pretrained(
41
  MODEL_ID,
42
  torch_dtype="auto",
43
  device_map="auto",
44
- quantization_config=quantization_config
45
- )
46
 
 
47
 
48
  #########
49
 
 
37
  MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
38
 
39
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
40
+ model = AutoModelForCausalLM.from_pretrained(
41
  MODEL_ID,
42
  torch_dtype="auto",
43
  device_map="auto",
44
+ quantization_config=quantization_config)
 
45
 
46
+ gc.collect()
47
 
48
  #########
49