david-thrower committed on
Commit
16b18f5
·
verified ·
1 Parent(s): 48b635b

Update app.py

Browse files

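Added a call that moves the exported model to eval (inference) mode; also switched the quantization config from Float8 to Float4 dynamic activation/weight.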

Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -29,9 +29,9 @@ import gradio as gr
29
 
30
  import torch
31
  from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
32
- from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Float8WeightOnlyConfig
33
  # quant_config = Float8WeightOnlyConfig()
34
- quant_config = Float8DynamicActivationFloat8WeightConfig()
35
  quantization_config = TorchAoConfig(quant_type=quant_config)
36
 
37
  MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
@@ -45,6 +45,10 @@ model = AutoModelForCausalLM.from_pretrained(
45
 
46
  gc.collect()
47
 
 
 
 
 
48
  #########
49
 
50
  # from unsloth import FastLanguageModel
 
29
 
30
  import torch
31
  from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
32
+ from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Float8WeightOnlyConfig, Float4DynamicActivationFloat4WeightConfig
33
  # quant_config = Float8WeightOnlyConfig()
34
+ quant_config = Float4DynamicActivationFloat4WeightConfig() # Float8DynamicActivationFloat8WeightConfig()
35
  quantization_config = TorchAoConfig(quant_type=quant_config)
36
 
37
  MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
 
45
 
46
  gc.collect()
47
 
48
+ pt2e.move_exported_model_to_eval(model)
49
+
50
+ gc.collect()
51
+
52
  #########
53
 
54
  # from unsloth import FastLanguageModel