FlawedLLM committed
Commit 5678d62 · verified · 1 Parent(s): acee492

Update app.py

Files changed (1): app.py +1 -2
app.py CHANGED
@@ -11,9 +11,8 @@ from bitsandbytes.functional import quantize_blockwise
 tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
 model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
 def quantize_model(model):
-    # Iterate over model parameters, not the entire model object
     for name, module in model.named_modules():
-        if isinstance(module, torch.nn.Linear): # Quantize only Linear layers
+        if isinstance(module, torch.nn.Linear) and not isinstance(module, torch.nn.Linear4bit):
             module = quantize_blockwise(module)

     # Quantize the model (modified)
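
For context, the guard this commit adds is meant to skip layers that are already quantized, but two details in the hunk are worth flagging: `Linear4bit` lives in `bitsandbytes.nn`, not `torch.nn`, and `bitsandbytes.functional.quantize_blockwise` operates on tensors, not modules. Below is a minimal sketch of the skip-already-quantized loop under those corrections; collecting the results into a dict (rather than mutating the modules in place) is an assumption for illustration, not the repo's actual code.

import torch
import bitsandbytes as bnb
from bitsandbytes.functional import quantize_blockwise

def quantize_model(model):
    # Walk every submodule and blockwise-quantize the weight of each plain
    # Linear layer, skipping layers that are already 4-bit quantized.
    # Note: Linear4bit comes from bitsandbytes.nn, not torch.nn (assumption:
    # this is the class the commit's torch.nn.Linear4bit check intended).
    quantized = {}
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear) and not isinstance(module, bnb.nn.Linear4bit):
            # quantize_blockwise takes a tensor, not a module: it returns the
            # quantized weight together with its quantization state.
            q_weight, q_state = quantize_blockwise(module.weight.data)
            quantized[name] = (q_weight, q_state)
    return quantized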