DesiredName committed on
Commit bc8a564 · verified · 1 Parent(s): d909a05

Update app.py

Files changed (1)
  1. app.py +4 -3
app.py CHANGED
@@ -6,8 +6,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 model_name = "Tap-M/Luna-AI-Llama2-Uncensored"
 
 bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,  # Enable 4-bit quantization
-    bnb_4bit_compute_dtype=torch.float16
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",  # Must be 'nf4' for CPU compatibility
+    bnb_4bit_compute_dtype="float32",  # Use float32 for CPU computation
+    bnb_4bit_use_double_quant=True
 )
 
 model = AutoModelForCausalLM.from_pretrained(
@@ -15,7 +17,6 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",  # Auto-distribute across GPU/CPU
     quantization_config=bnb_config,
     offload_folder="./offload",  # Temporary directory
-    low_cpu_mem_usage=True,  # Reduces CPU memory spikes
     trust_remote_code=True  # Required for some models
 )
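For reference, a minimal sketch of how the quantization config from this commit could be exercised end to end, assuming app.py follows a plain tokenize-and-generate flow. The tokenizer call, the USER/ASSISTANT prompt template, and the generation length are illustrative assumptions and are not part of this commit.

# Sketch only: loading the model with the commit's BitsAndBytesConfig and
# running a single generation. Prompt format and max_new_tokens are assumed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "Tap-M/Luna-AI-Llama2-Uncensored"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",         # as in the commit: 'nf4' for CPU compatibility
    bnb_4bit_compute_dtype="float32",  # as in the commit: float32 compute on CPU
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",                 # auto-distribute across GPU/CPU
    quantization_config=bnb_config,
    offload_folder="./offload",        # temporary offload directory
    trust_remote_code=True,
)

prompt = "USER: What does 4-bit NF4 quantization do?\nASSISTANT:"  # assumed prompt template
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=128)      # assumed generation length
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))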