DesiredName committed on
Commit
70f94ec
·
verified ·
1 Parent(s): 1aba7f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -1,12 +1,20 @@
1
  from fastapi import FastAPI
2
  import uvicorn
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
  model_name = "Tap-M/Luna-AI-Llama2-Uncensored"
6
 
 
 
 
 
 
7
  model = AutoModelForCausalLM.from_pretrained(
8
  model_name, # Example model
9
  device_map="auto", # Auto-distribute across GPU/CPU
 
 
 
10
  trust_remote_code=True # Required for some models
11
  )
12
 
 
1
  from fastapi import FastAPI
2
  import uvicorn
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
 
5
  model_name = "Tap-M/Luna-AI-Llama2-Uncensored"
6
 
7
+ bnb_config = BitsAndBytesConfig(
8
+ load_in_4bit=True, # Enable 4-bit quantization
9
+ bnb_4bit_compute_dtype=torch.float16
10
+ )
11
+
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_name, # Example model
14
  device_map="auto", # Auto-distribute across GPU/CPU
15
+ quantization_config=bnb_config,
16
+ offload_folder="./offload", # Temporary directory
17
+ low_cpu_mem_usage=True, # Reduces CPU memory spikes
18
  trust_remote_code=True # Required for some models
19
  )
20