Pipatpong committed on
Commit
716692e
·
1 Parent(s): 739d665

add config

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -6,9 +6,12 @@ import torch
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
  checkpoint = "Pipatpong/vcm_santa"
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
10
  tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
11
- model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True, device_map="auto")
12
 
13
  def generate(text, max_length, num_return_sequences=1):
14
  inputs = tokenizer.encode(text, padding=False, add_special_tokens=False, return_tensors="pt")
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
  checkpoint = "Pipatpong/vcm_santa"
9
+ device = "cuda" if torch.cuda.is_available() else "CPU"
10
+
11
+ quantization_config = BitsAndBytesConfig(load_in_8bit_fp32_cpu_offload=True)
12
+
13
  tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
14
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True, low_cpu_mem_usage=True, load_in_8bit=True, device_map="auto", quantization_config=quantization_config)
15
 
16
  def generate(text, max_length, num_return_sequences=1):
17
  inputs = tokenizer.encode(text, padding=False, add_special_tokens=False, return_tensors="pt")