Pipatpong committed on
Commit
2bc1bd8
·
2 Parent(s): a465184 b0400a5

Merge branch 'main' of https://huggingface.co/spaces/Pipatpong/VCM_Demo

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -3,12 +3,15 @@
3
  import gradio as gr
4
  import re
5
  import torch
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
  checkpoint = "Pipatpong/vcm_santa"
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
10
  tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
11
- model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True, device_map="auto")
12
 
13
  def generate(text, max_length, num_return_sequences=1):
14
  inputs = tokenizer.encode(text, padding=False, add_special_tokens=False, return_tensors="pt")
 
3
  import gradio as gr
4
  import re
5
  import torch
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
7
 
8
  checkpoint = "Pipatpong/vcm_santa"
9
+ device = "cuda" if torch.cuda.is_available() else "CPU"
10
+
11
+ quantization_config = BitsAndBytesConfig(load_in_8bit_fp32_cpu_offload=True)
12
+
13
  tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
14
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True, low_cpu_mem_usage=True, load_in_8bit=True, device_map="auto", quantization_config=quantization_config)
15
 
16
  def generate(text, max_length, num_return_sequences=1):
17
  inputs = tokenizer.encode(text, padding=False, add_special_tokens=False, return_tensors="pt")