Merge branch 'main' of https://huggingface.co/spaces/Pipatpong/VCM_Demo
app.py CHANGED
@@ -3,12 +3,15 @@
 import gradio as gr
 import re
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
 checkpoint = "Pipatpong/vcm_santa"
-device = "cuda" if torch.cuda.is_available() else "cpu"
+device = "cuda" if torch.cuda.is_available() else "CPU"
+
+quantization_config = BitsAndBytesConfig(load_in_8bit_fp32_cpu_offload=True)
+
 tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True, low_cpu_mem_usage=True, load_in_8bit=True, device_map="auto", quantization_config=quantization_config)
 
 def generate(text, max_length, num_return_sequences=1):
     inputs = tokenizer.encode(text, padding=False, add_special_tokens=False, return_tensors="pt")
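For readers reproducing this change, below is a minimal standalone sketch of the new loading path, assuming transformers, accelerate, and bitsandbytes are installed. The commit's offload flag name load_in_8bit_fp32_cpu_offload is kept as-is in the diff above; the sketch instead uses llm_int8_enable_fp32_cpu_offload, the name documented in current transformers releases. Note also that torch expects the lowercase device string "cpu", so the commit's "CPU" would fail if the variable were ever passed to torch.device(). The prompt and generation settings here are illustrative only, not the Space's code.

# Minimal sketch of the new 8-bit loading path (illustrative, not the
# Space's exact code). Assumes transformers, accelerate, and bitsandbytes.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

checkpoint = "Pipatpong/vcm_santa"

# torch expects the lowercase "cpu"; "CPU" would be rejected by torch.device().
device = "cuda" if torch.cuda.is_available() else "cpu"

# 8-bit weights on the GPU; modules that accelerate offloads to the CPU stay
# in fp32 so the model still runs when it does not fit entirely in VRAM.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map="auto",  # let accelerate split layers across GPU and CPU
    quantization_config=quantization_config,
)

# Illustrative call mirroring the truncated generate() in the diff.
prompt = "def fibonacci(n):"
inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
outputs = model.generate(inputs.to(model.device), max_length=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

One design note: the commit passes load_in_8bit both as a from_pretrained argument and alongside a quantization_config, which is redundant; newer transformers releases reject that combination, so the sketch sets it only on the config.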