Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
3 |
import torch
|
4 |
import datetime
|
5 |
|
@@ -20,12 +20,19 @@ st.session_state.setdefault('messages', [])
|
|
20 |
@st.cache_resource
def load_model():
    """Load the tokenizer and the causal language model used by the app.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    reuse the loaded objects rather than rebuilding them on every call.

    Returns:
        tuple: ``(tokenizer, model)`` as produced by
        ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForCausalLM.from_pretrained``.
    """
    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with the correct model path
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        # Each keyword argument must be comma-separated; without the comma
        # after device_map the call does not parse.
        device_map="auto",
        load_in_8bit=True,  # Optional: Use if supported for reduced memory usage
    )
    return tokenizer, model
|
31 |
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
3 |
import torch
|
4 |
import datetime
|
5 |
|
|
|
20 |
@st.cache_resource
def load_model():
    """Build the tokenizer and the 8-bit quantized causal LM for the app.

    The quantization settings are created first, then the tokenizer is
    loaded, and finally the model is loaded with those settings. The
    function is wrapped in ``st.cache_resource`` so that Streamlit can reuse
    the loaded objects rather than rebuilding them on every call.

    Returns:
        tuple: ``(tokenizer, model)`` as produced by
        ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForCausalLM.from_pretrained``.
    """
    # Replace with the correct model path
    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"

    # 8-bit loading, with the fp32 CPU offload flag switched on.
    int8_settings = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Gather the model-loading keywords in one place before the call.
    load_kwargs = dict(
        quantization_config=int8_settings,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

    return tokenizer, model
|
38 |
|