Update app.py
app.py CHANGED

@@ -5,6 +5,7 @@ from typing import Iterator
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+import bitsandbytes
 
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1024
@@ -16,7 +17,7 @@ DESCRIPTION = """\
 
 # Load model with appropriate device configuration
 def load_model():
-    model_id = "CreitinGameplays/
+    model_id = "CreitinGameplays/Mistral-Nemo-12B-R1-v0.1"
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
     # If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
@@ -30,7 +31,8 @@ def load_model():
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.float16,
-        device_map="auto"
+        device_map="auto",
+        load_in_8bit=True
     )
     tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left')
     tokenizer.use_default_system_prompt = False
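For reference, a minimal sketch of how load_model() plausibly reads once the hunks above are applied. Only the changed and context lines in the diff are certain; the CPU fallback branch (full-precision, no quantization) and the trailing return statement are assumptions inferred from the "load in 32-bit" comment and the surrounding code, not part of the visible patch.

# Sketch of load_model() after the patch (assumptions marked in comments).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def load_model():
    model_id = "CreitinGameplays/Mistral-Nemo-12B-R1-v0.1"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    if device == "cpu":
        # Assumed fallback: float32 weights on CPU, since bitsandbytes 8-bit
        # kernels target CUDA devices.
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float32,
        )
    else:
        # The patched GPU path from the diff: place layers automatically and
        # load the weights quantized to 8-bit.
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            device_map="auto",
            load_in_8bit=True,
        )

    tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
    tokenizer.use_default_system_prompt = False
    return model, tokenizer  # assumed return shape; not shown in the diff

Note that 8-bit loading needs the bitsandbytes package installed and typically a CUDA GPU; the top-level "import bitsandbytes" added by this commit mainly makes a missing dependency fail fast, since transformers imports it internally when load_in_8bit=True is set. Newer transformers releases also deprecate passing load_in_8bit=True directly to from_pretrained in favor of quantization_config=BitsAndBytesConfig(load_in_8bit=True).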