Update app.py
app.py CHANGED
@@ -10,7 +10,7 @@ from threading import Thread
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL = "AGI-0/Artificium-llama3.1-8B-001"
 
-TITLE = """<h2>Link to the model: <a href="https://huggingface.co/AGI-0/Artificium-llama3.1-8B-001" title="Visit the model repository on Hugging Face">AGI-0/Artificium-llama3.1-8B-001</a> please leave a like to the repository if you liked it</h2>"""
+TITLE = """<h2>Link to the model: <a href="https://huggingface.co/AGI-0/Artificium-llama3.1-8B-001" title="Visit the model repository on Hugging Face">AGI-0/Artificium-llama3.1-8B-001</a> please leave a like to the repository if you liked it. THIS INFERENCE IS 4-Bit Quantized</h2>"""
 
 PLACEHOLDER = """
 <center>
@@ -36,6 +36,7 @@ device = "cuda" # for GPU usage or "cpu" for CPU usage
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL,
+    load_in_4bit=True,
     torch_dtype=torch.bfloat16,
     device_map="auto")
 
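Note on the change: passing load_in_4bit=True directly to from_pretrained enables 4-bit quantized weights through bitsandbytes, which is what lets the 8B model fit comfortably on the Space's GPU. As a minimal sketch only (not part of this commit), the same load can be expressed with a BitsAndBytesConfig passed via quantization_config, the form recent transformers releases prefer over the bare kwarg; the quant type and compute dtype below are illustrative choices, not settings taken from this Space.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL = "AGI-0/Artificium-llama3.1-8B-001"

# Sketch: equivalent 4-bit load using an explicit quantization config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize the weights to 4-bit via bitsandbytes
    bnb_4bit_quant_type="nf4",              # illustrative choice of 4-bit format
    bnb_4bit_compute_dtype=torch.bfloat16,  # keep compute in bf16, matching the Space's dtype
)

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    quantization_config=bnb_config,
    device_map="auto",
)

This requires the bitsandbytes package and a CUDA device; on a CPU-only machine the 4-bit path is not available and the model would have to be loaded unquantized.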