freeCS-dot-org committed on
Commit
ce5fb34
·
verified ·
1 Parent(s): a215ba5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -10,7 +10,7 @@ from threading import Thread
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
  MODEL = "AGI-0/Artificium-llama3.1-8B-001"
12
 
13
- TITLE = """<h2>Link to the model: <a href="https://huggingface.co/AGI-0/Artificium-llama3.1-8B-001" title="Visit the model repository on Hugging Face">AGI-0/Artificium-llama3.1-8B-001</a> please leave a like to the repository if you liked it.</h2>"""
14
 
15
  PLACEHOLDER = """
16
  <center>
@@ -36,6 +36,7 @@ device = "cuda" # for GPU usage or "cpu" for CPU usage
36
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
37
  model = AutoModelForCausalLM.from_pretrained(
38
  MODEL,
 
39
  torch_dtype=torch.bfloat16,
40
  device_map="auto")
41
 
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
  MODEL = "AGI-0/Artificium-llama3.1-8B-001"
12
 
13
+ TITLE = """<h2>Link to the model: <a href="https://huggingface.co/AGI-0/Artificium-llama3.1-8B-001" title="Visit the model repository on Hugging Face">AGI-0/Artificium-llama3.1-8B-001</a> please leave a like to the repository if you liked it. THIS INFERENCE IS 4-Bit Quantized</h2>"""
14
 
15
  PLACEHOLDER = """
16
  <center>
 
36
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
37
  model = AutoModelForCausalLM.from_pretrained(
38
  MODEL,
39
+ load_in_4bit=True,
40
  torch_dtype=torch.bfloat16,
41
  device_map="auto")
42