Update app.py
app.py CHANGED
@@ -1,24 +1,20 @@
-
-from transformers import (
-    AutoTokenizer,
-    AutoModelForCausalLM,
-    BitsAndBytesConfig,
-    pipeline,
-)
-
-MODEL_ID = "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ"  # ← 8 GB quantised
-bnb_cfg = BitsAndBytesConfig(load_in_4bit=True)
+from transformers import BitsAndBytesConfig
 
-
+MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # FP16 weights
+bnb_cfg = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,  # keeps mat-mul fast
+)
 
+tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
     trust_remote_code=True,
-    quantization_config=bnb_cfg,
+    quantization_config=bnb_cfg,  # perfectly fine here
 )
 
-
+prompt_tpl = (
     "Summarise the following transcript in short in 1 or 2 paragraph and point wise and don't miss any key information cover all"
 )
 
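Note that as committed, the right-hand side imports only BitsAndBytesConfig yet still references torch, AutoTokenizer, and AutoModelForCausalLM, so the Space will fail with a NameError on startup. Below is a minimal runnable sketch of what the post-commit app.py presumably intends, with the missing imports restored; the summarise helper at the end is hypothetical glue (the hunk never shows how prompt_tpl is consumed) and assumes a CUDA GPU with the bitsandbytes and accelerate packages installed.

import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)

MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # FP16 weights, quantised at load time

# 4-bit quantisation via bitsandbytes; fp16 compute keeps the mat-muls fast.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",            # shard across available devices
    trust_remote_code=True,
    quantization_config=bnb_cfg,  # quantise the FP16 checkpoint on the fly
)

prompt_tpl = (
    "Summarise the following transcript in short in 1 or 2 paragraph "
    "and point wise and don't miss any key information cover all"
)

def summarise(transcript: str, max_new_tokens: int = 400) -> str:
    # Hypothetical helper: the diff does not show the generation code.
    inputs = tok(prompt_tpl + "\n\n" + transcript, return_tensors="pt").to(model.device)
    out = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Drop the prompt tokens and return only the newly generated text.
    return tok.decode(out[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)

The trade-off behind the commit: loading the official FP16 checkpoint and passing quantization_config avoids depending on a pre-quantised third-party GPTQ repo, but it means downloading the full-precision shards (on the order of 90 GB for Mixtral-8x7B) before the roughly 24 GB 4-bit model materialises in memory.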