Aneeshmishra committed on
Commit
a2f4562
·
verified ·
1 Parent(s): 05b7b67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -13
"""Head of app.py: load a 4-bit-quantised Mixtral-8x7B-Instruct model for transcript summarisation.

Defines module-level objects used by the rest of the script:
    tok        -- the model's fast tokenizer
    model      -- the causal-LM, loaded in 4-bit via bitsandbytes
    prompt_tpl -- the summarisation instruction prefix
"""

# NOTE(review): the committed change replaced the whole import block with only
# `from transformers import BitsAndBytesConfig`, but the code below still uses
# torch, AutoTokenizer and AutoModelForCausalLM — an immediate NameError.
# All original imports are restored here; os/textwrap/gradio/pipeline are kept
# because the remainder of the file (outside this view) presumably uses them.
import os
import textwrap

import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
)

# Full-precision (FP16) checkpoint; quantised to 4-bit at load time below.
MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # FP16 weights

# bitsandbytes 4-bit quantisation: weights stored in 4-bit, matmuls computed
# in fp16 so generation stays fast on GPU.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # keeps mat-mul fast
)

tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",          # shard across available devices automatically
    trust_remote_code=True,
    quantization_config=bnb_cfg,  # apply the 4-bit config declared above
)

# Instruction prefix prepended to transcripts before generation.
prompt_tpl = (
    "Summarise the following transcript in short in 1 or 2 paragraph and point wise and don't miss any key information cover all"
)