Aneeshmishra committed on
Commit
a2f4562
·
verified ·
1 Parent(s): 05b7b67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -13
"""Head of app.py: load a 4-bit-quantised Mixtral-8x7B-Instruct model for transcript summarisation.

Defines module-level objects used by the rest of the script:
    tok        -- the model's fast tokenizer
    model      -- the causal-LM, loaded in 4-bit via bitsandbytes
    prompt_tpl -- the summarisation instruction prefix
"""

# NOTE(review): the committed change replaced the whole import block with only
# `from transformers import BitsAndBytesConfig`, but the code below still uses
# torch, AutoTokenizer and AutoModelForCausalLM — an immediate NameError.
# All original imports are restored here; os/textwrap/gradio/pipeline are kept
# because the remainder of the file (outside this view) presumably uses them.
import os
import textwrap

import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
)

# Full-precision (FP16) checkpoint; quantised to 4-bit at load time below.
MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # FP16 weights

# bitsandbytes 4-bit quantisation: weights stored in 4-bit, matmuls computed
# in fp16 so generation stays fast on GPU.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # keeps mat-mul fast
)

tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",          # shard across available devices automatically
    trust_remote_code=True,
    quantization_config=bnb_cfg,  # apply the 4-bit config declared above
)

# Instruction prefix prepended to transcripts before generation.
prompt_tpl = (
    "Summarise the following transcript in short in 1 or 2 paragraph and point wise and don't miss any key information cover all"
)