asadsandhu committed on
Commit
876d145
·
1 Parent(s): dd74b32
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import faiss
4
  import numpy as np
5
  import torch
6
- from transformers import AutoTokenizer, AutoModelForCausalLM
7
  from sentence_transformers import SentenceTransformer
8
 
9
  # Load retrieval corpus & FAISS index
@@ -12,14 +12,21 @@ index = faiss.read_index("faiss_index.bin")
12
 
13
  # Load embedding model
14
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
15
-
16
- # Swap to BioMedLM 2.7B (CPU-friendly biomedical model)
17
  model_id = "stanford-crfm/BioMedLM"
18
 
 
 
 
 
 
19
  tokenizer = AutoTokenizer.from_pretrained(model_id)
20
- tokenizer.pad_token = tokenizer.eos_token # fix padding issue
21
 
22
- generation_model = AutoModelForCausalLM.from_pretrained(model_id)
 
 
 
 
23
 
24
  def retrieve_top_k(query, k=5):
25
  query_embedding = embedding_model.encode([query]).astype("float32")
 
3
  import faiss
4
  import numpy as np
5
  import torch
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
7
  from sentence_transformers import SentenceTransformer
8
 
9
  # Load retrieval corpus & FAISS index
 
12
 
13
  # Load embedding model
14
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 
15
  model_id = "stanford-crfm/BioMedLM"
16
 
17
+ bnb_config = BitsAndBytesConfig(
18
+ load_in_8bit=True,
19
+ llm_int8_threshold=6.0,
20
+ )
21
+
22
  tokenizer = AutoTokenizer.from_pretrained(model_id)
23
+ tokenizer.pad_token = tokenizer.eos_token
24
 
25
+ generation_model = AutoModelForCausalLM.from_pretrained(
26
+ model_id,
27
+ device_map="auto",
28
+ quantization_config=bnb_config,
29
+ )
30
 
31
  def retrieve_top_k(query, k=5):
32
  query_embedding = embedding_model.encode([query]).astype("float32")