FlawedLLM committed on
Commit
e1efcdb
·
verified ·
1 Parent(s): d9389a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -13
app.py CHANGED
@@ -3,20 +3,32 @@ import spaces
3
  import gradio as gr
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
5
  import torch
 
6
 
7
- tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/BhashiniLLM")
8
- quantization_config = BitsAndBytesConfig(
9
- load_in_4bit=True,
10
- bnb_4bit_use_double_quant=True,
11
- bnb_4bit_quant_type="nf4",
12
- bnb_4bit_compute_dtype=torch.float16)
13
- model = AutoModelForCausalLM.from_pretrained("FlawedLLM/BhashiniLLM",
14
- device_map="auto",
15
- quantization_config=quantization_config,
16
- torch_dtype =torch.float16,
17
- low_cpu_mem_usage=True,
18
- use_safetensors=True,
19
- )
 
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
  @spaces.GPU(duration=300)
 
3
  import gradio as gr
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
5
  import torch
6
+ from peft import PeftModel
7
 
8
+
9
+ # tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/BhashiniLLM")
10
+ # quantization_config = BitsAndBytesConfig(
11
+ # load_in_4bit=True,
12
+ # bnb_4bit_use_double_quant=True,
13
+ # bnb_4bit_quant_type="nf4",
14
+ # bnb_4bit_compute_dtype=torch.float16)
15
+ # model = AutoModelForCausalLM.from_pretrained("FlawedLLM/BhashiniLLM",
16
+ # device_map="auto",
17
+ # quantization_config=quantization_config,
18
+ # torch_dtype =torch.float16,
19
+ # low_cpu_mem_usage=True,
20
+ # use_safetensors=True,
21
+ # )
22
+
23
+ # Assuming you have your HF repository in this format: "your_username/your_model_name"
24
+ model_id = "FlawedLLM/BhashiniLLM"
25
+
26
+ # Load the base model (the one you fine-tuned with LoRA)
27
+ base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto') # Load in 8-bit for efficiency
28
+
29
+ # Load the LoRA adapter weights
30
+ model = PeftModel.from_pretrained(base_model, model_id)
31
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
32
 
33
 
34
  @spaces.GPU(duration=300)