FlawedLLM
committed on
Update app.py
app.py CHANGED
@@ -3,10 +3,10 @@ import spaces
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
-from peft import PeftModel
+from peft import PeftModel, PeftConfig
 
 
-
+tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/BhashiniLLM")
 # quantization_config = BitsAndBytesConfig(
 #     load_in_4bit=True,
 #     bnb_4bit_use_double_quant=True,
@@ -20,18 +20,23 @@ from peft import PeftModel
 #     use_safetensors=True,
 # )
 
-# Assuming you have your HF repository in this format: "your_username/your_model_name"
-model_id = "FlawedLLM/BhashiniLLM"
+# # Assuming you have your HF repository in this format: "your_username/your_model_name"
+# model_id = "FlawedLLM/BhashiniLLM"
 
-# Load the base model (the one you fine-tuned with LoRA)
-base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')  # Load in 8-bit for efficiency
-for param in base_model.parameters():
-    param.data = param.data.to(torch.float16)  # or torch.float32
+# # Load the base model (the one you fine-tuned with LoRA)
+# base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')  # Load in 8-bit for efficiency
+# for param in base_model.parameters():
+#     param.data = param.data.to(torch.float16)  # or torch.float32
 
-# Load the LoRA adapter weights
-model = PeftModel.from_pretrained(base_model, model_id)
+# # Load the LoRA adapter weights
+# model = PeftModel.from_pretrained(base_model, model_id)
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+
+config = PeftConfig.from_pretrained("FlawedLLM/BhashiniLLM")
+base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-bnb-4bit", device_map='auto')
+model = PeftModel.from_pretrained(base_model, "FlawedLLM/BhashiniLLM")
 
 
 @spaces.GPU(duration=300)
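The diff stops at the @spaces.GPU(duration=300) decorator, so the inference function itself is not shown. As a minimal sketch of how the freshly loaded tokenizer and LoRA-adapted model would typically be wired into the Gradio Space (continuing app.py; the function name generate, the prompt handling, and the generation parameters below are assumptions, not part of this commit):

@spaces.GPU(duration=300)
def generate(prompt):
    # `model` and `tokenizer` are the objects loaded above in app.py.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Generation settings here are illustrative, not from the commit.
        output_ids = model.generate(**inputs, max_new_tokens=256)
    # Decode only the tokens produced after the prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

# Hypothetical Gradio wiring; the real app may use Blocks or different I/O.
demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()

Since the new code loads the adapter on top of the already-quantized unsloth/llama-3-8b-bnb-4bit checkpoint, the LoRA weights ride on a 4-bit base rather than on the fine-tuned repo itself, which is what the commented-out block used to do.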