Spaces:
Runtime error
Runtime error
import gradio as gr | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig | |
from peft import PeftModel | |
# Hub id of the base checkpoint; the tokenizer below is loaded from the same repo.
base_model = "mistralai/Mistral-7B-v0.1"

# 4-bit NF4 quantization with bfloat16 compute and no double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Load the quantized base model; the device map pins every module to device 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map={"": 0},
)

# Attach the fine-tuned PEFT adapter on top of the quantized base model.
# NOTE(review): the adapter repo name says "mixtral" while the base checkpoint is
# Mistral-7B -- confirm the adapter was trained against this base.
ft_model = PeftModel.from_pretrained(model, 'kiki7sun/mixtral-academic-finetune-QLoRA-0121')

# Fixed: this call previously referenced the undefined name `base_model_id`,
# which raised NameError at import time. The tokenizer must come from the
# same checkpoint as the base model.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    add_bos_token=True,
    trust_remote_code=True,
)

# Inference only: switch off dropout and other training-time behavior.
ft_model.eval()
def greet(eval_prompt, max_new_tokens=128):
    """Generate a completion for ``eval_prompt`` with the fine-tuned model.

    Args:
        eval_prompt: Prompt text to continue.
        max_new_tokens: Upper bound on the number of newly generated tokens.
            Defaults to 128 so the function still works when the UI supplies
            only the prompt. The value is coerced to ``int`` because numeric
            UI widgets may deliver floats.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
    # No gradients are needed for generation; skipping them saves memory.
    with torch.no_grad():
        generation = ft_model.generate(
            **model_input,
            max_new_tokens=int(max_new_tokens),
        )
    return tokenizer.decode(generation[0], skip_special_tokens=True)
# `greet` takes two arguments (prompt, max_new_tokens), so the interface must
# provide two inputs; with a single textbox every request failed with a
# missing-argument TypeError. precision=0 makes the Number component yield an int.
demo = gr.Interface(
    fn=greet,
    inputs=[
        "textbox",
        gr.Number(value=128, precision=0, label="max_new_tokens"),
    ],
    outputs="textbox",
)
demo.queue().launch(debug=True, share=True, inline=False)