|
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    WhisperForConditionalGeneration,
    WhisperProcessor,
)
from peft import PeftModel
import torch
import clip
from PIL import Image
import gradio as gr

from model import Projections
|
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Projection layer trained to map CLIP's 512-dim image features into
# Phi-3-mini's 3072-dim token-embedding space.
projections = Projections(512, 3072)
projections.load_state_dict(
    torch.load('checkpoint_dir/checkpoint-6000/projection_layer/pytorch_model.bin',
               map_location=device),
    strict=False,  # strict=False: the checkpoint may carry extra keys beyond the projection weights
)
projections = projections.to(device, dtype=torch.bfloat16)
projections.eval()
|
# Base Phi-3-mini model, loaded in bfloat16 on a single device.
checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"
model_kwargs = dict(
    use_cache=True,  # enable the KV cache for faster generation at inference time
    trust_remote_code=True,
    attn_implementation='eager',
    torch_dtype=torch.bfloat16,
    device_map=None,
)
base_model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)
|
# Fine-tuned LoRA adapter, merged into the base model for inference.
adapter_path = "checkpoint_dir/checkpoint-6000/phi_model"
model = PeftModel.from_pretrained(base_model, adapter_path)
model = model.merge_and_unload()
model = model.to(device)
model.eval()
|
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, trust_remote_code=True)
tokenizer.model_max_length = 2048
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'right'

# ShareGPT-style chat template: messages carry 'from'/'value' keys and are
# rendered with Phi-3's <|system|>/<|user|>/<|assistant|>/<|end|> tokens.
tokenizer.chat_template = (
    "{% for message in messages %}"
    "{% if message['from'] == 'system' %}{{'<|system|>' + message['value'] + '<|end|>'}}"
    "{% elif message['from'] == 'human' %}{{'<|user|>' + message['value'] + '<|end|>'}}"
    "{% elif message['from'] == 'gpt' %}{{'<|assistant|>' + message['value'] + '<|end|>'}}"
    "{% endif %}{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}"
)
|
# CLIP ViT-B/32 produces the 512-dim image features consumed by the projection layer.
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
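
# A minimal sketch of turning an attached image into LLM-space embeddings:
# encode with CLIP, then map through the trained projection layer. The shape
# handling below is an assumption about how Projections was trained and may
# need adjusting to match the training pipeline.
def embed_image(image_path):
    image = clip_preprocess(Image.open(image_path)).unsqueeze(0).to(device)
    with torch.no_grad():
        clip_features = clip_model.encode_image(image)  # (1, 512)
        image_embeds = projections(clip_features.to(torch.bfloat16))
    if image_embeds.dim() == 2:  # (1, 3072) -> (1, 1, 3072) for sequence concat
        image_embeds = image_embeds.unsqueeze(1)
    return image_embeds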
|
|
|
|
|
# Whisper-small transcribes attached audio so it can be folded into the prompt.
whisper_model_name = "openai/whisper-small"
whisper_processor = WhisperProcessor.from_pretrained(whisper_model_name)
whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_name).to(device)
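
# A minimal sketch of audio transcription with Whisper. It assumes `librosa`
# is installed to load and resample audio to the 16 kHz mono input Whisper
# expects; any equivalent loader would work.
def transcribe_audio(audio_path):
    import librosa  # assumed dependency, not imported at the top of this script
    audio, _ = librosa.load(audio_path, sr=16000)
    input_features = whisper_processor(
        audio, sampling_rate=16000, return_tensors="pt"
    ).input_features.to(device)
    with torch.no_grad():
        predicted_ids = whisper_model.generate(input_features)
    return whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]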
|
|
|
def infer(message, history):
    # With multimodal=True the textbox delivers {'text': str, 'files': [paths]}.
    # Note: prior turns in `history` are not replayed here, to keep the sketch short.
    user_text = message['text']
    image_embeds = None

    for path in message.get('files', []):
        if path.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            image_embeds = embed_image(path)
        elif path.lower().endswith(('.wav', '.mp3', '.flac', '.m4a')):
            user_text += '\n' + transcribe_audio(path)

    chat = [{'from': 'human', 'value': user_text}]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors='pt').to(device)

    # Prepend projected image embeddings to the prompt embeddings. This assumes
    # the projection layer was trained with image features at the start of the
    # sequence; adjust to match the training recipe if it differs.
    embeds = model.get_input_embeddings()(inputs['input_ids'])
    if image_embeds is not None:
        embeds = torch.cat([image_embeds.to(embeds.dtype), embeds], dim=1)
    attention_mask = torch.ones(embeds.shape[:2], dtype=torch.long, device=device)

    with torch.no_grad():
        # With inputs_embeds (and no input_ids), generate returns only the new tokens.
        output_ids = model.generate(
            inputs_embeds=embeds,
            attention_mask=attention_mask,
            max_new_tokens=256,
            eos_token_id=tokenizer.convert_tokens_to_ids('<|end|>'),
            pad_token_id=tokenizer.pad_token_id,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
examples = [
    {'text': "I am planning to buy a dog and a cat. Suggest some breeds that get along with each other"},
    {'text': "Explain a biased coin flip"},
    {'text': "I want to buy a house. Suggest some factors to consider while making the final decision"},
]
|
|
|
gr.ChatInterface(
    infer,
    multimodal=True,  # the textbox accepts image/audio attachments alongside text
    chatbot=gr.Chatbot(height=600),
    textbox=gr.MultimodalTextbox(placeholder="How can I help you today?", container=False, scale=7),
    theme="soft",
    examples=examples,
    undo_btn=None,
    title="Phi-3 Multimodal Assistant",
).launch()