import torch
import clip
import gradio as gr
from PIL import Image
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import WhisperProcessor, WhisperForConditionalGeneration

from model import Projections

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Projection layer mapping 512-d CLIP image features into Phi-3's 3072-d embedding space.
projections = Projections(512, 3072)
projections.load_state_dict(
    torch.load('checkpoint_dir/checkpoint-6000/projection_layer/pytorch_model.bin',
               map_location=device),
    strict=False,
)
projections = projections.to(device).to(torch.bfloat16)

# Base language model.
checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"
model_kwargs = dict(
    use_cache=False,  # carried over from the training config; set True for faster generation
    trust_remote_code=True,
    attn_implementation='eager',
    torch_dtype=torch.bfloat16,
    device_map=None,
)
base_model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)

# Merge the fine-tuned LoRA adapter into the base model.
new_model = "checkpoint_dir/checkpoint-6000/phi_model"  # change to the path where your model is saved
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()
model = model.to(device)

tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, trust_remote_code=True)
tokenizer.model_max_length = 2048
tokenizer.pad_token = tokenizer.unk_token  # use unk rather than eos token to prevent endless generation
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'right'
# ShareGPT-style chat template ('from'/'value' keys) rendered with Phi-3's special tokens.
tokenizer.chat_template = (
    "{% for message in messages %}"
    "{% if message['from'] == 'system' %}{{'<|system|>' + message['value'] + '<|end|>'}}"
    "{% elif message['from'] == 'human' %}{{'<|user|>' + message['value'] + '<|end|>'}}"
    "{% elif message['from'] == 'gpt' %}{{'<|assistant|>' + message['value'] + '<|end|>'}}"
    "{% endif %}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}"
)

# CLIP image encoder.
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)

# Load Whisper model and processor for speech input.
whisper_model_name = "openai/whisper-small"
whisper_processor = WhisperProcessor.from_pretrained(whisper_model_name)
whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_name)


def infer(message, history):
    # With multimodal=True below, `message` is a dict with 'text' and 'files' keys.
    # This handler runs the text path only; message['files'] (image/audio paths) are
    # ignored here -- see the sketch after the launch call for one way to wire in
    # the CLIP and Whisper components loaded above.
    chat = [{'from': 'human', 'value': message['text']}]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.convert_tokens_to_ids('<|end|>'),  # stop at Phi-3's end-of-turn token
        )
    # Return only the newly generated tokens.
    return tokenizer.decode(output_ids[0, inputs['input_ids'].shape[1]:], skip_special_tokens=True)


examples = [
    {'text': "I am planning to buy a dog and a cat. Suggest some breeds that get along with each other"},
    {'text': "Explain biased coin flip"},
    {'text': "I want to buy a house. Suggest some factors to consider while making final decision"},
]

gr.ChatInterface(
    infer,
    multimodal=True,  # dict-style messages and examples ({'text': ...}) require a multimodal interface
    chatbot=gr.Chatbot(height=600),
    textbox=gr.MultimodalTextbox(placeholder="How can I help you today", container=False, scale=7),
    theme="soft",
    examples=examples,
    undo_btn=None,
    title="Phi-3 Multimodal Assistant",
).launch()
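
# ---------------------------------------------------------------------------
# Sketch: wiring in the CLIP/projection and Whisper components loaded above.
# Everything below is illustrative, not a drop-in implementation: the output
# shape of `Projections` and the way projected image embeddings are spliced
# into Phi-3's inputs depend on how the checkpoint was trained, the helper
# names (encode_image, transcribe_audio, chat_with_image) are hypothetical,
# and `librosa` is an assumed extra dependency for reading audio files.
import librosa  # assumed dependency (pip install librosa)


def encode_image(image_path):
    """Encode an image with CLIP and project it into Phi-3's embedding space."""
    image = clip_preprocess(Image.open(image_path)).unsqueeze(0).to(device)
    with torch.no_grad():
        feats = clip_model.encode_image(image).to(torch.bfloat16)  # (1, 512)
        img_embeds = projections(feats)  # assumed to map into Phi-3's 3072-d space
    if img_embeds.dim() == 2:
        img_embeds = img_embeds.unsqueeze(1)  # ensure (1, n_tokens, 3072)
    return img_embeds


def transcribe_audio(audio_path):
    """Transcribe speech with Whisper so it can be appended to the user turn."""
    audio, _ = librosa.load(audio_path, sr=16000)
    features = whisper_processor(audio, sampling_rate=16000, return_tensors='pt').input_features
    with torch.no_grad():
        ids = whisper_model.generate(features)
    return whisper_processor.batch_decode(ids, skip_special_tokens=True)[0]


def chat_with_image(text, image_path):
    """One common pattern (an assumption here, not necessarily the training
    recipe): prepend the projected image embeddings to the prompt's token
    embeddings and generate from `inputs_embeds`."""
    img_embeds = encode_image(image_path)
    prompt = tokenizer.apply_chat_template(
        [{'from': 'human', 'value': text}], tokenize=False, add_generation_prompt=True)
    ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
    tok_embeds = model.get_input_embeddings()(ids)  # (1, seq_len, 3072), bfloat16
    inputs_embeds = torch.cat([img_embeds, tok_embeds], dim=1)
    with torch.no_grad():
        out = model.generate(
            inputs_embeds=inputs_embeds,
            max_new_tokens=256,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.convert_tokens_to_ids('<|end|>'),
        )
    # When only `inputs_embeds` is given, generate() returns just the new tokens.
    return tokenizer.decode(out[0], skip_special_tokens=True)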