import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the Hugging Face model and tokenizer
model_name = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)

# Define custom system content
custom_system_content = """
You are a helpful chatbot designed to assist users with any questions or tasks they may have.
Please provide thoughtful and concise responses.
"""

# Function to generate chatbot responses
def chatbot_response(user_input):
    inputs = tokenizer(custom_system_content + user_input, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=256)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response[len(custom_system_content):]

# Gradio Blocks UI
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("<h2>Zephyr-7B Chatbot</h2>")
    
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(label="Your message", placeholder="Type your message here...")
            chatbot_output = gr.Chatbot(label="Chatbot Response", placeholder="Chatbot will respond here...")

        with gr.Column():
            submit_btn = gr.Button("Send")

    submit_btn.click(fn=chatbot_response, inputs=user_input, outputs=chatbot_output)

demo.launch()