import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the Hugging Face model and tokenizer
model_name = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",  # requires `accelerate`; places the model on GPU when available
)

# Define custom system content
custom_system_content = """
You are a helpful chatbot designed to assist users with any questions or tasks they may have. Please provide thoughtful and concise responses.
"""

# Function to generate chatbot responses
def chatbot_response(user_input, history):
    # Zephyr is an instruction-tuned chat model, so build the prompt with its chat template
    messages = [
        {"role": "system", "content": custom_system_content.strip()},
        {"role": "user", "content": user_input},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256)
    # Decode only the newly generated tokens, not the prompt
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    # gr.Chatbot expects the full history as a list of (user, assistant) pairs
    return history + [(user_input, response)]

# Gradio Blocks UI
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# Zephyr-7B Chatbot")
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(label="Your message", placeholder="Type your message here...")
            chatbot_output = gr.Chatbot(label="Chatbot Response", placeholder="Chatbot will respond here...")
        with gr.Column():
            submit_btn = gr.Button("Send")
            submit_btn.click(
                fn=chatbot_response,
                inputs=[user_input, chatbot_output],
                outputs=chatbot_output,
            )

demo.launch()