import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer import torch # Load the Hugging Face model and tokenizer model_name = "HuggingFaceH4/zephyr-7b-beta" tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16) # Define custom system content custom_system_content = """ You are a helpful chatbot designed to assist users with any questions or tasks they may have. Please provide thoughtful and concise responses. """ # Function to generate chatbot responses def chatbot_response(user_input): inputs = tokenizer(custom_system_content + user_input, return_tensors="pt") outputs = model.generate(**inputs, max_length=256) response = tokenizer.decode(outputs[0], skip_special_tokens=True) return response[len(custom_system_content):] # Gradio Blocks UI with gr.Blocks() as demo: with gr.Row(): gr.Markdown("