import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Model to load. Note that Llama-2-7b-hf is the base model, which continues
# text rather than following chat instructions (the chat-tuned variant is
# meta-llama/Llama-2-7b-chat-hf).
model_name = "meta-llama/Llama-2-7b-hf"

# Hugging Face access token (required for the gated Llama-2 weights)
token = "HUGGINGFACE_TOKEN"  # Replace this with your Hugging Face token

# Load the tokenizer and model from the Hugging Face Hub
# (the token= keyword replaces the deprecated use_auth_token=)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=token)

# Generate a response from the model and track the conversation
def generate_response(user_input, chat_history=None):
    if chat_history is None:
        chat_history = []

    # Encode the user input
    inputs = tokenizer.encode(user_input, return_tensors="pt")

    # Generate a continuation; max_new_tokens bounds only the reply length,
    # so a long prompt does not eat into the generation budget the way
    # max_length would
    outputs = model.generate(
        inputs,
        max_new_tokens=150,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(
        outputs[0][inputs.shape[-1]:], skip_special_tokens=True
    )

    # Append the exchange to the chat history
    chat_history.append((user_input, response))

    # Return the response and the updated chat history
    return response, chat_history

# Set up the Gradio interface; gr.State carries the chat history across calls.
# live=True is deliberately not used here, since it would re-run generation
# on every keystroke.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Your Message", placeholder="Ask me anything!", lines=2),
        gr.State(),
    ],
    outputs=[
        gr.Textbox(label="Response", lines=3),
        gr.State(),
    ],
    title="Llama-2 Chatbot",
    description="Ask me anything, and I'll respond using the Llama-2 model.",
)

# Launch the Gradio interface
iface.launch()
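
# Optional: instead of hardcoding the token above, you can authenticate once
# with huggingface_hub. This is a sketch of one alternative, not part of the
# app itself; it assumes the huggingface_hub package is available (it is
# installed as a dependency of transformers).
#
#   from huggingface_hub import login
#   login(token="HUGGINGFACE_TOKEN")  # or run `huggingface-cli login` in a shell
#
# After logging in, the stored credential is picked up automatically, so the
# token= arguments to from_pretrained can be omitted.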