import gradio as gr import torch from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # Set seed for reproducibility torch.random.manual_seed(0) # Load the model and tokenizer model = AutoModelForCausalLM.from_pretrained( "microsoft/Phi-3.5-mini-instruct", device_map="cpu", torch_dtype="auto", trust_remote_code=True, ) tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct") # Define the pipeline pipe = pipeline( "text-generation", model=model, tokenizer=tokenizer, ) # System message (invisible to the user) SYSTEM_MESSAGE = {"role": "system", "content": "You are a helpful AI assistant."} # Function to process the user input and generate output def chatbot_response(conversation_history): # Build message sequence messages = [SYSTEM_MESSAGE] + [ {"role": "user", "content": message["user_input"]} for message in conversation_history ] # Pass messages to the model generation_args = { "max_new_tokens": 500, "return_full_text": False, "temperature": 0.0, "do_sample": False, } output = pipe(messages, **generation_args) assistant_reply = output[0]["generated_text"] # Append assistant's response to history conversation_history[-1]["assistant_reply"] = assistant_reply return conversation_history # Define Gradio interface with gr.Blocks() as demo: gr.Markdown("# AI Chatbot with System Message") with gr.Row(): with gr.Column(): chatbox = gr.Chatbot() input_box = gr.Textbox(label="Your Message") submit_btn = gr.Button("Submit") conversation_state = gr.State([]) # Maintain conversation history def update_conversation(user_input, history): if user_input.strip(): history.append({"user_input": user_input}) updated_history = chatbot_response(history) return updated_history, "" return history, "" submit_btn.click( update_conversation, inputs=[input_box, conversation_state], outputs=[conversation_state, input_box], ) chatbox.update(chatbot_response(conversation_state)) # Launch the interface demo.launch()