"""Gradio chat UI that forwards the conversation to a Hugging Face chat model.

The script logs in to the Hugging Face Hub, wraps the
``HuggingFaceH4/zephyr-7b-beta`` text-generation endpoint in a LangChain chat
model, and serves it through a ``gr.ChatInterface``.
"""

import os
import random

import gradio as gr
from huggingface_hub import InferenceClient, login
from langchain.schema import AIMessage, HumanMessage
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline

# Indexing os.environ (rather than .get) makes a missing token fail
# immediately with a KeyError.
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])

llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)
model = ChatHuggingFace(llm=llm)


def _as_langchain_message(turn):
    """Convert one history entry to a LangChain message, or ``None``.

    ``turn`` is a mapping with ``'role'`` and ``'content'`` keys. Entries
    whose role is neither ``"user"`` nor ``"assistant"`` yield ``None``.
    """
    role = turn['role']
    if role == "user":
        return HumanMessage(content=turn['content'])
    if role == "assistant":
        return AIMessage(content=turn['content'])
    return None


def predict(message, history):
    """Return the model's reply to ``message`` given the prior ``history``.

    Args:
        message: The newest user message.
        history: Iterable of mappings with ``'role'`` and ``'content'``
            keys. Only ``"user"`` and ``"assistant"`` entries are forwarded
            to the model; any other role is skipped.

    Returns:
        The ``content`` attribute of the response from ``model.invoke``.
    """
    converted = (_as_langchain_message(turn) for turn in history)
    conversation = [entry for entry in converted if entry is not None]
    # The new user message always comes last, after the replayed history.
    conversation.append(HumanMessage(content=message))
    reply = model.invoke(conversation)
    return reply.content


demo = gr.ChatInterface(
    predict,
    type="messages",
)

demo.launch()