# Spaces: Running
import os
import random

import gradio as gr
from huggingface_hub import InferenceClient, login
from langchain.schema import AIMessage, HumanMessage
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline
# Authenticate against the Hugging Face Hub. The token is read from the
# HUGGINGFACEHUB_API_TOKEN environment variable; a missing variable raises
# KeyError here, before any endpoint is configured.
_hf_token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
login(token=_hf_token)

# Text-generation endpoint for the zephyr-7b-beta model: sampling is disabled,
# each reply is capped at 512 new tokens, and a repetition penalty of 1.03 is
# passed through to the endpoint.
llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)

# Chat wrapper around the endpoint; `predict` invokes it with a message list.
model = ChatHuggingFace(llm=llm)
def predict(message, history):
    """Return the chat model's reply to ``message`` given prior ``history``.

    Args:
        message: The new user message text.
        history: Previous turns as a sequence of mappings, each with a
            ``"role"`` and a ``"content"`` key. Only ``"user"`` and
            ``"assistant"`` turns are forwarded; any other role is skipped.

    Returns:
        The ``content`` attribute of the response from ``model.invoke``.
    """
    # Map each supported role to the LangChain message class that represents it.
    message_type_for_role = {"user": HumanMessage, "assistant": AIMessage}

    conversation = [
        message_type_for_role[turn["role"]](content=turn["content"])
        for turn in history
        if turn["role"] in message_type_for_role
    ]
    # The new user message always goes last so the model answers it.
    conversation.append(HumanMessage(content=message))

    reply = model.invoke(conversation)
    return reply.content
# Chat UI wired to `predict`. type="messages" makes Gradio pass the history as
# a list of {"role": ..., "content": ...} dicts, which is the shape `predict`
# reads.
demo = gr.ChatInterface(fn=predict, type="messages")

# Start the web server when the script is executed.
demo.launch()