|
from huggingface_hub import InferenceClient |
|
import gradio as gr |
|
|
|
# Inference client created once at import time with default arguments and
# shared by every call to `respond`; the model to query is passed per request.
client = InferenceClient()
|
|
|
def respond(
    prompt: str,
    history,
):
    """Stream an assistant reply to ``prompt`` given the conversation so far.

    This is a generator. Every yielded value is the complete message list to
    display, so the caller can re-render the conversation as the reply grows.

    Args:
        prompt: The user's new message.
        history: Previous messages as ``{"role": ..., "content": ...}`` dicts.
            An empty or ``None`` value starts a new conversation seeded with
            the system prompt. The passed-in list is never modified.

    Yields:
        First the conversation with the user's message appended, then, after
        each streamed chunk, the conversation plus the partial assistant
        message as its last element.
    """
    if not history:
        # New conversation: seed it with the system prompt.
        history = [{"role": "system", "content": "You are a friendly chatbot"}]

    # Build a new list instead of appending in place so the caller's list is
    # left untouched.
    history = [*history, {"role": "user", "content": prompt}]

    # Show the user's message right away, before the model starts answering.
    yield history

    response = {"role": "assistant", "content": ""}
    for message in client.chat_completion(
        history,
        temperature=0.95,
        top_p=0.9,
        max_tokens=512,
        stream=True,
        model="HuggingFaceH4/zephyr-7b-beta",
    ):
        # A chunk without choices carries no text; indexing it would raise
        # IndexError, so skip it.
        if not message.choices:
            continue
        # The delta content may be None on some chunks; treat that as "".
        response["content"] += message.choices[0].delta.content or ""

        yield history + [response]
|
|
|
|
|
# Page layout: a title, the conversation view, and a single-line input box.
with gr.Blocks() as demo:
    gr.Markdown("# Chat with Hugging Face Zephyr 7b 🤗")

    # Conversation view using role/content message dicts. The avatar pair is
    # (user, assistant) per the Gradio docs: no user avatar, emoji for the bot.
    chatbot = gr.Chatbot(
        type="messages",
        label="Agent",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/376/hugging-face_1f917.png",
        ),
    )
    prompt = gr.Textbox(label="Chat Message", max_lines=1)

    # Two listeners on the same submit event: one streams the reply into the
    # chatbot, the other clears the input box.
    prompt.submit(fn=respond, inputs=[prompt, chatbot], outputs=[chatbot])
    prompt.submit(fn=lambda: "", inputs=None, outputs=[prompt])


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()