Spaces:
Sleeping
Sleeping
File size: 2,152 Bytes
c3d5f5c ed1fa2d c3d5f5c ed1fa2d 90fabc5 ed1fa2d 90fabc5 ed1fa2d 90fabc5 ed1fa2d c3d5f5c ed1fa2d 90fabc5 ed1fa2d 90fabc5 c3d5f5c ed1fa2d 90fabc5 c3d5f5c 90fabc5 ed1fa2d c3d5f5c ed1fa2d c3d5f5c 1f7117d 55698e4 90fabc5 c3d5f5c 90fabc5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
from huggingface_hub import InferenceClient
import gradio as gr
import json
# Hosted model that every chat request in this app is sent to.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# Shared inference client, created once at import time and reused per request.
client = InferenceClient(MODEL_ID)
def _build_messages(message, chat_history, system_message):
    """Return the chat-completion message list for one request.

    The list starts with the system prompt, replays the prior turns, and ends
    with the new user ``message``.

    ``chat_history`` may be ``None``/empty, a sequence of
    ``(user_msg, bot_msg)`` pairs, or a sequence of
    ``{"role": ..., "content": ...}`` dicts. Empty entries are skipped.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in chat_history or []:
        if isinstance(turn, dict):
            # Messages-style entry: forward only role/content. Unpacking the
            # dict as a pair would yield its *keys*, not its values.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
            continue
        user_msg, bot_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return messages


def respond(message, chat_history, system_message, max_tokens, temperature, top_p):
    """Stream the model's reply to ``message`` as a progressively longer string.

    Args:
        message: The new user message.
        chat_history: Prior turns, as ``(user_msg, bot_msg)`` pairs or as
            ``{"role", "content"}`` dicts; may be empty or ``None``.
        system_message: System prompt placed first in the conversation.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed chunk that has a
        choice. If anything raises, the exception is printed and a fixed
        error string is yielded instead, so the caller always gets text back.
    """
    try:
        # 1. Log received data (for debugging). default=str keeps an
        #    unserialisable history entry from aborting the whole request.
        data_received = {
            "message": message,
            "chat_history": chat_history,
            "system_message": system_message,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
        }
        print(json.dumps(data_received, indent=4, default=str))

        # 2. Convert chat_history to messages format
        messages = _build_messages(message, chat_history, system_message)

        # 3. Call Inference API and stream the partial reply back
        response = ""
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if not chunk.choices:
                continue
            # A chunk may carry no text (content of None); treat it as empty
            # instead of letting `str + None` discard the reply so far.
            response += chunk.choices[0].delta.content or ""
            yield response
    except Exception as e:
        print(f"An error occurred: {e}")
        yield "An error occurred during processing."  # Important: Yield an error message
# Chat UI. gr.ChatInterface (not gr.Chatbot, which is only the message-display
# component) is the wrapper that takes a response callback plus
# `additional_inputs`; the values of those inputs are passed to `respond`
# after the message and the history, in the order listed here.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are an AI assistant responding to missed calls with text messaging. Keep responses short and specific.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=100, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.2, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.50, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|