# missed-call / app.py
# Hugging Face Space file-viewer header (page residue, not part of the program):
# ajmerphull's picture · Update app.py · 1f7117d verified · raw · history blame · 2.15 kB
from huggingface_hub import InferenceClient
import gradio as gr
import json
# Module-level Inference API client shared by every call to respond(),
# bound to the HuggingFaceH4/zephyr-7b-beta model.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
def _history_to_messages(chat_history):
    """Flatten Gradio chat history into a list of role/content dicts."""
    messages = []
    for entry in chat_history or []:
        if isinstance(entry, dict):
            # "messages"-style history: each entry already carries its own
            # role and content, so copy just those two fields across.
            messages.append({"role": entry["role"], "content": entry["content"]})
            continue
        # "tuples"-style history: one (user, assistant) pair per turn; either
        # side may be empty/None and is then skipped.
        user_msg, bot_msg = entry
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    return messages


def respond(message, chat_history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion for ``message`` as a growing reply string.

    Args:
        message: The newest user message.
        chat_history: Prior turns, either as (user, assistant) pairs or as
            dicts with "role" and "content" keys.
        system_message: Text sent as the leading "system" message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The reply accumulated so far, once per streamed chunk. If anything
        fails, the exception is printed and a single generic error string is
        yielded instead of raising.
    """
    try:
        # 1. Log received data (for debugging). default=str keeps a
        # non-JSON-serialisable history entry from aborting the whole reply.
        data_received = {
            "message": message,
            "chat_history": chat_history,
            "system_message": system_message,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
        }
        print(json.dumps(data_received, indent=4, default=str))

        # 2. Convert chat_history to messages format.
        messages = [{"role": "system", "content": system_message}]
        messages.extend(_history_to_messages(chat_history))
        messages.append({"role": "user", "content": message})

        # 3. Call Inference API and stream the reply back.
        response = ""
        # The loop variable is deliberately not called `message`, so the
        # function parameter is never shadowed.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if not chunk.choices:
                continue
            # A chunk's delta content can be None (e.g. a chunk that only
            # signals the end of the stream); treat that as an empty token
            # instead of failing on `str += None`.
            token = chunk.choices[0].delta.content
            response += token or ""
            yield response
    except Exception as e:
        print(f"An error occurred: {e}")
        # Important: yield an error message so the caller still gets a reply.
        yield "An error occurred during processing."
# Chat UI wired to respond(). gr.ChatInterface (not gr.Chatbot, which is only
# a display component) is the class that takes a chat callback plus
# additional_inputs and exposes .launch(). The additional inputs are passed to
# respond() after (message, history), in the order listed here:
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are an AI assistant responding to missed calls with text messaging. Keep responses short and specific.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=100, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.2, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.50, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()