File size: 2,152 Bytes
c3d5f5c
ed1fa2d
 
c3d5f5c
 
 
ed1fa2d
90fabc5
ed1fa2d
90fabc5
 
ed1fa2d
90fabc5
 
 
 
 
ed1fa2d
c3d5f5c
ed1fa2d
90fabc5
ed1fa2d
 
 
 
 
90fabc5
c3d5f5c
ed1fa2d
90fabc5
 
 
 
 
 
 
 
 
 
 
c3d5f5c
90fabc5
ed1fa2d
 
 
c3d5f5c
 
ed1fa2d
c3d5f5c
 
1f7117d
55698e4
 
90fabc5
c3d5f5c
 
 
 
 
90fabc5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from huggingface_hub import InferenceClient
import gradio as gr
import json

# Serverless Inference API client pinned to the Zephyr-7B chat model.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def respond(message, chat_history, system_message, max_tokens, temperature, top_p):
    """Stream an assistant reply for a Gradio chat app.

    Builds an OpenAI-style message list from the system prompt, the prior
    (user, assistant) turn pairs, and the new user message, then streams the
    model's completion, yielding the growing response text after each token.

    Args:
        message: The new user message.
        chat_history: List of (user_msg, bot_msg) tuples from previous turns.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text so far (Gradio streaming convention),
        or a fixed error string if anything fails.
    """
    try:
        # 1. Log received data (for debugging).
        data_received = {
            "message": message,
            "chat_history": chat_history,
            "system_message": system_message,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
        }
        print(json.dumps(data_received, indent=4))

        # 2. Convert chat_history to the chat-completion messages format.
        messages = [{"role": "system", "content": system_message}]
        for user_msg, bot_msg in chat_history:  # unpack the chat history
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})

        # 3. Call the Inference API and stream tokens back.
        response = ""
        # NOTE: use a distinct loop variable — the original shadowed the
        # `message` parameter here.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            # delta.content can be None on some stream chunks (e.g. the
            # final one); skip those to avoid `str + None` TypeError.
            if token:
                response += token
                yield response

    except Exception as e:
        # Boundary handler: log and surface a safe message to the UI
        # instead of crashing the Gradio event.
        print(f"An error occurred: {e}")
        yield "An error occurred during processing."  # Important: Yield an error message



# gr.ChatInterface wires the `respond` generator to a chat UI and renders the
# extra controls below it. (The original used gr.Chatbot, which is only a
# display component — it takes no callback and no `additional_inputs`, so
# construction raised a TypeError.)
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are an AI assistant responding to missed calls with text messaging. Keep responses short and specific.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=100, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.2, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.50, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()