"""
cf https://huggingface.co/spaces/Nymbo/Qwen-2.5-72B-Instruct/blob/main/app.py
   https://huggingface.co/spaces/prithivMLmods/Llama-3.1-8B-Instruct/blob/main/app.py
https://github.com/huggingface/huggingface-llama-recipes/blob/main/api_inference/inference-api.ipynb
"""
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Optional: a token raises the rate limit; anonymous access also works.
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# An OpenAI client pointed at the same endpoint is an equivalent alternative:
# from openai import OpenAI
# client = OpenAI(
#     base_url="https://api-inference.huggingface.co/v1/",
#     api_key=ACCESS_TOKEN,
# )
client = InferenceClient(token=ACCESS_TOKEN)
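
# Minimal sanity check, kept as a comment (assumption: the model name matches
# the one used in respond() below; run once by hand, not inside the Space):
#
#   reply = client.chat.completions.create(
#       model="Qwen/Qwen2.5-72B-Instruct",
#       messages=[{"role": "user", "content": "Say hi."}],
#       max_tokens=16,
#   )
#   print(reply.choices[0].message.content)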

def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # With ChatInterface(type="messages"), history already arrives as
    # OpenAI-style dicts: {"role": "user" | "assistant", "content": ...}.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        _ = client.chat.completions.create(
        model="Qwen/Qwen2.5-72B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
        )
        for message in _:
            token = message.choices[0].delta.content
            response += token
            yield response
    except Exception as e:
        yield str(e)
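
# Debugging sketch (assumption: respond() is called directly, outside Gradio):
#
#   for partial in respond("Hello", [], "You are terse.", 64, 0.3, 0.95):
#       print(partial)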
        
chatbot = gr.Chatbot(height=600, type="messages")

css = '''
.gradio-container{max-width: 1000px !important}
h1{text-align:center}
footer {
    visibility: hidden
}
'''

demo = gr.ChatInterface(
    respond,
    type='messages',
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        # Reserve roughly half of the 32k context window (minus a 500-token
        # margin) for generation; the rest is left for the prompt.
        gr.Slider(minimum=1, maximum=32768 // 2 - 500, value=32768 // 2 - 500, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
    ],
    fill_height=True,
    chatbot=chatbot,
    css=css,
    examples=[[{"role": "user", "content": "Define 'deep learning' in once sentence."}]],
    # retry_btn="Retry",  # unexpected keyword argument 'retry_btn'
    # undo_btn="Undo",
    # clear_btn="Clear",
    # theme="allenai/gradio-theme",
    # theme="Nymbo/Alyx_Theme",
)
if __name__ == "__main__":
    demo.launch()