# --- Hugging Face Spaces page residue (not part of the app; kept as comments) ---
# Spaces: Sleeping / Sleeping
# File size: 2,586 Bytes
# 10e1692 f956d70 10e1692 05bde1f 10e1692 05bde1f a6c3106 05bde1f 10e1692 05bde1f 10e1692 05bde1f d1a0824 05bde1f d1a0824 05bde1f a6c3106 05bde1f 33f1e81 05bde1f 30c43ba 369961f 2ba3e9d 05bde1f 8dd48d6 33f1e81 2ba3e9d 8dd48d6 30c43ba 05bde1f
"""
cf https://huggingface.co/spaces/Nymbo/Qwen-2.5-72B-Instruct/blob/main/app.py
https://huggingface.co/spaces/prithivMLmods/Llama-3.1-8B-Instruct/blob/main/app.py
https://github.com/huggingface/huggingface-llama-recipes/blob/main/api_inference/inference-api.ipynb
"""
import gradio as gr
# from openai import OpenAI
from huggingface_hub import InferenceClient
import os
# ACCESS_TOKEN = os.getenv("HF_TOKEN")
# Dead code kept for reference: the OpenAI-compatible client variant.
# Note: the "# " on the closing line is *inside* the string literal; the
# trailing """ is what actually terminates it.
_ = """
client = OpenAI(
base_url="https://api-inference.huggingface.co/v1/",
api_key=ACCESS_TOKEN,
)
# """
# No token passed explicitly -- presumably InferenceClient picks up HF_TOKEN
# from the environment when set (NOTE(review): confirm auth for gated models).
client = InferenceClient()
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message* given the conversation *history*.

    Args:
        message: Latest user message (str).
        history: Prior turns. With ChatInterface(type='messages') this arrives
            as openai-style dicts ({"role", "content"}); legacy (user, bot)
            tuples are also accepted for compatibility.
        system_message: System prompt placed first in the message list.
        max_tokens / temperature / top_p: Sampling parameters forwarded to the
            inference API.

    Yields:
        The accumulated response text after each streamed token; on failure
        yields the exception text instead of raising, so the UI stays alive.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if isinstance(turn, dict):
            # openai-style message dict (type='messages' history format)
            if turn.get("content"):
                messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # legacy (user, assistant) tuple format
            if turn[0]:
                messages.append({"role": "user", "content": turn[0]})
            if turn[1]:
                messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        stream = client.chat.completions.create(
            model="Qwen/Qwen2.5-72B-Instruct",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        for chunk in stream:
            token = chunk.choices[0].delta.content
            # delta.content can be None (e.g. on the final chunk) -- skip it
            # instead of letting `response += None` raise and truncate output.
            if token:
                response += token
                yield response
    except Exception as e:
        yield str(e)
# --- Gradio UI --------------------------------------------------------------
chatbot = gr.Chatbot(height=600)

# Widen the app container, center the title, and hide the Gradio footer.
css = '''
.gradio-container{max-width: 1000px !important}
h1{text-align:center}
footer {
    visibility: hidden
}
'''

demo = gr.ChatInterface(
    respond,
    type='messages',
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        # Qwen2.5 context is 32768 tokens; reserve roughly half (minus a
        # 500-token margin) for the prompt, so generation defaults to the rest.
        gr.Slider(minimum=1, maximum=32768 // 2 - 500, value=32768 // 2 - 500, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
    ],
    fill_height=True,
    chatbot=chatbot,
    css=css,
    # fix: "in once sentence" -> "in one sentence"
    examples=[[{"role": "user", "content": "Define 'deep learning' in one sentence."}]],
    # NOTE: retry_btn / undo_btn / clear_btn / theme kwargs were removed from
    # ChatInterface in newer gradio releases and must not be passed.
)

if __name__ == "__main__":
    demo.launch()