|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Summarize ``message`` with the hosted lit2vec TL;DR BART model.

    The instruction and the user text are joined into one prompt, sent
    through ``InferenceClient.text_generation`` without streaming, and the
    reply is yielded exactly once.

    Args:
        message: Text typed by the user.
        history: Earlier chat turns passed in by the chat interface. Unused:
            every request is handled independently.
        system_message: Instruction placed before the user text. Omitted from
            the prompt when blank.
        max_tokens: Forwarded to the model as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        hf_token: OAuth token object; its ``token`` attribute authenticates
            the inference request.

    Yields:
        The generated text, or a short hint when ``message`` is blank.
    """
    user_text = (message or "").strip()
    if not user_text:
        # Nothing to summarize: skip the remote call rather than sending the
        # bare instruction to the model.
        yield "Please enter some text to summarize."
        return

    instruction = (system_message or "").strip()
    # Leave out a blank instruction so the prompt never starts with newlines.
    full_input = f"{instruction}\n\n{user_text}" if instruction else user_text

    client = InferenceClient(
        token=hf_token.token,
        model="Bocklitz-Lab/lit2vec-tldr-bart-model",
    )

    # NOTE(review): the model id names a BART checkpoint; confirm the serving
    # backend exposes it under the text-generation task. InferenceClient also
    # provides summarization() for summarization models.
    response = client.text_generation(
        full_input,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=False,
    )

    yield response
|
|
|
|
|
# Chat UI for the summarizer. The extra controls are listed in the same order
# as respond()'s system_message, max_tokens, temperature and top_p parameters.
_summary_display = gr.Chatbot()
_summary_box = gr.Textbox(
    placeholder="Enter text to summarize...",
    container=False,
    scale=7,
)
_generation_controls = [
    gr.Textbox(
        value="Summarize the following scientific text.",
        label="System message",
    ),
    gr.Slider(
        minimum=16,
        maximum=1024,
        value=256,
        step=8,
        label="Max new tokens",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=4.0,
        value=0.7,
        step=0.1,
        label="Temperature",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p",
    ),
]

chatbot = gr.ChatInterface(
    fn=respond,
    chatbot=_summary_display,
    textbox=_summary_box,
    additional_inputs=_generation_controls,
)
|
|
|
# Page layout: a sidebar holding the login button, plus the chat interface.
# NOTE(review): the original indentation was lost in this copy of the file;
# chatbot.render() is placed directly under Blocks, outside the sidebar —
# confirm against the deployed app.
demo = gr.Blocks()
with demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|