import gradio as gr
from openai import OpenAI
import os
# Hugging Face API token, read from the environment. May be None if HF_TOKEN
# is unset — the client is still constructed, but requests will fail to auth.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# OpenAI-compatible client pointed at the Hugging Face Inference API
# (the HF router speaks the OpenAI chat-completions wire format).
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message="You are a helpful assistant.",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
    frequency_penalty=0.0,
    seed=-1,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs; either side may be empty.
        system_message: System prompt prepended to the conversation.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature, passed through to the API.
        top_p: Nucleus-sampling cutoff, passed through to the API.
        frequency_penalty: Repetition penalty, passed through to the API.
        seed: RNG seed; -1 means "random" and is translated to None.

    Yields:
        The accumulated assistant response text after each streamed token,
        which is the incremental-update shape gr.ChatInterface expects.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")

    # The API expects seed=None for nondeterministic sampling; the UI slider
    # uses -1 as its "random" sentinel.
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # Replay prior turns so the model sees the full conversation context.
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Append the latest user message.
    messages.append({"role": "user", "content": message})
    print("Latest user message appended.")

    # The model is fixed; there is no model-selection UI in this app.
    model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
    print(f"Model selected for inference: {model_to_use}")

    # Build the response incrementally as tokens stream in.
    response = ""
    print("Sending request to OpenAI API.")

    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # BUG FIX: some stream chunks carry an empty `choices` list, and
        # role-only / final deltas have `delta.content is None`. The original
        # code did `response += token_text` unconditionally, which raised
        # "TypeError: can only concatenate str (not 'NoneType') to str".
        if not message_chunk.choices:
            continue
        token_text = message_chunk.choices[0].delta.content
        if token_text is None:
            continue
        print(f"Received token: {token_text}")
        response += token_text
        yield response

    print("Completed response generation.")
# GRADIO UI
# Component definitions below are passed into gr.ChatInterface; their module-
# level names are referenced when the interface is constructed.

# Main chat display.
# NOTE(review): `likeable` was renamed/removed in newer Gradio releases —
# confirm against the pinned Gradio version before upgrading.
chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Start chatting!", likeable=True, layout="panel")
print("Chatbot interface created.")

# Hidden system prompt; its value is forwarded to respond() as system_message.
system_message_box = gr.Textbox(value="You are a helpful assistant.", label="System Prompt", visible=False)

# Sampling controls — defaults mirror respond()'s parameter defaults.
max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=4096,
    value=512,
    step=1,
    label="Max new tokens"
)
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature"
)
top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-P"
)
frequency_penalty_slider = gr.Slider(
    minimum=-2.0,
    maximum=2.0,
    value=0.0,
    step=0.1,
    label="Frequency Penalty"
)
# -1 is the "random seed" sentinel; respond() maps it to seed=None.
seed_slider = gr.Slider(
    minimum=-1,
    maximum=65535,
    value=-1,
    step=1,
    label="Seed (-1 for random)"
)
# Removed the custom_model_box as the model is pre-set
demo = gr.ChatInterface(
fn=respond,
additional_inputs=[
system_message_box,
max_tokens_slider,
temperature_slider,
top_p_slider,
frequency_penalty_slider,
seed_slider,
],
fill_height=True,
chatbot=chatbot,
theme="Nymbo/Nymbo_Theme",
)
print("ChatInterface object created.")
# NOTE(review): this context block adds no components; it appears to be a
# leftover from a removed model-selection accordion. Presumably safe to drop,
# but Blocks render-context semantics should be confirmed first.
with demo:
    # No need for a model selection accordion since the model is fixed to "meta-llama"
    pass
print("Gradio interface initialized.")

# Launch only when run as a script (not when imported).
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()