import gradio as gr
from openai import OpenAI
import os

ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """
    Handle a chatbot response. Takes in:
    - message: the user's new message
    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
    - system_message: the system prompt
    - max_tokens: the maximum number of tokens to generate in the response
    - temperature: sampling temperature
    - top_p: top-p (nucleus) sampling
    - frequency_penalty: penalizes repeated tokens in the output
    - seed: a fixed seed for reproducibility; -1 means 'random'
    - custom_model: the final model name in use, set either by the Featured
      Models radio or by typing a custom model path
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Selected model (custom_model): {custom_model}")

    # Convert the -1 sentinel into "no fixed seed" for the API.
    if seed == -1:
        seed = None

    # Start the conversation context with the system prompt.
    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # Replay prior turns so the model sees the full conversation.
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Append the latest user message.
    messages.append({"role": "user", "content": message})
    print("Latest user message appended.")

    # Fall back to a default model when the Custom Model box is empty.
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"
    print(f"Model selected for inference: {model_to_use}")

    # Stream the completion, yielding the accumulated text after each token
    # so Gradio can render the response incrementally.
    response = ""
    print("Sending request to OpenAI API.")

    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # Some stream chunks (e.g. the role header or the final chunk) carry
        # no content; skip them instead of concatenating None into the text.
        token_text = message_chunk.choices[0].delta.content
        if not token_text:
            continue
        print(f"Received token: {token_text}")
        response += token_text
        yield response

    print("Completed response generation.")
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")

system_message_box = gr.Textbox(value="", label="System message")

max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=4096,
    value=512,
    step=1,
    label="Max new tokens"
)
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature"
)
top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-P"
)
frequency_penalty_slider = gr.Slider(
    minimum=-2.0,
    maximum=2.0,
    value=0.0,
    step=0.1,
    label="Frequency Penalty"
)
seed_slider = gr.Slider(
    minimum=-1,
    maximum=65535,
    value=-1,
    step=1,
    label="Seed (-1 for random)"
)

custom_model_box = gr.Textbox(
    value="",
    label="Custom Model",
    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."
)
def set_custom_model_from_radio(selected):
    """
    Triggered whenever someone picks a model from the 'Featured Models' radio.
    Automatically updates the Custom Model textbox with that selection.
    """
    print(f"Featured model selected: {selected}")
    return selected
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)
print("ChatInterface object created.")
with demo:
    with gr.Accordion("Featured Models", open=False):
        model_search_box = gr.Textbox(
            label="Filter Models",
            placeholder="Search for a featured model...",
            lines=1
        )
        print("Model search box created.")

        models_list = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "NousResearch/Hermes-3-Llama-3.1-8B",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "google/gemma-2-2b-it",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "Qwen/Qwen2.5-72B-Instruct",
            "Qwen/QwQ-32B-Preview",
            "PowerInfer/SmallThinker-3B-Preview",
            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "microsoft/Phi-3.5-mini-instruct",
        ]
        print("Models list initialized.")

        featured_model_radio = gr.Radio(
            label="Select a model below",
            choices=models_list,
            value="meta-llama/Llama-3.3-70B-Instruct",
            interactive=True
        )
        print("Featured models radio button created.")

        def filter_models(search_term):
            print(f"Filtering models with search term: {search_term}")
            filtered = [m for m in models_list if search_term.lower() in m.lower()]
            print(f"Filtered models: {filtered}")
            return gr.update(choices=filtered)

        model_search_box.change(
            fn=filter_models,
            inputs=model_search_box,
            outputs=featured_model_radio
        )
        print("Model search box change event linked.")

        featured_model_radio.change(
            fn=set_custom_model_from_radio,
            inputs=featured_model_radio,
            outputs=custom_model_box
        )
        print("Featured model radio button change event linked.")
with gr.Tab("Information"): |
|
with gr.Row(): |
|
|
|
with gr.Accordion("Featured Models", open=False): |
|
gr.HTML( |
|
""" |
|
<table style="width:100%; text-align:center; margin:auto;"> |
|
<tr> |
|
<th>Model Name</th> |
|
<th>Typography</th> |
|
<th>Notes</th> |
|
</tr> |
|
<tr> |
|
<td>meta-llama/Llama-3.3-70B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>meta-llama/Llama-3.2-3B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>meta-llama/Llama-3.2-1B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>meta-llama/Llama-3.1-8B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>NousResearch/Hermes-3-Llama-3.1-8B</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>google/gemma-2-27b-it</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>google/gemma-2-9b-it</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>google/gemma-2-2b-it</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>mistralai/Mistral-Nemo-Instruct-2407</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>mistralai/Mixtral-8x7B-Instruct-v0.1</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>mistralai/Mistral-7B-Instruct-v0.3</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>Qwen/Qwen2.5-72B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>Qwen/QwQ-32B-Preview</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>PowerInfer/SmallThinker-3B-Preview</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>HuggingFaceTB/SmolLM2-1.7B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>TinyLlama/TinyLlama-1.1B-Chat-v1.0</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>microsoft/Phi-3.5-mini-instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
</table> |
|
""" |
|
) |
|
|
|
|
|
with gr.Accordion("Parameters Overview", open=False): |
|
gr.Markdown( |
|
""" |
|
## System Message |
|
###### This box is for setting the initial context or instructions for the AI. It helps guide the AI on how to respond to your inputs. |
|
|
|
## Max New Tokens |
|
###### This slider allows you to specify the maximum number of tokens (words or parts of words) the AI can generate in a single response. The default value is 512, and the maximum is 4096. |
|
|
|
## Temperature |
|
###### Temperature controls the randomness of the AI's responses. A higher temperature makes the responses more creative and varied, while a lower temperature makes them more predictable and focused. The default value is 0.7. |
|
|
|
## Top-P (Nucleus Sampling) |
|
###### Top-P sampling is another way to control the diversity of the AI's responses. It ensures that the AI only considers the most likely tokens up to a cumulative probability of P. The default value is 0.95. |
|
|
|
## Frequency Penalty |
|
###### This penalty discourages the AI from repeating the same tokens (words or phrases) in its responses. A higher penalty reduces repetition. The default value is 0.0. |
|
|
|
## Seed |
|
###### The seed is a number that ensures the reproducibility of the AI's responses. If you set a specific seed, the AI will generate the same response every time for the same input. If you set it to -1, the AI will generate a random seed each time. |
|
|
|
## Custom Model |
|
###### You can specify a custom Hugging Face model path here. This will override any selected featured model. This is optional and allows you to use models not listed in the featured models. |
|
|
|
### Remember, these settings are all about giving you control over the text generation process. Feel free to experiment and see what each one does. And if you're ever in doubt, the default settings are a great place to start. Happy creating! |
|
""" |
|
) |
|
|
|
print("Gradio interface initialized.") |
|
|
|
if __name__ == "__main__": |
|
print("Launching the demo application.") |
|
demo.launch() |