import gradio as gr
from openai import OpenAI
import os

ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """
    Handle a chatbot response. Takes in:
    - message: the user's new message
    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
    - system_message: the system prompt
    - max_tokens: the maximum number of tokens to generate in the response
    - temperature: sampling temperature
    - top_p: top-p (nucleus) sampling
    - frequency_penalty: penalizes repeated tokens in the output
    - seed: a fixed seed for reproducibility; -1 means 'random'
    - custom_model: the final model name in use, set either by the Featured
      Models radio or by typing a custom model path
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Selected model (custom_model): {custom_model}")

    # Convert the -1 sentinel into "no fixed seed" for the API.
    if seed == -1:
        seed = None

    # Start the conversation context with the system prompt.
    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # Replay prior turns so the model sees the full conversation.
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Append the latest user message.
    messages.append({"role": "user", "content": message})
    print("Latest user message appended.")

    # Fall back to a default model when the Custom Model box is empty.
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"
    print(f"Model selected for inference: {model_to_use}")

    # Stream the completion, yielding the accumulated text after each token
    # so Gradio can render the response incrementally.
    response = ""
    print("Sending request to OpenAI API.")

    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # Some stream chunks (e.g. the role header or the final chunk) carry
        # no content; skip them instead of concatenating None into the text.
        token_text = message_chunk.choices[0].delta.content
        if not token_text:
            continue
        print(f"Received token: {token_text}")
        response += token_text
        yield response

    print("Completed response generation.")
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")

system_message_box = gr.Textbox(value="", label="System message")

max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=4096,
    value=512,
    step=1,
    label="Max new tokens"
)
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature"
)
top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-P"
)
frequency_penalty_slider = gr.Slider(
    minimum=-2.0,
    maximum=2.0,
    value=0.0,
    step=0.1,
    label="Frequency Penalty"
)
seed_slider = gr.Slider(
    minimum=-1,
    maximum=65535,
    value=-1,
    step=1,
    label="Seed (-1 for random)"
)

custom_model_box = gr.Textbox(
    value="",
    label="Custom Model",
    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."
)
def set_custom_model_from_radio(selected):
    """
    Triggered whenever someone picks a model from the 'Featured Models' radio.
    Automatically updates the Custom Model textbox with that selection.
    """
    print(f"Featured model selected: {selected}")
    return selected
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)
print("ChatInterface object created.")
with demo:
    with gr.Accordion("Featured Models", open=False):
        model_search_box = gr.Textbox(
            label="Filter Models",
            placeholder="Search for a featured model...",
            lines=1
        )
        print("Model search box created.")

        models_list = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "NousResearch/Hermes-3-Llama-3.1-8B",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "google/gemma-2-2b-it",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "Qwen/Qwen2.5-72B-Instruct",
            "Qwen/QwQ-32B-Preview",
            "PowerInfer/SmallThinker-3B-Preview",
            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "microsoft/Phi-3.5-mini-instruct",
        ]
        print("Models list initialized.")

        featured_model_radio = gr.Radio(
            label="Select a model below",
            choices=models_list,
            value="meta-llama/Llama-3.3-70B-Instruct",
            interactive=True
        )
        print("Featured models radio button created.")

        def filter_models(search_term):
            print(f"Filtering models with search term: {search_term}")
            filtered = [m for m in models_list if search_term.lower() in m.lower()]
            print(f"Filtered models: {filtered}")
            return gr.update(choices=filtered)

        model_search_box.change(
            fn=filter_models,
            inputs=model_search_box,
            outputs=featured_model_radio
        )
        print("Model search box change event linked.")

        featured_model_radio.change(
            fn=set_custom_model_from_radio,
            inputs=featured_model_radio,
            outputs=custom_model_box
        )
        print("Featured model radio button change event linked.")
with gr.Tab("Information"): |
|
with gr.Row(): |
|
|
|
with gr.Accordion("Featured Models", open=False): |
|
gr.HTML( |
|
""" |
|
<table style="width:100%; text-align:center; margin:auto;"> |
|
<tr> |
|
<th>Model Name</th> |
|
<th>Typography</th> |
|
<th>Notes</th> |
|
</tr> |
|
<tr> |
|
<td>meta-llama/Llama-3.3-70B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>meta-llama/Llama-3.2-3B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>meta-llama/Llama-3.2-1B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>meta-llama/Llama-3.1-8B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>NousResearch/Hermes-3-Llama-3.1-8B</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>google/gemma-2-27b-it</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>google/gemma-2-9b-it</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>google/gemma-2-2b-it</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>mistralai/Mistral-Nemo-Instruct-2407</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>mistralai/Mixtral-8x7B-Instruct-v0.1</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>mistralai/Mistral-7B-Instruct-v0.3</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>Qwen/Qwen2.5-72B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>Qwen/QwQ-32B-Preview</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>PowerInfer/SmallThinker-3B-Preview</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>HuggingFaceTB/SmolLM2-1.7B-Instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>TinyLlama/TinyLlama-1.1B-Chat-v1.0</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
<tr> |
|
<td>microsoft/Phi-3.5-mini-instruct</td> |
|
<td>✅</td> |
|
<td></td> |
|
</tr> |
|
</table> |
|
""" |
|
) |
|
|
|
|
|
with gr.Accordion("Parameters Overview", open=False): |
|
gr.Markdown( |
|
""" |
|
## System Message |
|
###### This box is for setting the initial context or instructions for the AI. It helps guide the AI on how to respond to your inputs. |
|
|
|
## Max New Tokens |
|
###### This slider allows you to specify the maximum number of tokens (words or parts of words) the AI can generate in a single response. The default value is 512, and the maximum is 4096. |
|
|
|
## Temperature |
|
###### Temperature controls the randomness of the AI's responses. A higher temperature makes the responses more creative and varied, while a lower temperature makes them more predictable and focused. The default value is 0.7. |
|
|
|
## Top-P (Nucleus Sampling) |
|
###### Top-P sampling is another way to control the diversity of the AI's responses. It ensures that the AI only considers the most likely tokens up to a cumulative probability of P. The default value is 0.95. |
|
|
|
## Frequency Penalty |
|
###### This penalty discourages the AI from repeating the same tokens (words or phrases) in its responses. A higher penalty reduces repetition. The default value is 0.0. |
|
|
|
## Seed |
|
###### The seed is a number that ensures the reproducibility of the AI's responses. If you set a specific seed, the AI will generate the same response every time for the same input. If you set it to -1, the AI will generate a random seed each time. |
|
|
|
## Custom Model |
|
###### You can specify a custom Hugging Face model path here. This will override any selected featured model. This is optional and allows you to use models not listed in the featured models. |
|
|
|
### Remember, these settings are all about giving you control over the text generation process. Feel free to experiment and see what each one does. And if you're ever in doubt, the default settings are a great place to start. Happy creating! |
|
""" |
|
) |
|
|
|
print("Gradio interface initialized.") |
|
|
|
if __name__ == "__main__": |
|
print("Launching the demo application.") |
|
demo.launch() |