Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed on Apr 30

Commit

d92e5cd

verified ·

1 Parent(s): a3f21b0

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -103

app.py CHANGED Viewed

@@ -16,7 +16,9 @@ def respond(
     frequency_penalty,
     seed,
     custom_model,
-    provider  # New parameter for provider selection
 ):
     print(f"Received message: {message}")
     print(f"History: {history}")
@@ -25,6 +27,8 @@ def respond(
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Selected provider: {provider}")
     # Initialize the Inference Client with the provider
     # Provider is specified during initialization, not in the method call
@@ -54,8 +58,8 @@ def respond(
     messages.append({"role": "user", "content": message})
     print("Latest user message appended.")
-    # If user provided a model, use that; otherwise, fall back to a default model
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
     # Start with an empty string to build the response as tokens stream in
@@ -106,6 +110,7 @@ def respond(
 chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
 print("Chatbot interface created.")
 system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
 max_tokens_slider = gr.Slider(
@@ -113,7 +118,7 @@ max_tokens_slider = gr.Slider(
     maximum=4096,
     value=512,
     step=1,
-    label="Max new tokens"
 )
 temperature_slider = gr.Slider(
     minimum=0.1,
@@ -144,7 +149,7 @@ seed_slider = gr.Slider(
     label="Seed (-1 for random)"
 )
-# The custom_model_box is what the respond function sees as "custom_model"
 custom_model_box = gr.Textbox(
     value="",
     label="Custom Model",
@@ -152,7 +157,7 @@ custom_model_box = gr.Textbox(
     placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
-# Available providers as of April 2025
 providers_list = [
     "hf-inference",  # Default Hugging Face Inference
     "cerebras",      # Cerebras provider
@@ -169,7 +174,6 @@ providers_list = [
     "openai"         # OpenAI compatible endpoints
 ]
-# Provider selection dropdown for better UX with many options
 provider_dropdown = gr.Dropdown(
     choices=providers_list,
     value="hf-inference",
@@ -177,6 +181,57 @@ provider_dropdown = gr.Dropdown(
     info="Select which inference provider to use. Uses your Hugging Face PRO credits."
 )
 def set_custom_model_from_radio(selected):
     """
     This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
@@ -185,6 +240,7 @@ def set_custom_model_from_radio(selected):
     print(f"Featured model selected: {selected}")
     return selected
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
@@ -195,7 +251,9 @@ demo = gr.ChatInterface(
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
-        provider_dropdown,  # Add provider selection to inputs
     ],
     fill_height=True,
     chatbot=chatbot,
@@ -204,102 +262,21 @@ demo = gr.ChatInterface(
 print("ChatInterface object created.")
 with demo:
-    # Add new accordion for advanced settings including provider selection
-    with gr.Accordion("Advanced Settings", open=True):
-        gr.Markdown("### Inference Provider Selection")
-        gr.Markdown("Select which provider to use for inference. Uses your Hugging Face PRO credits.")
-        # Provider dropdown is already included in the additional_inputs
-        gr.Markdown("""
-        ### Provider Information
-        - **hf-inference**: Default Hugging Face Inference API
-        - **cerebras**: Cerebras AI models - extremely fast inference (70x faster than GPUs)
-        - **together**: Together AI models
-        - **sambanova**: SambaNova models
-        - **replicate**: Replicate models
-        - **fal-ai**: Fal.ai models
-        - **novita**: Novita AI
-        - **black-forest-labs**: Black Forest Labs
-        - **cohere**: Cohere models
-        - **fireworks-ai**: Fireworks AI
-        - **hyperbolic**: Hyperbolic models
-        - **nebius**: Nebius models
-        - **openai**: OpenAI compatible endpoints
-        As a PRO user, you receive $2 of credits monthly across all providers.
-        Note: Not all models are available on all providers. If you select a provider that doesn't support your chosen model, you'll get an error message.
-        """)
-        # Model selection components moved from the removed accordion
-        gr.Markdown("### Model Selection")
-        model_search_box = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search for a featured model...",
-            lines=1
-        )
-        print("Model search box created.")
-        models_list = [
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "meta-llama/Llama-3.1-70B-Instruct",
-            "meta-llama/Llama-3.0-70B-Instruct",
-            "meta-llama/Llama-3.2-3B-Instruct",
-            "meta-llama/Llama-3.2-1B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "NousResearch/Hermes-3-Llama-3.1-8B",
-            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-            "mistralai/Mistral-Nemo-Instruct-2407",
-            "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "mistralai/Mistral-7B-Instruct-v0.3",
-            "mistralai/Mistral-7B-Instruct-v0.2",
-            "Qwen/Qwen3-235B-A22B",
-            "Qwen/Qwen3-32B",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/Qwen2.5-3B-Instruct",
-            "Qwen/Qwen2.5-0.5B-Instruct",
-            "Qwen/QwQ-32B",
-            "Qwen/Qwen2.5-Coder-32B-Instruct",
-            "microsoft/Phi-3.5-mini-instruct",
-            "microsoft/Phi-3-mini-128k-instruct",
-            "microsoft/Phi-3-mini-4k-instruct",
-            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
-            "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-            "HuggingFaceH4/zephyr-7b-beta",
-            "HuggingFaceTB/SmolLM2-360M-Instruct",
-            "tiiuae/falcon-7b-instruct",
-            "01-ai/Yi-1.5-34B-Chat",
-        ]
-        print("Models list initialized.")
-        featured_model_radio = gr.Radio(
-            label="Select a model below",
-            choices=models_list,
-            value="meta-llama/Llama-3.3-70B-Instruct",
-            interactive=True
-        )
-        print("Featured models radio button created.")
-        def filter_models(search_term):
-            print(f"Filtering models with search term: {search_term}")
-            filtered = [m for m in models_list if search_term.lower() in m.lower()]
-            print(f"Filtered models: {filtered}")
-            return gr.update(choices=filtered)
-        model_search_box.change(
-            fn=filter_models,
-            inputs=model_search_box,
-            outputs=featured_model_radio
-        )
-        print("Model search box change event linked.")
-        featured_model_radio.change(
-            fn=set_custom_model_from_radio,
-            inputs=featured_model_radio,
-            outputs=custom_model_box
-        )
-        print("Featured model radio button change event linked.")
 print("Gradio interface initialized.")

     frequency_penalty,
     seed,
     custom_model,
+    provider,  # Provider selection
+    model_search_term,  # For filtering models
+    selected_model  # From radio button selection
 ):
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Selected provider: {provider}")
+    print(f"Model search term: {model_search_term}")
+    print(f"Selected model from radio: {selected_model}")
     # Initialize the Inference Client with the provider
     # Provider is specified during initialization, not in the method call
     messages.append({"role": "user", "content": message})
     print("Latest user message appended.")
+    # Determine which model to use, prioritizing custom_model if provided
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
     # Start with an empty string to build the response as tokens stream in
 chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
 print("Chatbot interface created.")
+# Basic input components
 system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
 max_tokens_slider = gr.Slider(
     maximum=4096,
     value=512,
     step=1,
+    label="Max tokens"
 )
 temperature_slider = gr.Slider(
     minimum=0.1,
     label="Seed (-1 for random)"
 )
+# Custom model box
 custom_model_box = gr.Textbox(
     value="",
     label="Custom Model",
     placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
+# Provider selection
 providers_list = [
     "hf-inference",  # Default Hugging Face Inference
     "cerebras",      # Cerebras provider
     "openai"         # OpenAI compatible endpoints
 ]
 provider_dropdown = gr.Dropdown(
     choices=providers_list,
     value="hf-inference",
     info="Select which inference provider to use. Uses your Hugging Face PRO credits."
 )
+# Model selection components
+model_search_box = gr.Textbox(
+    label="Filter Models",
+    placeholder="Search for a featured model...",
+    lines=1
+)
+models_list = [
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "meta-llama/Llama-3.1-70B-Instruct",
+    "meta-llama/Llama-3.0-70B-Instruct",
+    "meta-llama/Llama-3.2-3B-Instruct",
+    "meta-llama/Llama-3.2-1B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "NousResearch/Hermes-3-Llama-3.1-8B",
+    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+    "mistralai/Mistral-Nemo-Instruct-2407",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "mistralai/Mistral-7B-Instruct-v0.3",
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "Qwen/Qwen3-235B-A22B",
+    "Qwen/Qwen3-32B",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-3B-Instruct",
+    "Qwen/Qwen2.5-0.5B-Instruct",
+    "Qwen/QwQ-32B",
+    "Qwen/Qwen2.5-Coder-32B-Instruct",
+    "microsoft/Phi-3.5-mini-instruct",
+    "microsoft/Phi-3-mini-128k-instruct",
+    "microsoft/Phi-3-mini-4k-instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+    "HuggingFaceH4/zephyr-7b-beta",
+    "HuggingFaceTB/SmolLM2-360M-Instruct",
+    "tiiuae/falcon-7b-instruct",
+    "01-ai/Yi-1.5-34B-Chat",
+]
+featured_model_radio = gr.Radio(
+    label="Select a model below",
+    choices=models_list,
+    value="meta-llama/Llama-3.3-70B-Instruct",
+    interactive=True
+)
+def filter_models(search_term):
+    print(f"Filtering models with search term: {search_term}")
+    filtered = [m for m in models_list if search_term.lower() in m.lower()]
+    print(f"Filtered models: {filtered}")
+    return gr.update(choices=filtered)
 def set_custom_model_from_radio(selected):
     """
     This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
     print(f"Featured model selected: {selected}")
     return selected
+# Create the Gradio interface
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
+        provider_dropdown,   # Provider selection
+        model_search_box,    # Model search box
+        featured_model_radio # Featured model radio
     ],
     fill_height=True,
     chatbot=chatbot,
 print("ChatInterface object created.")
 with demo:
+    # Connect the model filter to update the radio choices
+    model_search_box.change(
+        fn=filter_models,
+        inputs=model_search_box,
+        outputs=featured_model_radio
+    )
+    print("Model search box change event linked.")
+    # Connect the featured model radio to update the custom model box
+    featured_model_radio.change(
+        fn=set_custom_model_from_radio,
+        inputs=featured_model_radio,
+        outputs=custom_model_box
+    )
+    print("Featured model radio button change event linked.")
 print("Gradio interface initialized.")