Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed on Jan 4

Commit

e4bb2d0

verified ·

1 Parent(s): e13eb1b

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -113

app.py CHANGED Viewed

@@ -22,7 +22,8 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    selected_model,
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -32,17 +33,19 @@ def respond(
     - max_tokens: the maximum number of tokens to generate in the response
     - temperature: sampling temperature
     - top_p: top-p (nucleus) sampling
-    - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - selected_model: the model to use for generating the response
     """
     print(f"Received message: {message}")
     print(f"History: {history}")
-    print(f"System message: {system_message}")
-    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Selected model: {selected_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
@@ -58,7 +61,7 @@ def respond(
         if user_part:
             messages.append({"role": "user", "content": user_part})
             print(f"Added user message to context: {user_part}")
-        if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
@@ -69,19 +72,19 @@ def respond(
     response = ""
     print("Sending request to OpenAI API.")
-    # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=selected_model,  # Use the selected model
         max_tokens=max_tokens,
         stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
-        frequency_penalty=frequency_penalty,  # <-- NEW
-        seed=seed,  # <-- NEW
-        messages=messages,
     ):
         # Extract the token text from the response chunk
-        token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
         yield response
@@ -92,116 +95,158 @@ def respond(
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
-# Define the list of featured models
-featured_models = [
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "google/flan-t5-xl",
-    "facebook/bart-large-cnn",
-    "EleutherAI/gpt-neo-2.7B",
-    # Add more featured models here
-]
-# Create the Gradio Blocks interface
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    # Tab for model selection
-    with gr.Tab("Models"):
         with gr.Row():
-            with gr.Column():
-                with gr.Accordion("Featured Models", open=True):
-                    model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1)
-                    model = gr.Dropdown(label="Select a model below", choices=featured_models, value="meta-llama/Llama-3.3-70B-Instruct", interactive=True)
-                    def filter_models(search_term):
-                        filtered_models = [m for m in featured_models if search_term.lower() in m.lower()]
-                        return gr.update(choices=filtered_models)
-                    model_search.change(filter_models, inputs=model_search, outputs=model)
-                custom_model = gr.Textbox(label="Custom Model", placeholder="Enter a custom model ID here", interactive=True)
-    # Tab for chat interface
-    with gr.Tab("Chat"):
         with gr.Row():
-            with gr.Column():
-                txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)
-        # Additional parameters
         with gr.Row():
-            with gr.Column():
-                system_message = gr.Textbox(label="System Message", value="", lines=3)
-                max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max New Tokens")
-                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
-                frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-                seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
-        # Chatbot display
-        chatbot = gr.Chatbot(height=600)
-        # Submit button
-        submit_btn = gr.Button("Submit")
     # Tab for information
-    with gr.Tab("Information"):
         with gr.Row():
-            gr.Markdown(
-                """
-                # Featured Models
-                - **meta-llama/Llama-3.3-70B-Instruct**: A large language model from Meta.
-                - **google/flan-t5-xl**: A pretrained encoder-decoder model from Google.
-                - **facebook/bart-large-cnn**: A pretrained sequence-to-sequence model from Facebook.
-                - **EleutherAI/gpt-neo-2.7B**: A large autoregressive language model from EleutherAI.
-                # Parameters Overview
-                - **System Message**: Sets the behavior and context for the assistant.
-                - **Max New Tokens**: Limits the length of the generated response.
-                - **Temperature**: Controls the randomness of the output. Higher values make output more random.
-                - **Top-P**: Controls the diversity of text by selecting tokens that account for top-p probability mass.
-                - **Frequency Penalty**: Decreases the model's likelihood to repeat the same lines.
-                - **Seed**: Ensures reproducibility of results; set to -1 for random seed.
                 """
             )
-    # Function to handle chat submission
-    def user(user_message, history):
-        return "", history + [[user_message, None]]
-    # Function to process the chat
-    def bot(history, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, selected_model):
-        # Get the last user message
-        user_message = history[-1][0]
-        # Generate response
-        response_iter = respond(
-            user_message,
-            history[:-1],  # Exclude the last user message which doesn't have a response yet
-            system_message,
-            max_tokens,
-            temperature,
-            top_p,
-            frequency_penalty,
-            seed,
-            selected_model,
-        )
-        # Collect the entire response
-        full_response = ""
-        for resp in response_iter:
-            full_response = resp
-        # Update history with the bot's response
-        history[-1][1] = full_response
-        return history
-    # Set up the chat flow
-    txt.submit(user, [txt, chatbot], [txt, chatbot], queue=False).then(
-        bot, [chatbot, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, model], chatbot
-    )
-    submit_btn.click(user, [txt, chatbot], [txt, chatbot], queue=False).then(
-        bot, [chatbot, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, model], chatbot
     )
 print("Gradio interface initialized.")
 if __name__ == "__main__":
-    print("Launching the demo application.")
-    demo.launch()

     top_p,
     frequency_penalty,
     seed,
+    model,
+    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - max_tokens: the maximum number of tokens to generate in the response
     - temperature: sampling temperature
     - top_p: top-p (nucleus) sampling
+    - frequency_penalty: penalize repeated tokens in the response
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - model: the selected model
+    - custom_model: the custom model path
     """
     print(f"Received message: {message}")
     print(f"History: {history}")
+    print(f"system message: {system_message}")
+    print(f"max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Selected Model: {model}")
+    print(f"Custom model: {custom_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         if user_part:
             messages.append({"role": "user", "content": user_part})
             print(f"Added user message to context: {user_part}")
+        # Replay the assistant half of the turn only when one exists
+        if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
     response = ""
     print("Sending request to OpenAI API.")
+    # Make the streaming request to the HF Inference API via the OpenAI-like client
     for message_chunk in client.chat.completions.create(
+        # A non-blank custom model ID (whitespace stripped) takes precedence
+        # over the featured-model selection.
+        model=(custom_model or "").strip() or model,
         max_tokens=max_tokens,
         stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
+        messages=messages,
     ):
         # Extract the token text from the response chunk
+        # With stream=True the incremental text lives on `delta`, not `message`;
+        # it can be None (e.g. role-only or final chunks), so fall back to "".
+        token_text = message_chunk.choices[0].delta.content or ""
         print(f"Received token: {token_text}")
         response += token_text
         yield response
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
+# Define the Gradio interface
+with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
+    # Tab for basic settings: prompt input, optional custom model, featured-model picker
+    with gr.Tab("Basic Settings"):
+        with gr.Column(elem_id="prompt-container"):
+            with gr.Row():
+                # Textbox for user to input the message
+                text_prompt = gr.Textbox(label="Prompt", placeholder="Enter a prompt here", lines=3, elem_id="prompt-text-input")
+            with gr.Row():
+                # Textbox for custom model input
+                custom_model = gr.Textbox(label="Custom Model", info="HuggingFace model path (optional)", placeholder="meta-llama/Llama-3.3-70B-Instruct", lines=1, elem_id="model-search-input")
+            # Accordion for selecting the model
+            with gr.Accordion("Featured models", open=True):
+                # Textbox for searching models
+                model_search = gr.Textbox(label="Filter models", placeholder="Search for a featured model...", lines=1, elem_id="model-search-input")
+                # Single source of truth for the featured model IDs; the filter below
+                # searches this list rather than the component's internal state.
+                # NOTE(review): confirm each ID is actually served by the inference endpoint.
+                featured_models = [
+                    "meta-llama/Llama-3.3-70B-Instruct",
+                    "anthropic/claude-3",
+                    "anthropic/claude-instant-3",
+                    "anthropic/claude-2",
+                    "anthropic/claude-instant-2",
+                    "anthropic/claude-1.3",
+                    "anthropic/claude-instant-1.3",
+                    "anthropic/claude-1",
+                    "anthropic/claude-instant-1",
+                    "anthropic/claude-0.3",
+                    "anthropic/claude-instant-0.3",
+                    "anthropic/claude-0.1",
+                    "anthropic/claude-instant-0.1",
+                    "anthropic/claude-v2",
+                    "anthropic/claude-instant-v2",
+                    "anthropic/claude-v1",
+                    "anthropic/claude-instant-v1",
+                    "anthropic/claude-v0.3",
+                    "anthropic/claude-instant-v0.3",
+                    "anthropic/claude-v0.1",
+                    "anthropic/claude-instant-v0.1",
+                ]
+                # Radio buttons to select the desired model
+                model = gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=featured_models, interactive=True, elem_id="model-radio")
+                # Filtering models based on search input
+                def filter_models(search_term):
+                    """Return a Radio update listing the featured models whose ID contains search_term (case-insensitive)."""
+                    needle = (search_term or "").lower()
+                    filtered_models = [m for m in featured_models if needle in m.lower()]
+                    return gr.update(choices=filtered_models)
+                # Update model list when search box is used; the search text (not the
+                # current radio value) is the filter input.
+                model_search.change(filter_models, inputs=model_search, outputs=model)
+    # Tab for advanced settings: system prompt and sampling parameters passed to respond()
+    with gr.Tab("Advanced Settings"):
         with gr.Row():
+            # Text box for specifying the system message
+            system_message = gr.Textbox(value="", label="System message")
         with gr.Row():
+            # Slider for setting the maximum new tokens
+            max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
         with gr.Row():
+            # Slider for setting the temperature
+            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+        with gr.Row():
+            # Slider for setting top-p
+            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P")
+        with gr.Row():
+            # Slider for setting frequency penalty
+            frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+        with gr.Row():
+            # Slider for setting the seed (-1 is treated as "random" by respond())
+            seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
     # Tab for information
+    with gr.Tab("Information"):
         with gr.Row():
+            # Display a sample prompt
+            gr.Textbox(label="Sample prompt", value="Enter a prompt | ultra detail, ultra elaboration, ultra quality, perfect.")
+        with gr.Accordion("Featured Models (WiP)", open=False):
+            gr.HTML(
                 """
+            <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">View more models</a></p>
+            <table style="width:100%; text-align:center; margin:auto;">
+                <tr>
+                    <th>Model</th>
+                    <th>Description</th>
+                </tr>
+                <tr>
+                    <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                    <td>High-quality, large-scale language model</td>
+                </tr>
+                <tr>
+                    <td>anthropic/claude-3</td>
+                    <td> Advanced conversational AI model</td>
+                </tr>
+                <tr>
+                    <td>anthropic/claude-instant-3</td>
+                    <td> Fast and efficient conversational AI model</td>
+                </tr>
+            </table>
+            """
             )
+        with gr.Accordion("Parameters Overview", open=False):
+            # Every line of the Markdown shares one indent so it dedents cleanly;
+            # the listed defaults and ranges mirror the sliders in "Advanced Settings".
+            gr.Markdown(
+            """
+            ## System Message
+            - **Description**: The system message provides context and instructions to the model.
+            - **Default**: ""
+            ## Max New Tokens
+            - **Description**: The maximum number of tokens to generate in the response.
+            - **Default**: 512
+            - **Range**: 1 to 4096
+            ## Temperature
+            - **Description**: Controls the randomness of the output. Lower values make the output more deterministic, higher values make it output more varied.
+            - **Default**: 0.7
+            - **Range**: 0.1 to 4.0
+            ## Top-P
+            - **Description**: Controls the diversity of the output. Lower values make the output more focused, higher values make it more varied.
+            - **Default**: 0.9
+            - **Range**: 0.1 to 1.0
+            ## Frequency Penalty
+            - **Description**: Penalizes repeated tokens in the response. Higher values makes the output less repetitive.
+            - **Default**: 0.0
+            - **Range**: -2.0 to 2.0
+            ## Seed
+            - **Description**: A fixed seed for reproducibility. -1 for random.
+            - **Default**: -1
+            - **Range**: -1 to 65535
+            """
+            )
+    # Row containing the 'Run' button to trigger the query function
+    with gr.Row():
+        text_button = gr.Button("Run", variant='primary', elem_id="gen-button")
+    # Row for displaying the generated response
+    with gr.Row():
+        response_output = gr.Textbox(label="Response Output", elem_id="response-output")
+    # Adapter between the single-prompt UI and respond().
+    # NOTE(review): respond()'s positional order (message, history, system_message,
+    # max_tokens, temperature, top_p, frequency_penalty, seed, model, custom_model)
+    # is taken from its visible parameter tail and the pre-commit call site; confirm
+    # against the full signature.
+    def run_prompt(prompt, system_text, token_limit, temp, nucleus_p, freq_penalty, seed_value, model_id, custom_model_id):
+        """Stream respond()'s cumulative output for a one-off prompt with no prior chat history."""
+        yield from respond(
+            prompt,
+            [],  # this layout has no chat history component, so start from an empty context
+            system_text,
+            token_limit,
+            temp,
+            nucleus_p,
+            freq_penalty,
+            seed_value,
+            model_id,
+            custom_model_id,
+        )
+    # Set up button to call the respond function; input order matches run_prompt's parameters
+    text_button.click(
+        run_prompt,
+        inputs=[
+            text_prompt, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, model, custom_model
+        ],
+        outputs=[response_output]
     )
 print("Gradio interface initialized.")
 if __name__ == "__main__":
+    demo.launch(show_api=False, share=False)