Update app.py
app.py CHANGED

@@ -1,6 +1,10 @@
 import gradio as gr
-from openai import OpenAI
 import os
+from openai import OpenAI
+
+################################################
+# INITIAL SETUP
+################################################
 
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")

@@ -13,291 +17,313 @@ client = OpenAI(
 )
 print("OpenAI client initialized.")
 
+# Our main response-generating function
 def respond(
-    history
+    user_message,
+    history,
     system_message,
     max_tokens,
     temperature,
     top_p,
     frequency_penalty,
     seed,
-    custom_model,
-    selected_featured_model,
+    featured_model,
+    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
+    - user_message: the user's new message
+    - history: the list of previous messages, each as [user_text, assistant_text]
     - system_message: the system prompt
     - max_tokens: the maximum number of tokens to generate in the response
     - temperature: sampling temperature
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - featured_model: the user-chosen model from the radio button
+    - custom_model: a user-specified custom model that overrides featured_model if not empty
     """
 
+    print(f"New user message: {user_message}")
-    print(f"History: {history}")
+    print(f"History so far: {history}")
     print(f"System message: {system_message}")
+    print(f"max_tokens: {max_tokens}, temperature: {temperature}, top_p: {top_p}")
+    print(f"frequency_penalty: {frequency_penalty}, seed: {seed}")
+    print(f"Featured Model: {featured_model}")
+    print(f"Custom Model: {custom_model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
+    # Determine which model to use
+    # If the user typed something in custom_model, that overrides the featured model
+    # Otherwise we use the model selected in the radio. If neither, default to the example "meta-llama..."
+    model_to_use = None
+    if custom_model.strip():
         model_to_use = custom_model.strip()
+    elif featured_model is not None and featured_model.strip():
+        model_to_use = featured_model.strip()
     else:
-        print(f"Using Featured Model: {model_to_use}")
+        model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
 
+    print(f"Model selected for inference: {model_to_use}")
+
+    # Construct the conversation messages for the HF Inference API
     messages = [{"role": "system", "content": system_message}]
+    for user_text, assistant_text in history:
+        if user_text:
+            messages.append({"role": "user", "content": user_text})
+        if assistant_text:
+            messages.append({"role": "assistant", "content": assistant_text})
+    messages.append({"role": "user", "content": user_message})
+
+    # We'll collect and stream the response
+    response_so_far = ""
 
-    try:
-        # Make the streaming request to the HF Inference API via openai-like client
-        for message_chunk in client.chat.completions.create(
-            model=model_to_use,  # Use either the user-provided custom model or selected featured model
-            max_tokens=max_tokens,
-            stream=True,  # Stream the response
-            temperature=temperature,
-            top_p=top_p,
-            frequency_penalty=frequency_penalty,
-            seed=seed,
-            messages=messages,
-        ):
-            # Extract the token text from the response chunk
-            token_text = message_chunk.choices[0].delta.content
-            print(f"Received token: {token_text}")
-            response += token_text
-            # Yield the partial response to Gradio so it can display in real-time
-            yield response
-    except Exception as e:
-        print(f"Error during API call: {e}")
-        yield f"An error occurred: {e}"
+    # Make the streaming request to the HF Inference API
+    print("Sending request to OpenAI/Hugging Face Inference API...")
+    for message_chunk in client.chat.completions.create(
+        model=model_to_use,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
+        messages=messages,
+    ):
+        # The content for the partial chunk (delta.content can be None on some chunks, so guard it)
+        token_text = message_chunk.choices[0].delta.content or ""
+        response_so_far += token_text
+        # Return partial response to Gradio to display in real-time
+        yield response_so_far
 
print("Completed response generation.")
|
| 103 |
|
| 104 |
+
################################################
|
| 105 |
+
# GRADIO UI + STATE MANAGEMENT
|
| 106 |
+
################################################
|
| 107 |
+
|
| 108 |
+
def user_submit(user_message, history):
|
| 109 |
+
"""
|
| 110 |
+
This function is called when the user sends a message.
|
| 111 |
+
We simply add the user message to the conversation history.
|
| 112 |
+
"""
|
| 113 |
+
print("user_submit triggered.")
|
| 114 |
+
# Append the new user message to history
|
| 115 |
+
if not history:
|
| 116 |
+
history = []
|
| 117 |
+
history = history + [[user_message, None]]
|
| 118 |
+
return history, ""
|
| 119 |
+
|
| 120 |
+
def bot_reply(history, system_message, max_tokens, temperature, top_p,
|
| 121 |
+
frequency_penalty, seed, featured_model, custom_model):
|
| 122 |
+
"""
|
| 123 |
+
This function is triggered to produce the bot's response after the user has submitted.
|
| 124 |
+
We call 'respond' for streaming text.
|
| 125 |
+
"""
|
| 126 |
+
print("bot_reply triggered.")
|
| 127 |
+
|
| 128 |
+
# The last conversation item has user_message, None
|
| 129 |
+
user_message = history[-1][0]
|
| 130 |
+
|
| 131 |
+
# We will stream the partial responses from 'respond'
|
| 132 |
+
bot_response = respond(
|
| 133 |
+
user_message=user_message,
|
| 134 |
+
history=history[:-1], # all items except the last, because we pass the last user msg separately
|
| 135 |
+
system_message=system_message,
|
| 136 |
+
max_tokens=max_tokens,
|
| 137 |
+
temperature=temperature,
|
| 138 |
+
top_p=top_p,
|
| 139 |
+
frequency_penalty=frequency_penalty,
|
| 140 |
+
seed=seed,
|
| 141 |
+
featured_model=featured_model,
|
| 142 |
+
custom_model=custom_model
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
# As we yield from the generator, we update the last item in history with the partial response
|
| 146 |
+
# Gradio streaming logic: yield the partial updates as they come in
|
| 147 |
+
for partial_text in bot_response:
|
| 148 |
+
history[-1][1] = partial_text
|
| 149 |
+
yield history
|
| 150 |
|
| 151 |
+
# We define a small list of placeholder featured models for demonstration
|
| 152 |
+
models_list = [
|
| 153 |
+
"meta-llama/Llama-2-13B-Chat-hf",
|
| 154 |
+
"bigscience/bloom",
|
| 155 |
+
"EleutherAI/gpt-neo-2.7B",
|
| 156 |
+
"meta-llama/Llama-3.3-70B-Instruct"
|
| 157 |
]
|
| 158 |
|
| 159 |
+
def filter_models(search_term):
|
| 160 |
+
"""
|
| 161 |
+
Filter function triggered when user types in the model_search box.
|
| 162 |
+
Returns an updated list of models that contain the search term.
|
| 163 |
+
"""
|
| 164 |
+
filtered = [m for m in models_list if search_term.lower() in m.lower()]
|
| 165 |
+
return gr.update(choices=filtered)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
################################################
|
| 169 |
+
# BUILDING THE GRADIO LAYOUT
|
| 170 |
+
################################################
|
| 171 |
+
|
| 172 |
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
|
|
|
|
| 173 |
gr.Markdown(
|
| 174 |
"""
|
| 175 |
+
# Serverless-TextGen-Hub
|
| 176 |
+
**A UI for text generation using Hugging Face's Inference API.**
|
| 177 |
+
|
| 178 |
+
Below is a simple chat interface. You can pick from **Featured Models** or specify a **Custom Model**
|
| 179 |
+
to override the choice. If you're not sure, just use the default.
|
| 180 |
"""
|
| 181 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
+
# State to hold the conversation history, will be a list of [user, bot]
|
| 184 |
+
conversation_state = gr.State([])
|
| 185 |
+
|
| 186 |
+
# Row for system message + advanced settings
|
| 187 |
+
with gr.Accordion("Advanced Settings", open=False):
|
| 188 |
system_message = gr.Textbox(
|
|
|
|
| 189 |
label="System Message",
|
| 190 |
+
value="You are a helpful assistant.",
|
| 191 |
lines=2,
|
| 192 |
+
info="Provides background or personality instructions to the model."
|
| 193 |
+
)
|
| 194 |
+
max_tokens = gr.Slider(
|
| 195 |
+
minimum=1,
|
| 196 |
+
maximum=4096,
|
| 197 |
+
value=512,
|
| 198 |
+
step=1,
|
| 199 |
+
label="Max new tokens"
|
| 200 |
+
)
|
| 201 |
+
temperature = gr.Slider(
|
| 202 |
+
minimum=0.1,
|
| 203 |
+
maximum=4.0,
|
| 204 |
+
value=0.7,
|
| 205 |
+
step=0.1,
|
| 206 |
+
label="Temperature"
|
| 207 |
+
)
|
| 208 |
+
top_p = gr.Slider(
|
| 209 |
+
minimum=0.1,
|
| 210 |
+
maximum=1.0,
|
| 211 |
+
value=0.95,
|
| 212 |
+
step=0.05,
|
| 213 |
+
label="Top-P"
|
| 214 |
+
)
|
| 215 |
+
frequency_penalty = gr.Slider(
|
| 216 |
+
minimum=-2.0,
|
| 217 |
+
maximum=2.0,
|
| 218 |
+
value=0.0,
|
| 219 |
+
step=0.1,
|
| 220 |
+
label="Frequency Penalty"
|
| 221 |
+
)
|
| 222 |
+
seed = gr.Slider(
|
| 223 |
+
minimum=-1,
|
| 224 |
+
maximum=65535,
|
| 225 |
+
value=-1,
|
| 226 |
+
step=1,
|
| 227 |
+
label="Seed (-1 for random)"
|
| 228 |
)
|
| 229 |
|
| 230 |
+
# Featured Models + filtering
|
| 231 |
+
with gr.Accordion("Featured Models", open=False):
|
| 232 |
+
model_search = gr.Textbox(
|
| 233 |
+
label="Filter Models",
|
| 234 |
+
placeholder="Search for a featured model...",
|
| 235 |
+
lines=1
|
| 236 |
+
)
|
| 237 |
+
featured_model_radio = gr.Radio(
|
| 238 |
+
label="Select a featured model below",
|
| 239 |
+
choices=models_list,
|
| 240 |
+
value=models_list[0], # default selection
|
| 241 |
+
interactive=True
|
| 242 |
+
)
|
| 243 |
+
model_search.change(
|
| 244 |
+
filter_models,
|
| 245 |
+
inputs=model_search,
|
| 246 |
+
outputs=featured_model_radio
|
| 247 |
)
|
|
|
|
|
|
|
| 248 |
|
| 249 |
+
# This is the Custom Model box (overrides Featured Models if not empty)
|
| 250 |
+
custom_model = gr.Textbox(
|
| 251 |
+
label="Custom Model",
|
| 252 |
+
value="",
|
| 253 |
+
info="(Optional) Provide a custom HF model path. If not empty, it overrides the Featured Model."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
)
|
| 255 |
|
| 256 |
+
# The main Chatbot interface
|
| 257 |
+
+    chatbot = gr.Chatbot(height=600)
+
+    # Textbox for the user to type a new message
+    with gr.Row():
+        user_input = gr.Textbox(
+            show_label=False,
+            placeholder="Type your message here (press enter or click 'Submit')",
+            lines=2
         )
+    submit_btn = gr.Button("Submit", variant="primary")
+
+    # The user submits -> we update the conversation state
+    submit_btn.click(
+        fn=user_submit,
+        inputs=[user_input, conversation_state],
+        outputs=[conversation_state, user_input],
+    )
 
-            system_message=system_msg,
-            max_tokens=max_tok,
-            temperature=temp,
-            top_p=tp,
-            frequency_penalty=freq_pen,
-            seed=sd,
-            custom_model=custom_mod,
-            selected_featured_model=selected_feat_mod,
-        )
+    # Then the bot replies, streaming the output
+    # We pass all required arguments from the advanced settings, plus the model selection boxes
+    submit_btn.click(
+        fn=bot_reply,
         inputs=[
-            chatbot_component,  # history
+            conversation_state,
             system_message,
             max_tokens,
             temperature,
             top_p,
             frequency_penalty,
             seed,
+            featured_model_radio,
+            custom_model
         ],
-        outputs=[
-            chatbot_component,
-            chatbot_component,  # Updated history
-        ],
+        outputs=[chatbot],
+        # 'bot_reply' is a generator, so we queue the event to stream its partial outputs:
+        queue=True
     )
 
+    # We also allow pressing Enter in user_input to do the same thing
+    user_input.submit(
+        fn=user_submit,
+        inputs=[user_input, conversation_state],
+        outputs=[conversation_state, user_input],
+    )
+    user_input.submit(
+        fn=bot_reply,
         inputs=[
-            chatbot_component,  # history
+            conversation_state,
             system_message,
             max_tokens,
             temperature,
             top_p,
             frequency_penalty,
             seed,
+            featured_model_radio,
+            custom_model
         ],
-        outputs=[
-            chatbot_component,
-            chatbot_component,  # Updated history
-        ],
+        outputs=[chatbot],
+        queue=True
     )
 
-        .gradio-container {background-color: #f9f9f9;}
-        `;
-        document.head.appendChild(style);
-    }
+    gr.HTML("""
+    <br>
+    <p style='text-align:center;'>
+        Developed by <strong>Nymbo</strong>.
+        Powered by <strong>Hugging Face Inference API</strong>.
+    </p>
     """)
 
+# Finally, launch the app
-demo.launch(
+if __name__ == "__main__":
+    print("Launching the Serverless-TextGen-Hub application...")
+    demo.launch()
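
Note: the unchanged lines between the two hunks (the `client = OpenAI(` call named in the second hunk header) are collapsed in this diff. Below is a minimal sketch of what that setup presumably looks like, given the `ACCESS_TOKEN = os.getenv("HF_TOKEN")` line above; the exact `base_url` is an assumption, since the diff does not show it.

    # Hypothetical reconstruction of the collapsed client setup.
    # Only `client = OpenAI(` is confirmed by the hunk header; the base_url
    # for Hugging Face's OpenAI-compatible serverless endpoint is an assumption.
    import os
    from openai import OpenAI

    ACCESS_TOKEN = os.getenv("HF_TOKEN")

    client = OpenAI(
        base_url="https://api-inference.huggingface.co/v1/",  # assumed endpoint
        api_key=ACCESS_TOKEN,
    )
    print("OpenAI client initialized.")

With a client shaped like this, the `client.chat.completions.create(..., stream=True)` call in `respond` yields chunks whose `choices[0].delta.content` carries the incremental text, which is why the UI wires `bot_reply` through queued events to stream partial output into the Chatbot.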
|