KingNish committed
Commit 397bb2f · verified · 1 Parent(s): 9432b24
Files changed (1):
  app.py +228 -193
app.py CHANGED
@@ -178,13 +178,13 @@ def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=
     result = {"text": "", "image": None}
     # Call inferencer with or without think parameter based on user choice
     for i in inferencer(text=prompt, think=show_thinking, understanding_output=False, **inference_hyper):
-        print(type(i))
+        # print(type(i)) # For debugging stream
         if type(i) == str:
            result["text"] += i
         else:
            result["image"] = i
 
-    yield result["image"], result.get("text", None)
+    yield result["image"], result.get("text", "")
 
 
 # Image Understanding function with thinking option and hyperparameters
@@ -192,7 +192,8 @@ def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=
 def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
                         do_sample=False, text_temperature=0.3, max_new_tokens=512):
     if image is None:
-        return "Please upload an image."
+        yield "Please upload an image for understanding."
+        return
 
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
@@ -203,22 +204,24 @@ def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
     inference_hyper = dict(
         do_sample=do_sample,
         temperature=text_temperature,
-        max_think_token_n=max_new_tokens, # Set max_length
+        max_think_token_n=max_new_tokens, # Set max_length for text generation
     )
 
-    result = {"text": "", "image": None}
+    result_text = ""
     # Use show_thinking parameter to control thinking process
     for i in inferencer(image=image, text=prompt, think=show_thinking,
                         understanding_output=True, **inference_hyper):
         if type(i) == str:
-            result["text"] += i
-        else:
-            result["image"] = i
-        yield result["text"]
+            result_text += i
+            yield result_text
+        # else: This branch seems unused in original, as understanding_output=True typically yields text.
+        # If it yielded image, it would be an intermediate. For final output, it's text.
+        # For now, we assume it only yields text.
+    yield result_text # Ensure final text is yielded
 
 
 # Image Editing function with thinking option and hyperparameters
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=90)
 def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_scale=4.0,
                cfg_img_scale=2.0, cfg_interval=0.0,
                timestep_shift=3.0, num_timesteps=50, cfg_renorm_min=1.0,
@@ -228,7 +231,8 @@ def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_sc
     set_seed(seed)
 
     if image is None:
-        return "Please upload an image.", ""
+        yield None, "Please upload an image for editing." # Yield tuple for image/text
+        return
 
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
@@ -257,7 +261,7 @@ def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_sc
         else:
             result["image"] = i
 
-    yield result["image"], result.get("text", "")
+    yield result["image"], result.get("text", "") # Yield tuple for image/text
 
 # Helper function to load example images
 def load_example_image(image_path):
@@ -267,201 +271,232 @@ def load_example_image(image_path):
         print(f"Error loading example image: {e}")
         return None
 
-
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("""
-    <div>
-    <img src="https://lf3-static.bytednsdoc.com/obj/eden-cn/nuhojubrps/banner.png" alt="BAGEL" width="380"/>
-    </div>
-    """)
-
-    with gr.Tab("📝 Text to Image"):
-        txt_input = gr.Textbox(
-            label="Prompt",
-            value="A female cosplayer portraying an ethereal fairy or elf, wearing a flowing dress made of delicate fabrics in soft, mystical colors like emerald green and silver. She has pointed ears, a gentle, enchanting expression, and her outfit is adorned with sparkling jewels and intricate patterns. The background is a magical forest with glowing plants, mystical creatures, and a serene atmosphere."
-        )
+    <div>
+    <img src="https://lf3-static.bytednsdoc.com/obj/eden-cn/nuhojubrps/banner.png" alt="BAGEL" width="380"/>
+    </div>
+    # BAGEL Multimodal Chatbot
+    Interact with BAGEL to generate images from text, edit existing images, or understand image content.
+    """)
+
+    # Chatbot display area
+    chatbot = gr.Chatbot(label="Chat History", height=500, avatar_images=(None, "https://lf3-static.bytednsdoc.com/obj/eden-cn/nuhojubrps/BAGEL_favicon.png"))
+
+    # Input area
+    with gr.Row():
+        image_input = gr.Image(type="pil", label="Optional: Upload an Image (for Image Understanding/Edit)", scale=0.5, value=None)
 
+        with gr.Column(scale=1.5):
+            user_prompt = gr.Textbox(label="Your Message", placeholder="Type your prompt here...", lines=3)
+
+            with gr.Row():
+                mode_selector = gr.Radio(
+                    choices=["Text to Image", "Image Understanding", "Image Edit"],
+                    value="Text to Image",
+                    label="Select Mode",
+                    interactive=True
+                )
+                submit_btn = gr.Button("Send", variant="primary")
+
+    # Global/Shared Hyperparameters
+    with gr.Accordion("General Settings & Hyperparameters", open=False) as general_accordion:
         with gr.Row():
-            show_thinking = gr.Checkbox(label="Thinking", value=False)
-
-        # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
-            # Layout: two parameters per row
-            with gr.Group():
-                with gr.Row():
-                    seed = gr.Slider(minimum=0, maximum=1000000, value=0, step=1,
-                                     label="Seed", info="0 for random seed, positive for reproducible results")
-                    image_ratio = gr.Dropdown(choices=["1:1", "4:3", "3:4", "16:9", "9:16"],
-                                              value="1:1", label="Image Ratio",
-                                              info="The longer size is fixed to 1024")
-
-                with gr.Row():
-                    cfg_text_scale = gr.Slider(minimum=1.0, maximum=8.0, value=4.0, step=0.1, interactive=True,
-                                               label="CFG Text Scale", info="Controls how strongly the model follows the text prompt (4.0-8.0)")
-                    cfg_interval = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.1,
-                                             label="CFG Interval", info="Start of CFG application interval (end is fixed at 1.0)")
-
-                with gr.Row():
-                    cfg_renorm_type = gr.Dropdown(choices=["global", "local", "text_channel"],
-                                                  value="global", label="CFG Renorm Type",
-                                                  info="If the genrated image is blurry, use 'global'")
-                    cfg_renorm_min = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, interactive=True,
-                                               label="CFG Renorm Min", info="1.0 disables CFG-Renorm")
-
-                with gr.Row():
-                    num_timesteps = gr.Slider(minimum=10, maximum=100, value=50, step=5, interactive=True,
-                                              label="Timesteps", info="Total denoising steps")
-                    timestep_shift = gr.Slider(minimum=1.0, maximum=5.0, value=3.0, step=0.5, interactive=True,
-                                               label="Timestep Shift", info="Higher values for layout, lower for details")
-
-            # Thinking parameters in a single row
-            thinking_params = gr.Group(visible=False)
-            with thinking_params:
-                with gr.Row():
-                    do_sample = gr.Checkbox(label="Sampling", value=False, info="Enable sampling for text generation")
-                    max_think_token_n = gr.Slider(minimum=64, maximum=4006, value=1024, step=64, interactive=True,
-                                                  label="Max Think Tokens", info="Maximum number of tokens for thinking")
-                    text_temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, interactive=True,
-                                                 label="Temperature", info="Controls randomness in text generation")
-
-        thinking_output = gr.Textbox(label="Thinking Process", visible=False)
-        img_output = gr.Image(label="Generated Image")
-        gen_btn = gr.Button("Generate")
+            show_thinking_global = gr.Checkbox(label="Show Thinking Process", value=False, info="Enable to see model's intermediate thinking text.")
+            seed_global = gr.Slider(minimum=0, maximum=1000000, value=0, step=1, label="Seed", info="0 for random seed, positive for reproducible results.")
 
-        # Dynamically show/hide thinking process box and parameters
-        def update_thinking_visibility(show):
-            return gr.update(visible=show), gr.update(visible=show)
-
-        show_thinking.change(
-            fn=update_thinking_visibility,
-            inputs=[show_thinking],
-            outputs=[thinking_output, thinking_params]
-        )
-
-        gen_btn.click(
-            fn=text_to_image,
-            inputs=[
-                txt_input, show_thinking, cfg_text_scale,
-                cfg_interval, timestep_shift,
-                num_timesteps, cfg_renorm_min, cfg_renorm_type,
-                max_think_token_n, do_sample, text_temperature, seed, image_ratio
-            ],
-            outputs=[img_output, thinking_output]
-        )
+    # Container for thinking-specific parameters, visibility controlled by show_thinking_global
+    thinking_params_container = gr.Group(visible=False)
+    with thinking_params_container:
+        gr.Markdown("#### Thinking Process Parameters (affect text generation)")
+        with gr.Row():
+            common_do_sample = gr.Checkbox(label="Enable Sampling", value=False, info="Enable sampling for text generation (otherwise greedy).")
+            common_text_temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Text Temperature", info="Controls randomness in text generation (higher = more random).")
+            common_max_think_token_n = gr.Slider(minimum=64, maximum=4096, value=1024, step=64, label="Max Think Tokens / Max New Tokens", info="Maximum number of tokens for thinking (T2I/Edit) or generated text (Understanding).")
+
+    # T2I Hyperparameters
+    t2i_params_accordion = gr.Accordion("Text to Image Specific Parameters", open=False)
+    with t2i_params_accordion:
+        gr.Markdown("#### Text to Image Parameters")
+        with gr.Row():
+            t2i_image_ratio = gr.Dropdown(choices=["1:1", "4:3", "3:4", "16:9", "9:16"], value="1:1", label="Image Ratio", info="The longer size is fixed to 1024 pixels.")
+        with gr.Row():
+            t2i_cfg_text_scale = gr.Slider(minimum=1.0, maximum=8.0, value=4.0, step=0.1, label="CFG Text Scale", info="Controls how strongly the model follows the text prompt (4.0-8.0 recommended).")
+            t2i_cfg_interval = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.1, label="CFG Interval", info="Start of Classifier-Free Guidance application interval (end is fixed at 1.0).")
+        with gr.Row():
+            t2i_cfg_renorm_type = gr.Dropdown(choices=["global", "local", "text_channel"], value="global", label="CFG Renorm Type", info="Normalization type for CFG. Use 'global' if the generated image is blurry.")
+            t2i_cfg_renorm_min = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="CFG Renorm Min", info="Minimum value for CFG Renormalization (1.0 disables CFG-Renorm).")
+        with gr.Row():
+            t2i_num_timesteps = gr.Slider(minimum=10, maximum=100, value=50, step=5, label="Timesteps", info="Total denoising steps for image generation.")
+            t2i_timestep_shift = gr.Slider(minimum=1.0, maximum=5.0, value=3.0, step=0.5, label="Timestep Shift", info="Higher values for layout control, lower for fine details.")
 
-    with gr.Tab("🖌️ Image Edit"):
+    # Image Edit Hyperparameters
+    edit_params_accordion = gr.Accordion("Image Edit Specific Parameters", open=False)
+    with edit_params_accordion:
+        gr.Markdown("#### Image Edit Parameters")
         with gr.Row():
-            with gr.Column(scale=1):
-                edit_image_input = gr.Image(label="Input Image", value=load_example_image('test_images/women.jpg'))
-                edit_prompt = gr.Textbox(
-                    label="Prompt",
-                    value="She boards a modern subway, quietly reading a folded newspaper, wearing the same clothes."
-                )
-
-            with gr.Column(scale=1):
-                edit_image_output = gr.Image(label="Result")
-                edit_thinking_output = gr.Textbox(label="Thinking Process", visible=False)
-
+            edit_cfg_text_scale = gr.Slider(minimum=1.0, maximum=8.0, value=4.0, step=0.1, label="CFG Text Scale", info="Controls how strongly the model follows the text prompt for editing.")
+            edit_cfg_img_scale = gr.Slider(minimum=1.0, maximum=4.0, value=2.0, step=0.1, label="CFG Image Scale", info="Controls how much the model preserves input image details during editing.")
+        with gr.Row():
+            edit_cfg_interval = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="CFG Interval", info="Start of CFG application interval for editing (end is fixed at 1.0).")
+            edit_cfg_renorm_type = gr.Dropdown(choices=["global", "local", "text_channel"], value="text_channel", label="CFG Renorm Type", info="Normalization type for CFG during editing. Use 'global' if output is blurry.")
+        with gr.Row():
+            edit_cfg_renorm_min = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="CFG Renorm Min", info="Minimum value for CFG Renormalization during editing (1.0 disables CFG-Renorm).")
         with gr.Row():
-            edit_show_thinking = gr.Checkbox(label="Thinking", value=False)
+            edit_num_timesteps = gr.Slider(minimum=10, maximum=100, value=50, step=5, label="Timesteps", info="Total denoising steps for image editing.")
+            edit_timestep_shift = gr.Slider(minimum=1.0, maximum=10.0, value=3.0, step=0.5, label="Timestep Shift", info="Higher values for layout control, lower for fine details during editing.")
+
+    # Main chat processing function
+    @spaces.GPU(duration=90) # Apply GPU decorator to the combined function
+    def process_chat_message(history, prompt, uploaded_image, mode,
+                             show_thinking_global_val, seed_global_val,
+                             common_do_sample_val, common_text_temperature_val, common_max_think_token_n_val,
+                             t2i_cfg_text_scale_val, t2i_cfg_interval_val, t2i_timestep_shift_val,
+                             t2i_num_timesteps_val, t2i_cfg_renorm_min_val, t2i_cfg_renorm_type_val,
+                             t2i_image_ratio_val,
+                             edit_cfg_text_scale_val, edit_cfg_img_scale_val, edit_cfg_interval_val,
+                             edit_timestep_shift_val, edit_num_timesteps_val, edit_cfg_renorm_min_val,
+                             edit_cfg_renorm_type_val):
+
+        # Append user message to history
+        history.append([prompt, None])
 
-        # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
-            with gr.Group():
-                with gr.Row():
-                    edit_seed = gr.Slider(minimum=0, maximum=1000000, value=0, step=1, interactive=True,
-                                          label="Seed", info="0 for random seed, positive for reproducible results")
-                    edit_cfg_text_scale = gr.Slider(minimum=1.0, maximum=8.0, value=4.0, step=0.1, interactive=True,
-                                                    label="CFG Text Scale", info="Controls how strongly the model follows the text prompt")
+        # Define common parameters for inference functions
+        common_infer_params = dict(
+            show_thinking=show_thinking_global_val,
+            do_sample=common_do_sample_val,
+            text_temperature=common_text_temperature_val,
+        )
+
+        try:
+            if mode == "Text to Image":
+                # Add T2I specific parameters, including max_think_token_n and seed
+                t2i_params = {
+                    **common_infer_params,
+                    "max_think_token_n": common_max_think_token_n_val,
+                    "seed": seed_global_val,
+                    "cfg_text_scale": t2i_cfg_text_scale_val,
+                    "cfg_interval": t2i_cfg_interval_val,
+                    "timestep_shift": t2i_timestep_shift_val,
+                    "num_timesteps": t2i_num_timesteps_val,
+                    "cfg_renorm_min": t2i_cfg_renorm_min_val,
+                    "cfg_renorm_type": t2i_cfg_renorm_type_val,
+                    "image_ratio": t2i_image_ratio_val,
+                }
 
-                with gr.Row():
-                    edit_cfg_img_scale = gr.Slider(minimum=1.0, maximum=4.0, value=2.0, step=0.1, interactive=True,
-                                                   label="CFG Image Scale", info="Controls how much the model preserves input image details")
-                    edit_cfg_interval = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, interactive=True,
-                                                  label="CFG Interval", info="Start of CFG application interval (end is fixed at 1.0)")
-
-                with gr.Row():
-                    edit_cfg_renorm_type = gr.Dropdown(choices=["global", "local", "text_channel"],
-                                                       value="text_channel", label="CFG Renorm Type",
-                                                       info="If the genrated image is blurry, use 'global")
-                    edit_cfg_renorm_min = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, interactive=True,
-                                                    label="CFG Renorm Min", info="1.0 disables CFG-Renorm")
+                for img, txt in text_to_image(
+                    prompt=prompt,
+                    **t2i_params
+                ):
+                    # For Text to Image, yield image first, then thinking text (if available)
+                    if img is not None:
+                        history[-1] = [prompt, (img, txt)]
+                    elif txt: # Only update text if image is not ready yet
+                        history[-1] = [prompt, txt]
+                    yield history, gr.update(value="") # Update chatbot and clear input
+
+            elif mode == "Image Understanding":
+                if uploaded_image is None:
+                    history[-1] = [prompt, "Please upload an image for Image Understanding."]
+                    yield history, gr.update(value="")
+                    return
 
-                with gr.Row():
-                    edit_num_timesteps = gr.Slider(minimum=10, maximum=100, value=50, step=5, interactive=True,
-                                                   label="Timesteps", info="Total denoising steps")
-                    edit_timestep_shift = gr.Slider(minimum=1.0, maximum=10.0, value=3.0, step=0.5, interactive=True,
-                                                    label="Timestep Shift", info="Higher values for layout, lower for details")
+                # Add Understanding specific parameters (max_new_tokens maps to common_max_think_token_n)
+                # Note: seed is not used in image_understanding
+                understand_params = {
+                    **common_infer_params,
+                    "max_new_tokens": common_max_think_token_n_val,
+                }
+                # Remove seed from parameters as it's not used by image_understanding
+                understand_params.pop('seed', None)
 
+                for txt in image_understanding(
+                    image=uploaded_image,
+                    prompt=prompt,
+                    **understand_params
+                ):
+                    history[-1] = [prompt, txt]
+                    yield history, gr.update(value="")
+
+            elif mode == "Image Edit":
+                if uploaded_image is None:
+                    history[-1] = [prompt, "Please upload an image for Image Editing."]
+                    yield history, gr.update(value="")
+                    return
 
-            # Thinking parameters in a single row
-            edit_thinking_params = gr.Group(visible=False)
-            with edit_thinking_params:
-                with gr.Row():
-                    edit_do_sample = gr.Checkbox(label="Sampling", value=False, info="Enable sampling for text generation")
-                    edit_max_think_token_n = gr.Slider(minimum=64, maximum=4006, value=1024, step=64, interactive=True,
-                                                       label="Max Think Tokens", info="Maximum number of tokens for thinking")
-                    edit_text_temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, interactive=True,
-                                                      label="Temperature", info="Controls randomness in text generation")
-
-        edit_btn = gr.Button("Submit")
-
-        # Dynamically show/hide thinking process box for editing
-        def update_edit_thinking_visibility(show):
-            return gr.update(visible=show), gr.update(visible=show)
-
-        edit_show_thinking.change(
-            fn=update_edit_thinking_visibility,
-            inputs=[edit_show_thinking],
-            outputs=[edit_thinking_output, edit_thinking_params]
-        )
-
-        edit_btn.click(
-            fn=edit_image,
-            inputs=[
-                edit_image_input, edit_prompt, edit_show_thinking,
-                edit_cfg_text_scale, edit_cfg_img_scale, edit_cfg_interval,
-                edit_timestep_shift, edit_num_timesteps,
-                edit_cfg_renorm_min, edit_cfg_renorm_type,
-                edit_max_think_token_n, edit_do_sample, edit_text_temperature, edit_seed
-            ],
-            outputs=[edit_image_output, edit_thinking_output]
-        )
+                # Add Edit specific parameters, including max_think_token_n and seed
+                edit_params = {
+                    **common_infer_params,
+                    "max_think_token_n": common_max_think_token_n_val,
+                    "seed": seed_global_val,
+                    "cfg_text_scale": edit_cfg_text_scale_val,
+                    "cfg_img_scale": edit_cfg_img_scale_val,
+                    "cfg_interval": edit_cfg_interval_val,
+                    "timestep_shift": edit_timestep_shift_val,
+                    "num_timesteps": edit_num_timesteps_val,
+                    "cfg_renorm_min": edit_cfg_renorm_min_val,
+                    "cfg_renorm_type": edit_cfg_renorm_type_val,
+                }
+
+                for img, txt in edit_image(
+                    image=uploaded_image,
+                    prompt=prompt,
+                    **edit_params
+                ):
+                    # For Image Edit, yield image first, then thinking text (if available)
+                    if img is not None:
+                        history[-1] = [prompt, (img, txt)]
+                    elif txt: # Only update text if image is not ready yet
+                        history[-1] = [prompt, txt]
+                    yield history, gr.update(value="")
+
+        except Exception as e:
+            history[-1] = [prompt, f"An error occurred: {e}"]
+            yield history, gr.update(value="") # Update history with error and clear input
+
+    # Event handlers for dynamic UI updates and submission
+    # Control visibility of thinking parameters
+    show_thinking_global.change(
+        fn=lambda x: gr.update(visible=x),
+        inputs=[show_thinking_global],
+        outputs=[thinking_params_container]
+    )
 
-    with gr.Tab("🖼️ Image Understanding"):
-        with gr.Row():
-            with gr.Column(scale=1):
-                img_input = gr.Image(label="Input Image", value=load_example_image('test_images/meme.jpg'))
-                understand_prompt = gr.Textbox(
-                    label="Prompt",
-                    value="Can someone explain what's funny about this meme??"
-                )
-
-            with gr.Column(scale=1):
-                txt_output = gr.Textbox(label="Result", lines=20)
-
-        with gr.Row():
-            understand_show_thinking = gr.Checkbox(label="Thinking", value=False)
-
-        # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
-            with gr.Row():
-                understand_do_sample = gr.Checkbox(label="Sampling", value=False, info="Enable sampling for text generation")
-                understand_text_temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True,
-                                                        label="Temperature", info="Controls randomness in text generation (0=deterministic, 1=creative)")
-                understand_max_new_tokens = gr.Slider(minimum=64, maximum=4096, value=512, step=64, interactive=True,
-                                                      label="Max New Tokens", info="Maximum length of generated text, including potential thinking")
-
-        img_understand_btn = gr.Button("Submit")
-
-        img_understand_btn.click(
-            fn=image_understanding,
-            inputs=[
-                img_input, understand_prompt, understand_show_thinking,
-                understand_do_sample, understand_text_temperature, understand_max_new_tokens
-            ],
-            outputs=txt_output
-        )
+    # Clear image input if mode switches to Text to Image
+    mode_selector.change(
+        fn=lambda mode: gr.update(value=None) if mode == "Text to Image" else gr.update(),
+        inputs=[mode_selector],
+        outputs=[image_input]
+    )
+
+    # List of all input components whose values are passed to process_chat_message
+    inputs_list = [
+        chatbot, user_prompt, image_input, mode_selector,
+        show_thinking_global, seed_global,
+        common_do_sample, common_text_temperature, common_max_think_token_n,
+        t2i_cfg_text_scale, t2i_cfg_interval, t2i_timestep_shift,
+        t2i_num_timesteps, t2i_cfg_renorm_min, t2i_cfg_renorm_type,
+        t2i_image_ratio,
+        edit_cfg_text_scale, edit_cfg_img_scale, edit_cfg_interval,
+        edit_timestep_shift, edit_num_timesteps, edit_cfg_renorm_min,
+        edit_cfg_renorm_type
+    ]
+
+    # Link submit button and text input 'Enter' key to the processing function
+    submit_btn.click(
+        fn=process_chat_message,
+        inputs=inputs_list,
+        outputs=[chatbot, user_prompt],
+        scroll_to_output=True,
+        queue=False, # Set to True if long generation times cause issues, but might affect responsiveness
+    )
+    user_prompt.submit( # Allows pressing Enter in textbox to submit
+        fn=process_chat_message,
+        inputs=inputs_list,
+        outputs=[chatbot, user_prompt],
+        scroll_to_output=True,
+        queue=False,
+    )
 
 demo.launch()
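
For reference, a minimal, self-contained sketch of the streaming pattern the new process_chat_message relies on: a Gradio generator handler that yields successive (chatbot, textbox) updates so partial results appear in the chat as they arrive. The names stream_reply, chat, and box below are illustrative, not part of app.py:

import gradio as gr

def stream_reply(history, message):
    # Append the user turn with an empty bot slot, then fill it incrementally.
    history = history + [[message, ""]]
    for token in ["partial ", "replies ", "stream ", "into ", "the ", "chat"]:
        history[-1][1] += token
        # Each yield pushes the updated history to the Chatbot and clears the Textbox.
        yield history, ""

with gr.Blocks() as sketch:
    chat = gr.Chatbot()
    box = gr.Textbox()
    box.submit(fn=stream_reply, inputs=[chat, box], outputs=[chat, box])

sketch.launch()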