Spaces:

skallewag
/

SEEM

Build error

App Files Files Community

skallewag commited on May 27

Commit

0d3376c

verified ·

1 Parent(s): 060adc0

Update app.py

Browse files

Files changed (1) hide show

app.py +284 -203

app.py CHANGED Viewed

@@ -6,75 +6,103 @@
 # Written by Xueyan Zou ([email protected]), Jianwei Yang ([email protected])
 # --------------------------------------------------------
-# Setup paths and install dependencies before any imports
 import os
 import sys
 import subprocess
-print("Setting up SEEM environment...")
-# Install detectron2 first
-print("Installing detectron2...")
-try:
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "git+https://github.com/MaureenZOU/detectron2-xyz.git"])
-    print("Detectron2 installation complete!")
-except Exception as e:
-    print(f"Error installing detectron2: {e}")
-    sys.exit(1)
-# Fix the distributed.py file if it's causing issues
-if os.path.exists('utils/distributed.py'):
-    with open('utils/distributed.py', 'r') as f:
-        content = f.read()
-    if 'from mpi4py import MPI' in content:
-        print("Patching utils/distributed.py to work without mpi4py")
-        patched_content = content.replace(
-            "from mpi4py import MPI",
-            """try:
-    from mpi4py import MPI
-except ImportError:
-    # Dummy MPI implementation
-    class MPI:
-        class COMM_WORLD:
-            @staticmethod
-            def Get_rank():
-                return 0
-            @staticmethod
-            def Get_size():
-                return 1"""
-        )
-        with open('utils/distributed.py', 'w') as f:
-            f.write(patched_content)
-        print("Patched utils/distributed.py")
-# Ensure the Python path includes the current directory
-current_dir = os.getcwd()
-if current_dir not in sys.path:
-    sys.path.insert(0, current_dir)
-os.environ["PYTHONPATH"] = current_dir
-print(f"Set PYTHONPATH to: {current_dir}")
-# Check if the interactive.py file exists in the tasks directory
-if os.path.exists('tasks') and 'interactive.py' not in os.listdir('tasks'):
-    print("Creating interactive.py in tasks directory")
-    # Check if examples directory exists
-    if not os.path.exists('examples'):
-        os.makedirs('examples', exist_ok=True)
-    # Create a simplified version of interactive.py
-    with open('tasks/interactive.py', 'w') as f:
-        f.write("""
 import torch
 import numpy as np
-import torch.nn.functional as F
-from PIL import Image
 def interactive_infer_image(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
     # Get image dimensions
     img = image['image']
     h, w = img.size[1], img.size[0]
-    # Display a message and a blank mask for debugging
     print("Called interactive_infer_image with tasks:", tasks)
     print("Image size:", img.size)
     if refimg is not None:
@@ -83,121 +111,235 @@ def interactive_infer_image(model, audio_model, image, tasks, refimg=None, reftx
         print("Text:", reftxt)
     if audio_pth:
         print("Audio path:", audio_pth)
-    # Create a simple blank result
     mask = np.zeros((h, w), dtype=np.uint8)
     return Image.fromarray(mask), None
 def interactive_infer_video(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
-    # Just return the input video for debugging
     print("Called interactive_infer_video with tasks:", tasks)
     if video_pth:
         print("Video path:", video_pth)
     return None, video_pth
 """)
-    print("Created interactive.py")
-# Continue with regular imports
-import warnings
-import PIL
-from PIL import Image, ImageDraw
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
-import gradio as gr
-import torch
-import argparse
-import whisper
-import numpy as np
-from gradio import processing_utils
-from modeling.BaseModel import BaseModel
-from modeling import build_model
-from utils.distributed import init_distributed
-from utils.arguments import load_opt_from_config_files
-from utils.constants import COCO_PANOPTIC_CLASSES
-# Import the interactive functions using a try-except block to catch import errors
 try:
-    # First try the original path
-    try:
-        from demo.seem.tasks.interactive import interactive_infer_image, interactive_infer_video
-        print("Successfully imported interactive functions from demo.seem.tasks.interactive")
-    except ImportError:
-        # Try direct import from tasks directory
-        from tasks.interactive import interactive_infer_image, interactive_infer_video
-        print("Successfully imported interactive functions from tasks.interactive")
-except ImportError as e:
-    print(f"Error importing interactive functions: {e}")
-    print("Python path:", sys.path)
-    print("Current directory:", os.getcwd())
-    print("Contents of current directory:", os.listdir('.'))
-    if os.path.exists('tasks'):
-        print("Contents of tasks directory:", os.listdir('tasks'))
     sys.exit(1)
-def parse_option():
-    parser = argparse.ArgumentParser('SEEM Demo', add_help=False)
-    parser.add_argument('--conf_files', default="configs/seem/focall_unicl_lang_demo.yaml", metavar="FILE", help='path to config file', )
-    cfg = parser.parse_args()
-    return cfg
-'''
-build args
-'''
-cfg = parse_option()
-opt = load_opt_from_config_files([cfg.conf_files])
-opt = init_distributed(opt)
-# META DATA
-cur_model = 'None'
-if 'focalt' in cfg.conf_files:
-    pretrained_pth = os.path.join("seem_focalt_v0.pt")
-    if not os.path.exists(pretrained_pth):
-        os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v0.pt"))
-    cur_model = 'Focal-T'
-elif 'focal' in cfg.conf_files:
-    pretrained_pth = os.path.join("seem_focall_v0.pt")
-    if not os.path.exists(pretrained_pth):
-        os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v0.pt"))
-    cur_model = 'Focal-L'
-'''
-build model
-'''
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-print(f"Using device: {device}")
 try:
-    model = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth).eval().to(device)
-    with torch.no_grad():
-        model.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(COCO_PANOPTIC_CLASSES + ["background"], is_eval=True)
-    print("Model loaded successfully")
-    model_loaded = True
 except Exception as e:
-    print(f"Error loading model: {e}")
-    print("Continuing with simplified interface")
     model = None
     model_loaded = False
-'''
-audio
-'''
-try:
-    audio = whisper.load_model("base")
-    audio_loaded = True
-except Exception as e:
-    print(f"Error loading audio model: {e}")
-    audio = None
-    audio_loaded = False
 @torch.no_grad()
 def inference(image, task, *args, **kwargs):
     if not model_loaded:
-        # Return a placeholder image if model failed to load
         warning_img = Image.new('RGB', (600, 400), color=(240, 240, 240))
         d = ImageDraw.Draw(warning_img)
         d.text((50, 150), "Model could not be loaded.", fill=(255, 0, 0))
-        d.text((50, 200), "Please check logs for details.", fill=(255, 0, 0))
         return warning_img, None
     # Prepare input parameters for the interactive functions
@@ -222,7 +364,6 @@ def inference(image, task, *args, **kwargs):
             return interactive_infer_image(model, audio, image_input, task, refimg, reftxt, audio_pth, video_pth)
     except Exception as e:
         print(f"Error during inference: {e}")
-        import traceback
         traceback.print_exc()
         warning_img = Image.new('RGB', (600, 400), color=(240, 240, 240))
         d = ImageDraw.Draw(warning_img)
@@ -256,66 +397,6 @@ class Video(gr.components.Video):
     def preprocess(self, x):
         return super().preprocess(x)
-# Now we can check and create example files since we have the necessary imports
-# Check if the example files exist
-if os.path.exists('examples'):
-    example_files = [
-        'corgi1.webp', 'corgi2.jpg', 'river1.png', 'river2.png',
-        'zebras1.jpg', 'zebras2.jpg', 'fries1.png', 'fries2.png',
-        'placeholder.png', 'ref_vase.JPG', 'river1.wav', 'vasedeck.mp4'
-    ]
-    # Check for missing files
-    missing_files = []
-    for file_name in example_files:
-        if not os.path.exists(os.path.join('examples', file_name)):
-            missing_files.append(file_name)
-    # Create any missing files
-    if missing_files:
-        print(f"Creating missing example files: {', '.join(missing_files)}")
-        # Create a placeholder image for image files
-        placeholder_img = Image.new('RGB', (400, 300), color=(240, 240, 240))
-        d = ImageDraw.Draw(placeholder_img)
-        d.text((150, 150), "Placeholder", fill=(0, 0, 0))
-        for file_name in missing_files:
-            file_path = os.path.join('examples', file_name)
-            if file_name.endswith(('.jpg', '.webp', '.png', '.JPG')):
-                placeholder_img.save(file_path)
-            elif file_name.endswith('.wav'):
-                with open(file_path, 'wb') as f:
-                    f.write(b'RIFF$\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x00\x04\x00\x00\x00\x04\x00\x00\x01\x00\x08\x00data\x00\x00\x00\x00')
-            elif file_name.endswith('.mp4'):
-                with open(file_path, 'wb') as f:
-                    f.write(b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00mp42mp41\x00\x00\x00\x00')
-else:
-    print("Creating examples directory")
-    os.makedirs('examples', exist_ok=True)
-    # Create placeholder files
-    placeholder_img = Image.new('RGB', (400, 300), color=(240, 240, 240))
-    d = ImageDraw.Draw(placeholder_img)
-    d.text((150, 150), "Placeholder", fill=(0, 0, 0))
-    example_files = [
-        'corgi1.webp', 'corgi2.jpg', 'river1.png', 'river2.png',
-        'zebras1.jpg', 'zebras2.jpg', 'fries1.png', 'fries2.png',
-        'placeholder.png', 'ref_vase.JPG'
-    ]
-    for file_name in example_files:
-        file_path = os.path.join('examples', file_name)
-        placeholder_img.save(file_path)
-    with open('examples/river1.wav', 'wb') as f:
-        f.write(b'RIFF$\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x00\x04\x00\x00\x00\x04\x00\x00\x01\x00\x08\x00data\x00\x00\x00\x00')
-    with open('examples/vasedeck.mp4', 'wb') as f:
-        f.write(b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00mp42mp41\x00\x00\x00\x00')
-    print("Created example files")
 '''
 launch app
 '''
@@ -325,7 +406,7 @@ title = "SEEM: Segment Everything Everywhere All At Once"
 if model_loaded:
     model_status = f"<span style=\"color:green;\">✓ Model loaded successfully</span> (SEEM {cur_model})"
 else:
-    model_status = "<span style=\"color:red;\">✗ Model failed to load</span> (see logs for details)"
 description = f"""
 <div style="text-align: center; font-weight: bold;">
@@ -341,7 +422,7 @@ description = f"""
 </div>
 """
-article = "The Demo is Run on SEEM"
 inputs = [ImageMask(label="[Stroke] Draw on Image",type="pil"), gr.inputs.CheckboxGroup(choices=["Stroke", "Example", "Text", "Audio", "Video", "Panoptic"], type="value", label="Interative Mode"), ImageMask(label="[Example] Draw on Referring Image",type="pil"), gr.Textbox(label="[Text] Referring Text"), gr.Audio(label="[Audio] Referring Audio", source="microphone", type="filepath"), gr.Video(label="[Video] Referring Video Segmentation",format="mp4",interactive=True)]
 gr.Interface(
     fn=inference,
@@ -366,4 +447,4 @@ gr.Interface(
     article=article,
     allow_flagging='never',
     cache_examples=False,
-).launch(share=True)

 # Written by Xueyan Zou ([email protected]), Jianwei Yang ([email protected])
 # --------------------------------------------------------
+# This file is specifically adapted for Hugging Face Spaces deployment
 import os
 import sys
 import subprocess
+import warnings
+import traceback
+from pathlib import Path
+# Log all operations for debugging
+print("Starting SEEM HF Space setup...")
+print(f"Current directory: {os.getcwd()}")
+print(f"Python version: {sys.version}")
+# Make sure utils directory exists
+os.makedirs('utils', exist_ok=True)
+print("Created utils directory if it didn't exist")
+# Create a custom distributed.py without mpi4py dependency
+with open('utils/distributed.py', 'w') as f:
+    f.write("""# Custom distributed.py without mpi4py dependency
+import os
 import torch
+import torch.distributed as dist
+class MPI:
+    class COMM_WORLD:
+        @staticmethod
+        def Get_rank():
+            return 0
+        @staticmethod
+        def Get_size():
+            return 1
+        @staticmethod
+        def bcast(data, root=0):
+            return data
+        @staticmethod
+        def barrier():
+            pass
+def apply_distributed(opt):
+    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    opt.rank = 0
+    opt.world_size = 1
+    opt.gpu = 0
+    return opt
+def init_distributed(opt=None):
+    if opt is not None:
+        opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        opt.rank = 0
+        opt.world_size = 1
+        opt.gpu = 0
+        return opt
+    return None
+def get_rank():
+    return 0
+def get_world_size():
+    return 1
+def is_main_process():
+    return True
+def synchronize():
+    pass
+def all_gather(data):
+    return [data]
+def reduce_dict(input_dict, average=True):
+    return input_dict
+""")
+print("Created custom distributed.py")
+# Ensure examples directory exists
+os.makedirs('examples', exist_ok=True)
+print("Created examples directory if it didn't exist")
+# Create a minimal interactive.py in tasks directory
+os.makedirs('tasks', exist_ok=True)
+with open('tasks/interactive.py', 'w') as f:
+    f.write("""
 import numpy as np
+from PIL import Image, ImageDraw
 def interactive_infer_image(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
     # Get image dimensions
     img = image['image']
     h, w = img.size[1], img.size[0]
+    # Display a message and create a simple mask for demonstration
     print("Called interactive_infer_image with tasks:", tasks)
     print("Image size:", img.size)
     if refimg is not None:
         print("Text:", reftxt)
     if audio_pth:
         print("Audio path:", audio_pth)
+    # Create a simple circle mask in the center
     mask = np.zeros((h, w), dtype=np.uint8)
+    center_x, center_y = w//2, h//2
+    radius = min(w, h) // 4
+    for y in range(h):
+        for x in range(w):
+            if ((x - center_x)**2 + (y - center_y)**2) < radius**2:
+                mask[y, x] = 255
     return Image.fromarray(mask), None
 def interactive_infer_video(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
+    # Just return the input video for demonstration
     print("Called interactive_infer_video with tasks:", tasks)
     if video_pth:
         print("Video path:", video_pth)
     return None, video_pth
 """)
+print("Created simplified interactive.py")
+# Create some example placeholder files
+example_files = [
+    'corgi1.webp', 'corgi2.jpg', 'river1.png', 'river2.png',
+    'zebras1.jpg', 'zebras2.jpg', 'fries1.png', 'fries2.png',
+    'placeholder.png', 'ref_vase.JPG'
+]
+placeholder_img = None
+try:
+    from PIL import Image, ImageDraw
+    placeholder_img = Image.new('RGB', (400, 300), color=(240, 240, 240))
+    d = ImageDraw.Draw(placeholder_img)
+    d.text((150, 150), "Placeholder", fill=(0, 0, 0))
+except Exception as e:
+    print(f"Error creating placeholder image: {e}")
+for file_name in example_files:
+    file_path = os.path.join('examples', file_name)
+    if not os.path.exists(file_path) and placeholder_img is not None:
+        try:
+            placeholder_img.save(file_path)
+            print(f"Created {file_path}")
+        except Exception as e:
+            print(f"Error creating {file_path}: {e}")
+# Create dummy audio/video files if needed
+if not os.path.exists('examples/river1.wav'):
+    try:
+        with open('examples/river1.wav', 'wb') as f:
+            f.write(b'RIFF$\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x00\x04\x00\x00\x00\x04\x00\x00\x01\x00\x08\x00data\x00\x00\x00\x00')
+        print("Created dummy audio file")
+    except Exception as e:
+        print(f"Error creating dummy audio file: {e}")
+if not os.path.exists('examples/vasedeck.mp4'):
+    try:
+        with open('examples/vasedeck.mp4', 'wb') as f:
+            f.write(b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00mp42mp41\x00\x00\x00\x00')
+        print("Created dummy video file")
+    except Exception as e:
+        print(f"Error creating dummy video file: {e}")
+# Continue with regular imports
+print("Importing required libraries...")
 try:
+    import PIL
+    from PIL import Image, ImageDraw
+    import gradio as gr
+    import torch
+    import argparse
+    import numpy as np
+    from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
+    from gradio import processing_utils
+    print("Basic imports successful")
+except Exception as e:
+    print(f"Error importing basic libraries: {e}")
+    traceback.print_exc()
     sys.exit(1)
+# Try to import specialized libraries but handle their absence gracefully
+try:
+    import whisper
+    audio_loaded = True
+    print("Whisper loaded successfully")
+except Exception as e:
+    print(f"Error loading whisper: {e}")
+    audio_loaded = False
+# Global flags for model status
+model_loaded = False
+audio_loaded = audio_loaded if 'audio_loaded' in locals() else False
+interactive_functions_imported = False
+# Dummy constants if not available
+try:
+    from utils.constants import COCO_PANOPTIC_CLASSES
+    print("Loaded COCO_PANOPTIC_CLASSES")
+except ImportError:
+    print("Creating dummy COCO_PANOPTIC_CLASSES")
+    COCO_PANOPTIC_CLASSES = ["person", "cat", "dog", "car", "bicycle", "umbrella", "tree", "sky", "building"]
+# Try to import the model but handle failures gracefully
 try:
+    # Attempt to import specialized modules but don't fail if they're not available
+    try:
+        from modeling.BaseModel import BaseModel
+        from modeling import build_model
+        from utils.distributed import init_distributed
+        from utils.arguments import load_opt_from_config_files
+        print("Model imports successful")
+        # Try to import interactive functions
+        try:
+            from tasks.interactive import interactive_infer_image, interactive_infer_video
+            print("Successfully imported interactive functions from tasks.interactive")
+            interactive_functions_imported = True
+        except ImportError as e:
+            print(f"Error importing interactive functions: {e}")
+            interactive_functions_imported = False
+        # Try to set up the model
+        try:
+            parser = argparse.ArgumentParser('SEEM Demo', add_help=False)
+            parser.add_argument('--conf_files', default="configs/seem/focall_unicl_lang_demo.yaml", metavar="FILE", help='path to config file')
+            cfg = parser.parse_args()
+            opt = load_opt_from_config_files([cfg.conf_files])
+            opt = init_distributed(opt)
+            # META DATA
+            cur_model = 'None'
+            pretrained_pth = None
+            if 'focalt' in cfg.conf_files:
+                pretrained_pth = os.path.join("seem_focalt_v0.pt")
+                if not os.path.exists(pretrained_pth):
+                    print(f"Downloading model file {pretrained_pth}...")
+                    os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v0.pt"))
+                cur_model = 'Focal-T'
+            elif 'focal' in cfg.conf_files:
+                pretrained_pth = os.path.join("seem_focall_v0.pt")
+                if not os.path.exists(pretrained_pth):
+                    print(f"Downloading model file {pretrained_pth}...")
+                    os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v0.pt"))
+                cur_model = 'Focal-L'
+            if pretrained_pth and os.path.exists(pretrained_pth):
+                device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+                print(f"Using device: {device}")
+                model = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth).eval().to(device)
+                with torch.no_grad():
+                    model.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(COCO_PANOPTIC_CLASSES + ["background"], is_eval=True)
+                print("Model loaded successfully")
+                model_loaded = True
+            else:
+                print(f"Model file not found: {pretrained_pth}")
+                model = None
+                model_loaded = False
+        except Exception as e:
+            print(f"Error setting up model: {e}")
+            traceback.print_exc()
+            model = None
+            model_loaded = False
+    except Exception as e:
+        print(f"Error during model import: {e}")
+        traceback.print_exc()
+        model = None
+        model_loaded = False
 except Exception as e:
+    print(f"Error during model setup: {e}")
+    traceback.print_exc()
     model = None
     model_loaded = False
+# If interactive functions weren't imported, define dummy versions
+if not interactive_functions_imported:
+    print("Creating dummy interactive functions")
+    def interactive_infer_image(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
+        # Create a simple circle mask in the center
+        img = image['image']
+        h, w = img.size[1], img.size[0]
+        mask = np.zeros((h, w), dtype=np.uint8)
+        center_x, center_y = w//2, h//2
+        radius = min(w, h) // 4
+        for y in range(h):
+            for x in range(w):
+                if ((x - center_x)**2 + (y - center_y)**2) < radius**2:
+                    mask[y, x] = 255
+        return Image.fromarray(mask), None
+    def interactive_infer_video(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
+        return None, video_pth
+# Inference function
 @torch.no_grad()
 def inference(image, task, *args, **kwargs):
     if not model_loaded:
+        # Return a placeholder image with an informative message
+        print("Model not loaded, returning placeholder image")
+        # Generate a simple mask based on the image size
+        if image is not None:
+            try:
+                h, w = image.size[1], image.size[0]
+                mask = np.zeros((h, w), dtype=np.uint8)
+                # Add a simple shape to the mask for demonstration
+                center_x, center_y = w//2, h//2
+                radius = min(w, h) // 4
+                for y in range(h):
+                    for x in range(w):
+                        if ((x - center_x)**2 + (y - center_y)**2) < radius**2:
+                            mask[y, x] = 255
+                return Image.fromarray(mask), None
+            except Exception as e:
+                print(f"Error creating demo mask: {e}")
+                warning_img = Image.new('RGB', (600, 400), color=(240, 240, 240))
+                d = ImageDraw.Draw(warning_img)
+                d.text((50, 150), "Model could not be loaded.", fill=(255, 0, 0))
+                d.text((50, 200), "Using simplified interface for demonstration.", fill=(255, 0, 0))
+                return warning_img, None
         warning_img = Image.new('RGB', (600, 400), color=(240, 240, 240))
         d = ImageDraw.Draw(warning_img)
         d.text((50, 150), "Model could not be loaded.", fill=(255, 0, 0))
+        d.text((50, 200), "Using simplified interface for demonstration.", fill=(255, 0, 0))
         return warning_img, None
     # Prepare input parameters for the interactive functions
             return interactive_infer_image(model, audio, image_input, task, refimg, reftxt, audio_pth, video_pth)
     except Exception as e:
         print(f"Error during inference: {e}")
         traceback.print_exc()
         warning_img = Image.new('RGB', (600, 400), color=(240, 240, 240))
         d = ImageDraw.Draw(warning_img)
     def preprocess(self, x):
         return super().preprocess(x)
 '''
 launch app
 '''
 if model_loaded:
     model_status = f"<span style=\"color:green;\">✓ Model loaded successfully</span> (SEEM {cur_model})"
 else:
+    model_status = "<span style=\"color:orange;\">⚠ Running in demonstration mode</span> (model not loaded)"
 description = f"""
 <div style="text-align: center; font-weight: bold;">
 </div>
 """
+article = "SEEM Demo" + (" (Simplified Interface)" if not model_loaded else "")
 inputs = [ImageMask(label="[Stroke] Draw on Image",type="pil"), gr.inputs.CheckboxGroup(choices=["Stroke", "Example", "Text", "Audio", "Video", "Panoptic"], type="value", label="Interative Mode"), ImageMask(label="[Example] Draw on Referring Image",type="pil"), gr.Textbox(label="[Text] Referring Text"), gr.Audio(label="[Audio] Referring Audio", source="microphone", type="filepath"), gr.Video(label="[Video] Referring Video Segmentation",format="mp4",interactive=True)]
 gr.Interface(
     fn=inference,
     article=article,
     allow_flagging='never',
     cache_examples=False,
+).launch()