Spaces:

kadirnar
/

Video-Diffusion-WebUI

Runtime error

App Files Community

kadirnar committed on Mar 20, 2023

Commit

debc5f3

1 Parent(s): 62f6167

update

Browse files

Files changed (6) hide show

app.py +2 -2
inpaint_zoom/app/zoom_in_app.py +162 -149
inpaint_zoom/app/zoom_out_app.py +36 -34
inpaint_zoom/zoom_out_app.py +0 -154
inpaint_zoom/zoom_out_utils.py +0 -45
utils.py +0 -45

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from inpaint_zoom.app.zoom_out_app import stable_diffusion_zoom_out_app
-from inpaint_zoom.app.zoom_in_app import stable_diffusion_zoom_in_app
 import gradio as gr
@@ -23,7 +23,7 @@ with app:
     with gr.Row():
         with gr.Column():
             with gr.Tab('Zoom In'):
-                stable_diffusion_zoom_in_app()
             with gr.Tab('Zoom Out'):
                 stable_diffusion_zoom_out_app()

 from inpaint_zoom.app.zoom_out_app import stable_diffusion_zoom_out_app
+from inpaint_zoom.app.zoom_in_app import StableDiffusionZoomIn
 import gradio as gr
     with gr.Row():
         with gr.Column():
             with gr.Tab('Zoom In'):
+                StableDiffusionZoomIn.app()
             with gr.Tab('Zoom Out'):
                 stable_diffusion_zoom_out_app()

inpaint_zoom/app/zoom_in_app.py CHANGED Viewed

@@ -22,174 +22,187 @@ stable_paint_negative_prompt_list = [
         "lurry, bad art, blurred, text, watermark",
     ]
-def stable_diffusion_zoom_in(
-    model_id,
-    prompt,
-    negative_prompt,
-    guidance_scale,
-    num_inference_steps,
-    ):
-    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16")
-    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    pipe = pipe.to("cuda")
-    pipe.safety_checker = dummy
-    pipe.enable_attention_slicing()
-    g_cuda = torch.Generator(device='cuda')
-    num_init_images = 2
-    seed = 9999
-    height = 512
-    width = height
-    current_image = Image.new(mode="RGBA", size=(height, width))
-    mask_image = np.array(current_image)[:,:,3]
-    mask_image = Image.fromarray(255-mask_image).convert("RGB")
-    current_image = current_image.convert("RGB")
-    init_images =  pipe(prompt=[prompt]*num_init_images,
-                        negative_prompt=[negative_prompt]*num_init_images,
-                        image=current_image,
-                        guidance_scale = guidance_scale,
-                        height = height,
-                        width = width,
-                        generator = g_cuda.manual_seed(seed),
-                        mask_image=mask_image,
-                        num_inference_steps=num_inference_steps)[0]
-    image_grid(init_images, rows=1, cols=num_init_images)
-    init_image_selected = 1 #@param
-    if num_init_images == 1:
-        init_image_selected = 0
-    else:
-        init_image_selected = init_image_selected - 1
-    num_outpainting_steps = 20 #@param
-    mask_width = 128 #@param
-    num_interpol_frames = 30 #@param
-    current_image = init_images[init_image_selected]
-    all_frames = []
-    all_frames.append(current_image)
-    for i in range(num_outpainting_steps):
-        print('Generating image: ' + str(i+1) + ' / ' + str(num_outpainting_steps))
-        prev_image_fix = current_image
-        prev_image = shrink_and_paste_on_blank(current_image, mask_width)
-        current_image = prev_image
-        #create mask (black image with white mask_width width edges)
-        mask_image = np.array(current_image)[:,:,3]
-        mask_image = Image.fromarray(255-mask_image).convert("RGB")
-        #inpainting step
-        current_image = current_image.convert("RGB")
-        images = pipe(prompt=prompt,
-                        negative_prompt=negative_prompt,
-                        image=current_image,
-                        guidance_scale = guidance_scale,
-                        height = height,
-                        width = width,
-                        #this can make the whole thing deterministic but the output less exciting
-                        #generator = g_cuda.manual_seed(seed),
-                        mask_image=mask_image,
-                        num_inference_steps=num_inference_steps)[0]
-        current_image = images[0]
-        current_image.paste(prev_image, mask=prev_image)
-        #interpolation steps bewteen 2 inpainted images (=sequential zoom and crop)
-        for j in range(num_interpol_frames - 1):
-            interpol_image = current_image
-            interpol_width = round(
-                (1- ( 1-2*mask_width/height )**( 1-(j+1)/num_interpol_frames ) )*height/2
-                )
-            interpol_image = interpol_image.crop((interpol_width,
-                                                interpol_width,
-                                                width - interpol_width,
-                                                height - interpol_width))
-            interpol_image = interpol_image.resize((height, width))
-            #paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
-            interpol_width2 = round(
-                ( 1 - (height-2*mask_width) / (height-2*interpol_width) ) / 2*height
-                )
-            prev_image_fix_crop = shrink_and_paste_on_blank(prev_image_fix, interpol_width2)
-            interpol_image.paste(prev_image_fix_crop, mask = prev_image_fix_crop)
-            all_frames.append(interpol_image)
-        all_frames.append(current_image)
-    video_file_name = "infinite_zoom_out"
-    fps = 30
-    save_path = video_file_name + ".mp4"
-    write_video(save_path, all_frames, fps)
-    return save_path
-def stable_diffusion_zoom_in_app():
-    with gr.Blocks():
-        with gr.Row():
-            with gr.Column():
-                text2image_in_model_path = gr.Dropdown(
-                    choices=stable_paint_model_list,
-                    value=stable_paint_model_list[0],
-                    label='Text-Image Model Id'
-                )
-                text2image_in_prompt = gr.Textbox(
-                    lines=1,
-                    value=stable_paint_prompt_list[0],
-                    label='Prompt'
-                )
-                text2image_in_negative_prompt = gr.Textbox(
-                    lines=1,
-                    value=stable_paint_negative_prompt_list[0],
-                    label='Negative Prompt'
-                )
-                with gr.Accordion("Advanced Options", open=False):
-                    text2image_in_guidance_scale = gr.Slider(
-                        minimum=0.1,
-                        maximum=15,
-                        step=0.1,
-                        value=7.5,
-                        label='Guidance Scale'
                     )
-                    text2image_in_num_inference_step = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=50,
-                        label='Num Inference Step'
                     )
-                text2image_in_predict = gr.Button(value='Generator')
-            with gr.Column():
-                output_image = gr.Video(label='Output')
-        text2image_in_predict.click(
-            fn=stable_diffusion_zoom_in,
-            inputs=[
-                text2image_in_model_path,
-                text2image_in_prompt,
-                text2image_in_negative_prompt,
-                text2image_in_guidance_scale,
-                text2image_in_num_inference_step,
-            ],
-            outputs=output_image
-        )

         "lurry, bad art, blurred, text, watermark",
     ]
+class StableDiffusionZoomIn:
+    def __init__(self):
+        self.pipe = None
+    def load_model(self, model_id):
+        if self.pipe is None:
+            self.pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16")
+        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(self.pipe.scheduler.config)
+        self.pipe = self.pipe.to("cuda")
+        self.pipe.safety_checker = dummy
+        self.pipe.enable_attention_slicing()
+        self.pipe.enable_xformers_memory_efficient_attention()
+        self.g_cuda = torch.Generator(device='cuda')
+        return self.pipe
+    def generate_video(
+        self,
+        model_id,
+        prompt,
+        negative_prompt,
+        guidance_scale,
+        num_inference_steps,
+        ):
+        pipe = self.load_model(model_id)
+        num_init_images = 2
+        seed = 9999
+        height = 512
+        width = height
+        current_image = Image.new(mode="RGBA", size=(height, width))
+        mask_image = np.array(current_image)[:,:,3]
+        mask_image = Image.fromarray(255-mask_image).convert("RGB")
+        current_image = current_image.convert("RGB")
+        init_images =  pipe(prompt=[prompt]*num_init_images,
+                            negative_prompt=[negative_prompt]*num_init_images,
+                            image=current_image,
+                            guidance_scale = guidance_scale,
+                            height = height,
+                            width = width,
+                            generator = self.g_cuda.manual_seed(seed),
+                            mask_image=mask_image,
+                            num_inference_steps=num_inference_steps)[0]
+        image_grid(init_images, rows=1, cols=num_init_images)
+        init_image_selected = 1 #@param
+        if num_init_images == 1:
+            init_image_selected = 0
+        else:
+            init_image_selected = init_image_selected - 1
+        num_outpainting_steps = 20 #@param
+        mask_width = 128 #@param
+        num_interpol_frames = 30 #@param
+        current_image = init_images[init_image_selected]
+        all_frames = []
+        all_frames.append(current_image)
+        for i in range(num_outpainting_steps):
+            print('Generating image: ' + str(i+1) + ' / ' + str(num_outpainting_steps))
+            prev_image_fix = current_image
+            prev_image = shrink_and_paste_on_blank(current_image, mask_width)
+            current_image = prev_image
+            #create mask (black image with white mask_width width edges)
+            mask_image = np.array(current_image)[:,:,3]
+            mask_image = Image.fromarray(255-mask_image).convert("RGB")
+            #inpainting step
+            current_image = current_image.convert("RGB")
+            images = pipe(prompt=prompt,
+                            negative_prompt=negative_prompt,
+                            image=current_image,
+                            guidance_scale = guidance_scale,
+                            height = height,
+                            width = width,
+                            #this can make the whole thing deterministic but the output less exciting
+                            #generator = g_cuda.manual_seed(seed),
+                            mask_image=mask_image,
+                            num_inference_steps=num_inference_steps)[0]
+            current_image = images[0]
+            current_image.paste(prev_image, mask=prev_image)
+            #interpolation steps bewteen 2 inpainted images (=sequential zoom and crop)
+            for j in range(num_interpol_frames - 1):
+                interpol_image = current_image
+                interpol_width = round(
+                    (1- ( 1-2*mask_width/height )**( 1-(j+1)/num_interpol_frames ) )*height/2
+                    )
+                interpol_image = interpol_image.crop((interpol_width,
+                                                    interpol_width,
+                                                    width - interpol_width,
+                                                    height - interpol_width))
+                interpol_image = interpol_image.resize((height, width))
+                #paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
+                interpol_width2 = round(
+                    ( 1 - (height-2*mask_width) / (height-2*interpol_width) ) / 2*height
+                    )
+                prev_image_fix_crop = shrink_and_paste_on_blank(prev_image_fix, interpol_width2)
+                interpol_image.paste(prev_image_fix_crop, mask = prev_image_fix_crop)
+                all_frames.append(interpol_image)
+            all_frames.append(current_image)
+        video_file_name = "infinite_zoom_out"
+        fps = 30
+        save_path = video_file_name + ".mp4"
+        write_video(save_path, all_frames, fps)
+        return save_path
+    def app():
+        with gr.Blocks():
+            with gr.Row():
+                with gr.Column():
+                    text2image_in_model_path = gr.Dropdown(
+                        choices=stable_paint_model_list,
+                        value=stable_paint_model_list[0],
+                        label='Text-Image Model Id'
                     )
+                    text2image_in_prompt = gr.Textbox(
+                        lines=1,
+                        value=stable_paint_prompt_list[0],
+                        label='Prompt'
                     )
+                    text2image_in_negative_prompt = gr.Textbox(
+                        lines=1,
+                        value=stable_paint_negative_prompt_list[0],
+                        label='Negative Prompt'
+                    )
+                    with gr.Row():
+                        with gr.Column():
+                            text2image_in_guidance_scale = gr.Slider(
+                                minimum=0.1,
+                                maximum=15,
+                                step=0.1,
+                                value=7.5,
+                                label='Guidance Scale'
+                            )
+                            text2image_in_num_inference_step = gr.Slider(
+                                minimum=1,
+                                maximum=100,
+                                step=1,
+                                value=50,
+                                label='Num Inference Step'
+                            )
+                    text2image_in_predict = gr.Button(value='Generator')
+                with gr.Column():
+                    output_image = gr.Video(label='Output')
+            text2image_in_predict.click(
+                fn=StableDiffusionZoomIn().generate_video,
+                inputs=[
+                    text2image_in_model_path,
+                    text2image_in_prompt,
+                    text2image_in_negative_prompt,
+                    text2image_in_guidance_scale,
+                    text2image_in_num_inference_step,
+                ],
+                outputs=output_image
+            )

inpaint_zoom/app/zoom_out_app.py CHANGED Viewed

@@ -79,7 +79,7 @@ def stable_diffusion_zoom_out_app():
                 )
                 text2image_out_prompt = gr.Textbox(
-                    lines=1,
                     value=stable_paint_prompt_list[0],
                     label='Prompt'
                 )
@@ -89,39 +89,41 @@ def stable_diffusion_zoom_out_app():
                     value=stable_paint_negative_prompt_list[0],
                     label='Negative Prompt'
                 )
-                with gr.Accordion("Advanced Options", open=False):
-                    text2image_out_guidance_scale = gr.Slider(
-                        minimum=0.1,
-                        maximum=15,
-                        step=0.1,
-                        value=7.5,
-                        label='Guidance Scale'
-                    )
-                    text2image_out_num_inference_step = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=50,
-                        label='Num Inference Step'
-                    )
-                    text2image_out_step_size = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=10,
-                        label='Step Size'
-                    )
-                    text2image_out_num_frames = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=10,
-                        label='Frames'
-                    )
                 text2image_out_predict = gr.Button(value='Generator')

                 )
                 text2image_out_prompt = gr.Textbox(
+                    lines=2,
                     value=stable_paint_prompt_list[0],
                     label='Prompt'
                 )
                     value=stable_paint_negative_prompt_list[0],
                     label='Negative Prompt'
                 )
+                with gr.Row():
+                    with gr.Column():
+                        text2image_out_guidance_scale = gr.Slider(
+                            minimum=0.1,
+                            maximum=15,
+                            step=0.1,
+                            value=7.5,
+                            label='Guidance Scale'
+                        )
+                        text2image_out_num_inference_step = gr.Slider(
+                            minimum=1,
+                            maximum=100,
+                            step=1,
+                            value=50,
+                            label='Num Inference Step'
+                        )
+                    with gr.Row():
+                        with gr.Column():
+                            text2image_out_step_size = gr.Slider(
+                                minimum=1,
+                                maximum=100,
+                                step=1,
+                                value=10,
+                                label='Step Size'
+                            )
+                            text2image_out_num_frames = gr.Slider(
+                                minimum=1,
+                                maximum=100,
+                                step=1,
+                                value=10,
+                                label='Frames'
+                            )
                 text2image_out_predict = gr.Button(value='Generator')

inpaint_zoom/zoom_out_app.py DELETED Viewed

@@ -1,154 +0,0 @@
-from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
-from inpaint_zoom.zoom_out_utils import preprocess_image, preprocess_mask_image, write_video, dummy
-from PIL import Image
-import gradio as gr
-import torch
-import os
-os.environ["CUDA_VISIBLE_DEVICES"]="0"
-stable_paint_model_list = [
-  "stabilityai/stable-diffusion-2-inpainting",
-  "runwayml/stable-diffusion-inpainting"
-]
-stable_paint_prompt_list = [
-        "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
-        "A beautiful landscape of a mountain range with a lake in the foreground",
-]
-stable_paint_negative_prompt_list = [
-        "lurry, bad art, blurred, text, watermark",
-    ]
-def stable_diffusion_zoom_out(
-  model_id,
-  original_prompt,
-  negative_prompt,
-  guidance_scale,
-  num_inference_steps,
-  step_size,
-  num_frames,
-  fps,
-    ):
-    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-    pipe.set_use_memory_efficient_attention_xformers(True)
-    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    pipe = pipe.to("cuda")
-    pipe.safety_checker = dummy
-    new_image = Image.new(mode="RGBA", size=(512,512))
-    current_image, mask_image = preprocess_mask_image(new_image)
-    current_image = pipe(
-      prompt=[original_prompt],
-      negative_prompt=[negative_prompt],
-      image=current_image,
-      mask_image=mask_image,
-      num_inference_steps=num_inference_steps,
-      guidance_scale=guidance_scale
-    ).images[0]
-    all_frames = []
-    all_frames.append(current_image)
-    for i in range(num_frames):
-        prev_image = preprocess_image(current_image, step_size, 512)
-        current_image = prev_image
-        current_image, mask_image = preprocess_mask_image(current_image)
-        current_image = pipe(prompt=[original_prompt], negative_prompt=[negative_prompt], image=current_image, mask_image=mask_image, num_inference_steps=num_inference_steps).images[0]
-        current_image.paste(prev_image, mask=prev_image)
-        all_frames.append(current_image)
-    save_path = "output.mp4"
-    write_video(save_path, all_frames, fps=fps)
-    return save_path
-def stable_diffusion_text2img_app():
-    with gr.Blocks():
-        with gr.Row():
-            with gr.Column():
-                text2image_out_model_path = gr.Dropdown(
-                    choices=stable_paint_model_list,
-                    value=stable_paint_model_list[0],
-                    label='Text-Image Model Id'
-                )
-                text2image_out_prompt = gr.Textbox(
-                    lines=1,
-                    value=stable_paint_prompt_list[0],
-                    label='Prompt'
-                )
-                text2image_out_negative_prompt = gr.Textbox(
-                    lines=1,
-                    value=stable_paint_negative_prompt_list[0],
-                    label='Negative Prompt'
-                )
-                with gr.Accordion("Advanced Options", open=False):
-                    text2image_out_guidance_scale = gr.Slider(
-                        minimum=0.1,
-                        maximum=15,
-                        step=0.1,
-                        value=7.5,
-                        label='Guidance Scale'
-                    )
-                    text2image_out_num_inference_step = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=50,
-                        label='Num Inference Step'
-                    )
-                    text2image_out_step_size = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=10,
-                        label='Step Size'
-                    )
-                    text2image_out_num_frames = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=10,
-                        label='Frames'
-                    )
-                    text2image_out_fps = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=30,
-                        label='FPS'
-                    )
-                text2image_out_predict = gr.Button(value='Generator')
-            with gr.Column():
-                output_image = gr.Video(label='Output')
-        text2image_out_predict.click(
-            fn=stable_diffusion_zoom_out,
-            inputs=[
-                text2image_out_model_path,
-                text2image_out_prompt,
-                text2image_out_negative_prompt,
-                text2image_out_guidance_scale,
-                text2image_out_num_inference_step,
-                text2image_out_step_size,
-                text2image_out_num_frames,
-                text2image_out_fps
-            ],
-            outputs=output_image
-        )

inpaint_zoom/zoom_out_utils.py DELETED Viewed

@@ -1,45 +0,0 @@
-import numpy as np
-import cv2
-from PIL import Image
-def write_video(file_path, frames, fps):
-    """
-    Writes frames to an mp4 video file
-    :param file_path: Path to output video, must end with .mp4
-    :param frames: List of PIL.Image objects
-    :param fps: Desired frame rate
-    """
-    w, h = frames[0].size
-    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
-    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
-    for frame in frames:
-        np_frame = np.array(frame.convert('RGB'))
-        cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
-        writer.write(cv_frame)
-    writer.release()
-def dummy(images, **kwargs):
-    return images, False
-def preprocess_image(current_image, steps, image_size):
-    next_image = np.array(current_image.convert("RGBA"))*0
-    prev_image = current_image.resize((image_size-2*steps,image_size-2*steps))
-    prev_image = prev_image.convert("RGBA")
-    prev_image = np.array(prev_image)
-    next_image[:, :, 3] = 1
-    next_image[steps:image_size-steps,steps:image_size-steps,:] = prev_image
-    prev_image = Image.fromarray(next_image)
-    return prev_image
-def preprocess_mask_image(current_image):
-    mask_image = np.array(current_image)[:,:,3] # assume image has alpha mask (use .mode to check for "RGBA")
-    mask_image = Image.fromarray(255-mask_image).convert("RGB")
-    current_image = current_image.convert("RGB")
-    return current_image, mask_image

utils.py DELETED Viewed

@@ -1,45 +0,0 @@
-import numpy as np
-import cv2
-from PIL import Image
-def write_video(file_path, frames, fps):
-    """
-    Writes frames to an mp4 video file
-    :param file_path: Path to output video, must end with .mp4
-    :param frames: List of PIL.Image objects
-    :param fps: Desired frame rate
-    """
-    w, h = frames[0].size
-    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
-    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
-    for frame in frames:
-        np_frame = np.array(frame.convert('RGB'))
-        cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
-        writer.write(cv_frame)
-    writer.release()
-def dummy(images, **kwargs):
-    return images, False
-def preprocess_image(current_image, steps, image_size):
-    next_image = np.array(current_image.convert("RGBA"))*0
-    prev_image = current_image.resize((image_size-2*steps,image_size-2*steps))
-    prev_image = prev_image.convert("RGBA")
-    prev_image = np.array(prev_image)
-    next_image[:, :, 3] = 1
-    next_image[steps:image_size-steps,steps:image_size-steps,:] = prev_image
-    prev_image = Image.fromarray(next_image)
-    return prev_image
-def preprocess_mask_image(current_image):
-    mask_image = np.array(current_image)[:,:,3] # assume image has alpha mask (use .mode to check for "RGBA")
-    mask_image = Image.fromarray(255-mask_image).convert("RGB")
-    current_image = current_image.convert("RGB")
-    return current_image, mask_image