Spaces:

Shivdutta
/

S24-StableDiffusion

Sleeping

App Files Files Community

Shivdutta committed on Aug 3, 2024

Commit

6798689

verified ·

1 Parent(s): 4e5be15

Upload app.py

Browse files

Files changed (1) hide show

app.py +13 -75

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 from base64 import b64encode
-import torch
 import numpy
 import torch
 from diffusers import AutoencoderKL, LMSDiscreteScheduler, UNet2DConditionModel
 from huggingface_hub import notebook_login
 # For video display:
 from matplotlib import pyplot as plt
@@ -16,7 +16,7 @@ from transformers import CLIPTextModel, CLIPTokenizer, logging
 import os
 import numpy as np
-torch.manual_seed(24041975)
 # Suppress some unnecessary warnings when loading the CLIPTextModel
 logging.set_verbosity_error()
@@ -145,7 +145,7 @@ def generate_with_embs(text_embeddings, text_input, seed):
     return latents_to_pil(latents)[0]
-def generate_with_prompt_style(prompt, style, seed = 42):
     prompt = prompt + ' in style of s'
     embed = torch.load(style)
@@ -175,72 +175,7 @@ def contrast_loss(images):
     variance = torch.var(images)
     return -variance
-def blue_loss_variant(images, use_mean=False, alpha=1.0):
-    """
-    Computes the blue loss for a batch of images with an optional mean component.
-    The blue loss is defined as the negative variance of the blue channel's pixel values.
-    Optionally, it can also include the mean value of the blue channel.
-    Parameters:
-    images (torch.Tensor): A batch of images. Expected shape is (N, C, H, W) where
-                           N is the batch size, C is the number of channels (3 for RGB),
-                           H is the height, and W is the width.
-    use_mean (bool): If True, includes the mean of the blue channel in the loss calculation.
-    alpha (float): Weighting factor for the mean component when use_mean is True.
-    Returns:
-    torch.Tensor: The blue loss, which is the negative variance of the blue channel's pixel values,
-                  optionally combined with the mean value of the blue channel.
-    """
-    # Ensure the input tensor has the correct shape
-    if images.shape[1] != 3:
-        raise ValueError("Expected images with 3 channels (RGB), but got shape {}".format(images.shape))
-    # Extract the blue channel (assuming the channels are in RGB order)
-    blue_channel = images[:, 2, :, :]
-    # Calculate the variance of the blue channel
-    variance = torch.var(blue_channel)
-    if use_mean:
-        # Calculate the mean of the blue channel
-        mean = torch.mean(blue_channel)
-        # Combine variance and mean into the loss
-        loss = -variance + alpha * mean
-    else:
-        loss = -variance
-    return loss
-def blue_loss(images):
-    """
-    Computes the blue loss for a batch of images.
-    The blue loss is defined as the negative variance of the blue channel's pixel values.
-    Parameters:
-    images (torch.Tensor): A batch of images. Expected shape is (N, C, H, W) where
-                           N is the batch size, C is the number of channels (3 for RGB),
-                           H is the height, and W is the width.
-    Returns:
-    torch.Tensor: The blue loss, which is the negative variance of the blue channel's pixel values.
-    """
-    # Ensure the input tensor has the correct shape
-    if images.shape[1] != 3:
-        raise ValueError("Expected images with 3 channels (RGB), but got shape {}".format(images.shape))
-    # Extract the blue channel (assuming the channels are in RGB order)
-    blue_channel = images[:, 2, :, :]
-    # Calculate the variance of the blue channel
-    variance = torch.var(blue_channel)
-    return -variance
-def generate_with_prompt_style_guidance(prompt, style, seed=42):
     prompt = prompt + ' in style of s'
@@ -325,7 +260,7 @@ def generate_with_prompt_style_guidance(prompt, style, seed=42):
             denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
             # Calculate loss
-            loss = blue_loss_variant(denoised_images) * contrast_loss_scale
             # # Occasionally print it out
             # if i%10==0:
@@ -344,7 +279,7 @@ def generate_with_prompt_style_guidance(prompt, style, seed=42):
     return latents_to_pil(latents)[0]
-import gradio as gr
 dict_styles = {
     'Dr Strange': 'styles/learned_embeds_dr_strange.bin',
@@ -354,11 +289,12 @@ dict_styles = {
 }
 # dict_styles.keys()
-def inference(prompt, style):
-    if prompt is not None and style is not None:
         style = dict_styles[style]
-        result = generate_with_prompt_style_guidance(prompt, style)
         return np.array(result)
     else:
         return None
@@ -369,6 +305,7 @@ examples = [['A man sipping wine wearing a spacesuit on the moon', 'Stripes']]
 demo = gr.Interface(inference,
                     inputs = [gr.Textbox(label='Prompt'),
                               gr.Dropdown(['Dr Strange', 'GTA-5',
                                            'Manga', 'Pokemon'], label='Style')
                               ],
@@ -377,7 +314,8 @@ demo = gr.Interface(inference,
                               ],
                     title = title,
                     description = description,
-                    # examples = examples,
                     # cache_examples=True
                     )
 demo.launch()

 from base64 import b64encode
 import numpy
 import torch
 from diffusers import AutoencoderKL, LMSDiscreteScheduler, UNet2DConditionModel
 from huggingface_hub import notebook_login
+import gradio as gr
 # For video display:
 from matplotlib import pyplot as plt
 import os
 import numpy as np
 # Suppress some unnecessary warnings when loading the CLIPTextModel
 logging.set_verbosity_error()
     return latents_to_pil(latents)[0]
+def generate_with_prompt_style(prompt, style, seed):
     prompt = prompt + ' in style of s'
     embed = torch.load(style)
     variance = torch.var(images)
     return -variance
+def generate_with_prompt_style_guidance(prompt, style, seed):
     prompt = prompt + ' in style of s'
             denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
             # Calculate loss
+            loss = contrast_loss(denoised_images) * contrast_loss_scale
             # # Occasionally print it out
             # if i%10==0:
     return latents_to_pil(latents)[0]
 dict_styles = {
     'Dr Strange': 'styles/learned_embeds_dr_strange.bin',
 }
 # dict_styles.keys()
+def inference(prompt, seed,style):
+    if prompt is not None and style is not None and seed is not None:
         style = dict_styles[style]
+        torch.manual_seed(seed)
+        result = generate_with_prompt_style_guidance(prompt, style,seed)
         return np.array(result)
     else:
         return None
 demo = gr.Interface(inference,
                     inputs = [gr.Textbox(label='Prompt'),
+                              gr.Textbox(label='Seed', value='24041975'),
                               gr.Dropdown(['Dr Strange', 'GTA-5',
                                            'Manga', 'Pokemon'], label='Style')
                               ],
                               ],
                     title = title,
                     description = description,
+                    examples = examples,
                     # cache_examples=True
                     )
 demo.launch()