Spaces: Running on Zero

add new model & timer

app.py CHANGED
@@ -7,6 +7,7 @@ import torchvision.transforms.v2 as transforms
 from torchvision.io import read_image
 from typing import Dict
 import os
+import time
 from huggingface_hub import login
 
 
@@ -46,7 +47,7 @@ class VAETester:
         endpoints = {
             "sd-vae-ft-mse": "https://q1bj3bpq6kzilnsu.us-east-1.aws.endpoints.huggingface.cloud",
             "sdxl-vae": "https://x2dmsqunjd6k9prw.us-east-1.aws.endpoints.huggingface.cloud",
-            "FLUX.1
+            "FLUX.1": "https://whhx50ex1aryqvw6.us-east-1.aws.endpoints.huggingface.cloud",
         }
         return endpoints[base_name]
 
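These endpoints back the commented-out "(remote)" model variants. Judging by the `return_type="pt"` / `partial_postprocess=False` arguments further down in this file, the remote path appears to use diffusers' hybrid-inference helper; a minimal sketch under that assumption, with a placeholder latent and scaling factor (per-VAE values differ), would look roughly like:

```python
# Hedged sketch, not part of this commit: decoding a latent against one of
# the endpoints above, assuming diffusers.utils.remote_utils.remote_decode.
import torch
from diffusers.utils.remote_utils import remote_decode

latent = torch.randn(1, 4, 64, 64)  # placeholder SD-style latent
decoded = remote_decode(
    endpoint="https://q1bj3bpq6kzilnsu.us-east-1.aws.endpoints.huggingface.cloud/",
    tensor=latent,
    scaling_factor=0.18215,  # sd-vae-ft-mse value; other VAEs use different factors
    return_type="pt",
    partial_postprocess=False,
)
```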
@@ -57,8 +58,8 @@ class VAETester:
             "sd-vae-ft-mse": AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse").to(self.device),
             "sdxl-vae": AutoencoderKL.from_pretrained("stabilityai/sdxl-vae").to(self.device),
             "stable-diffusion-3-medium": AutoencoderKL.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", subfolder="vae").to(self.device),
-            "FLUX.1
-            "
+            "FLUX.1": AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae").to(self.device),
+            "CogView4-6B": AutoencoderKL.from_pretrained("THUDM/CogView4-6B", subfolder="vae").to(self.device),
         }
         # Define the desired order of models
         order = [
@@ -68,9 +69,9 @@ class VAETester:
             "sdxl-vae",
             #"sdxl-vae (remote)",
             "stable-diffusion-3-medium",
-            "FLUX.1
-            #"FLUX.1
-            "
+            "FLUX.1",
+            #"FLUX.1 (remote)",
+            "CogView4-6B",
         ]
 
         # Construct the vae_models dictionary in the specified order
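Both new models load through the same `AutoencoderKL.from_pretrained(..., subfolder="vae")` pattern as the existing ones. A standalone sketch of just the two added loads (black-forest-labs/FLUX.1-dev is a gated repo, which is presumably why the app imports `login` from huggingface_hub):

```python
# Sketch of the two VAE loads added in this commit, outside the Space.
import torch
from diffusers import AutoencoderKL

device = "cuda" if torch.cuda.is_available() else "cpu"

# Gated repo: requires an accepted license and a Hugging Face token
# (e.g. via huggingface_hub.login) before from_pretrained will succeed.
flux_vae = AutoencoderKL.from_pretrained(
    "black-forest-labs/FLUX.1-dev", subfolder="vae"
).to(device)

cogview_vae = AutoencoderKL.from_pretrained(
    "THUDM/CogView4-6B", subfolder="vae"
).to(device)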
@@ -95,6 +96,9 @@ class VAETester:
         img_transformed = self.input_transform(img).to(self.device).unsqueeze(0)
         original_base = self.base_transform(img).cpu()
 
+        # Start timer
+        start_time = time.time()
+
         if model_config["type"] == "local":
             vae = model_config["vae"]
             with torch.no_grad():
@@ -112,6 +116,10 @@
                     return_type="pt",
                     partial_postprocess=False,
                 )
+
+        # End timer
+        processing_time = time.time() - start_time
+
         decoded_transformed = self.output_transform(decoded.squeeze(0)).cpu()
         reconstructed = decoded_transformed.clip(0, 1)
         diff = (original_base - reconstructed).abs()
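One caveat on the new timer: CUDA kernels launch asynchronously, so a bare `time.time()` delta around GPU work can under-count the decode itself, and here the timer stops before the `.cpu()` copy that would force a sync for the local models. A sync-aware variant, as a sketch (the `timed` helper is illustrative, not part of the app):

```python
import time

import torch

def timed(fn, *args, **kwargs):
    """Call fn and return (result, seconds), flushing queued CUDA work
    before and after so the wall-clock delta covers the GPU execution."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    start = time.time()
    result = fn(*args, **kwargs)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return result, time.time() - start
```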
@@ -119,14 +127,13 @@
         diff_image = transforms.ToPILImage()(bw_diff)
         recon_image = transforms.ToPILImage()(reconstructed)
         diff_score = bw_diff.sum().item()
-        return diff_image, recon_image, diff_score
+        return diff_image, recon_image, diff_score, processing_time
 
     def process_all_models(self, img: torch.Tensor, tolerance: float):
         """Process image through all configured VAEs"""
         results = {}
         for name, model_config in self.vae_models.items():
-            diff_img, recon_img, score = self.process_image(img, model_config, tolerance)
-            results[name] = (diff_img, recon_img, score)
+            results[name] = self.process_image(img, model_config, tolerance)
         return results
 
 @spaces.GPU(duration=15)
@@ -142,10 +149,10 @@ def test_all_vaes(image_path: str, tolerance: float, img_size: int):
         scores = []
 
         for name in tester.vae_models.keys():
-            diff_img, recon_img, score = results[name]
+            diff_img, recon_img, score, proc_time = results[name]
             diff_images.append((diff_img, name))
             recon_images.append((recon_img, name))
-            scores.append(f"{name:<25}: {score
+            scores.append(f"{name:<25}: {score:7,.0f} | {proc_time:.4f}s")
 
         return diff_images, recon_images, "\n".join(scores)
     except Exception as e:
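For reference, the new format spec left-pads the model name to 25 characters, renders the score at width 7 with thousands separators and no decimals, and gives the time four decimal places:

```python
# Quick check of the new score line with made-up values.
print(f"{'sdxl-vae':<25}: {123456:7,.0f} | {0.0312:.4f}s")
# -> sdxl-vae                 : 123,456 | 0.0312s
```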
@@ -157,13 +164,13 @@ examples = [f"examples/{img_filename}" for img_filename in sorted(os.listdir("ex
 with gr.Blocks(title="VAE Performance Tester", css=".monospace-text {font-family: 'Courier New', Courier, monospace;}") as demo:
     gr.Markdown("# VAE Comparison Tool")
     gr.Markdown("""
-    Upload an image or select an example to compare how different VAEs reconstruct it.
+    Upload an image or select an example to compare how different VAEs reconstruct it.
     1. The image is padded to a square and resized to the selected size (512 or 1024 pixels).
-    2. Each VAE
+    2. Each VAE encodes the image into a latent space and decodes it back.
     3. Outputs include:
        - **Difference Maps**: Where reconstruction differs from the original (white = difference > tolerance).
        - **Reconstructed Images**: Outputs from each VAE.
-       - **Sum of Differences**: Total pixels exceeding tolerance (lower is better).
+       - **Sum of Differences and Time**: Total pixels exceeding tolerance (lower is better) and processing time in seconds.
     Adjust tolerance to change sensitivity.
     """)
 
@@ -185,7 +192,7 @@ with gr.Blocks(title="VAE Performance Tester", css=".monospace-text {font-family
     with gr.Row():
         diff_gallery = gr.Gallery(label="Difference Maps", columns=4, height=512)
         recon_gallery = gr.Gallery(label="Reconstructed Images", columns=4, height=512)
-    scores_output = gr.Textbox(label="Sum of differences (lower is better)", lines=9, elem_classes="monospace-text")
+    scores_output = gr.Textbox(label="Sum of differences (lower is better) | Processing time (lower is faster)", lines=9, elem_classes="monospace-text")
 
     if examples:
         with gr.Row():