davidberenstein1957 committed
Commit f7399f1 · 1 parent: b42cb0b

feat: implement text-to-image leaderboard in app.py, updating data source and enhancing user interface with new tab and detailed performance metrics for various providers

Files changed (2)
  1. app.py +32 -14
  2. results.jsonl → data/text_to_image.jsonl +49 -8
app.py CHANGED

@@ -10,23 +10,28 @@ from assets import custom_css
 # override method to avoid bugg
 Leaderboard.raise_error_if_incorrect_config = lambda self: None
 
-abs_path = Path(__file__).parent
+abs_path = Path(__file__).parent / "data"
 
 # Load the JSONL file into a pandas DataFrame using the json library
-with open(abs_path / "results.jsonl", "r") as file:
+with open(abs_path / "text_to_image.jsonl", "r") as file:
     json_data = file.read()
     partially_fixed_json_data = json_data.replace("}\n{", "},\n{")
     fixed_json_data = f"[{partially_fixed_json_data}]"
     json_data = json.loads(fixed_json_data)
     df = pd.DataFrame(json_data)
 
-df["Model"] = df.apply(
-    lambda row: f'<a target="_blank" href="{row["URL"]}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{row["Model"]}</a>',
+df["URL"] = df.apply(
+    lambda row: f'<a target="_blank" href="{row["URL"]}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>',
     axis=1,
 )
 df = df[
-    ["Model", "Median Inference Time", "Price per Image"]
-    + [col for col in df.columns.tolist() if col not in ["URL", "Model", "Median Inference Time", "Price per Image"]]
+    ["URL", "Provider", "Device", "Model", "Optimization", "Median Inference Time", "Price per Image"]
+    + [
+        col
+        for col in df.columns.tolist()
+        if col
+        not in ["URL", "Model", "Median Inference Time", "Price per Image", "Provider", "Device", "Optimization"]
+    ]
 ]
 df = df.sort_values(by="GenEval", ascending=False)
 
 
@@ -41,27 +46,40 @@ with gr.Blocks("ParityError/Interstellar", fill_width=True, css=custom_css) as d
         """
     )
     with gr.Tabs():
-        with gr.TabItem("FLUX.1 [dev] Leaderboard"):
+        with gr.TabItem("Text-to-Image Leaderboard"):
             Leaderboard(
                 value=df,
                 select_columns=df.columns.tolist(),
-                datatype=["markdown"] + ["number"] * (len(df.columns.tolist()) - 1),
+                datatype=["markdown", "markdown", "markdown", "markdown", "markdown"]
+                + ["number"] * (len(df.columns.tolist()) - 5),
+                filter_columns=[
+                    "Provider",
+                    "Device",
+                    "Model",
+                    "Optimization",
+                ],
             )
         with gr.TabItem("About"):
             with gr.Row():
                 with gr.Column():
                     gr.Markdown(
                         """
-                        # 📊 InferBench
+                        # 📊 Text-to-Image Leaderboard
 
-                        We ran a comprehensive benchmark comparing our very own FLUX-juiced with the “FLUX.1 [dev]” endpoints offered by:
+                        This leaderboard compares the performance of different text-to-image providers.
+
+                        We started with a comprehensive benchmark comparing our very own FLUX-juiced with the “FLUX.1 [dev]” endpoints offered by:
 
                         - Replicate: https://replicate.com/black-forest-labs/flux-dev
                         - Fal: https://fal.ai/models/fal-ai/flux/dev
                         - Fireworks AI: https://fireworks.ai/models/fireworks/flux-1-dev-fp8
                         - Together AI: https://www.together.ai/models/flux-1-dev
 
-                        All of these inference providers offer FLUX.1 [dev] implementations but they don’t always communicate about the optimisation methods used in the background, and most endpoint have different response times and performance measure.
+                        We also included the following non-FLUX providers:
+
+                        - AWS Nova Canvas: https://aws.amazon.com/ai/generative-ai/nova/creative/
+
+                        All of these inference providers offer implementations but they don’t always communicate about the optimisation methods used in the background, and most endpoint have different response times and performance measures.
 
                         For comparison purposes we used the same generation set-up for all the providers.
 
@@ -72,15 +90,15 @@ with gr.Blocks("ParityError/Interstellar", fill_width=True, css=custom_css) as d
 
                         Although we did test with this specific Pruna configuration and hardware, the applied compression methods work with different config and hardware too!
 
-                        > We published a full blog post on the [InferBench and FLUX-juiced](https://www.pruna.ai/blog/flux-juiced-the-fastest-image-generation-endpoint).
+                        > We published a full blog post on [the creation of our FLUX-juiced endpoint](https://www.pruna.ai/blog/flux-juiced-the-fastest-image-generation-endpoint).
                         """
                     )
                 with gr.Column():
                     gr.Markdown(
                         """
-                        # 🧃 FLUX-juiced
+                        # 🧃 FLUX.1-dev (juiced)
 
-                        FLUX-juiced is our optimized version of FLUX.1, delivering up to **2.6x faster inference** than the official Replicate API, **without sacrificing image quality**.
+                        FLUX.1-dev (juiced) is our optimized version of FLUX.1-dev, delivering up to **2.6x faster inference** than the official Replicate API, **without sacrificing image quality**.
 
                         Under the hood, it uses a custom combination of:
 
results.jsonl → data/text_to_image.jsonl RENAMED

@@ -1,5 +1,8 @@
 {
-  "Model": "Baseline [Nvidia H100]",
+  "Provider": "Black Forest Labs",
+  "Device": "H100",
+  "Model": "FLUX.1-dev",
+  "Optimization": "none",
   "URL": "https://huggingface.co/black-forest-labs/FLUX.1-dev?library=diffusers",
   "GenEval": 67.98,
   "HPS (v2.1)": 30.36,
@@ -13,7 +16,10 @@
   "Price per Image": 0.025
 }
 {
-  "Model": "fal",
+  "Provider": "fal.ai",
+  "Device": "Serverless",
+  "Model": "FLUX.1-dev",
+  "Optimization": "custom",
   "URL": "https://fal.ai/models/fal-ai/flux/dev",
   "GenEval": 68.72,
   "HPS (v2.1)": 29.97,
@@ -27,7 +33,10 @@
   "Price per Image": 0.025
 }
 {
-  "Model": "fireworks [fp8]",
+  "Provider": "Fireworks AI",
+  "Device": "Serverless",
+  "Model": "FLUX.1-dev",
+  "Optimization": "fp8",
   "URL": "https://fireworks.ai/models/fireworks/flux-1-dev-fp8",
   "GenEval": 65.55,
   "HPS (v2.1)": 30.26,
@@ -41,7 +50,10 @@
   "Price per Image": 0.014
 }
 {
-  "Model": "Pruna [extra juiced]",
+  "Provider": "Pruna AI",
+  "Device": "H100",
+  "Model": "FLUX.1-dev",
+  "Optimization": "extra juiced",
   "URL": "https://replicate.com/prunaai/flux.1-juiced",
   "GenEval": 69.9,
   "HPS (v2.1)": 29.86,
@@ -55,7 +67,10 @@
   "Price per Image": 0.004
 }
 {
-  "Model": "Pruna [juiced]",
+  "Provider": "Pruna AI",
+  "Device": "H100",
+  "Model": "FLUX.1-dev",
+  "Optimization": "juiced",
   "URL": "https://replicate.com/prunaai/flux.1-juiced",
   "GenEval": 68.64,
   "HPS (v2.1)": 30.38,
@@ -69,7 +84,10 @@
   "Price per Image": 0.0048
 }
 {
-  "Model": "Pruna [lightly juiced]",
+  "Provider": "Pruna AI",
+  "Device": "H100",
+  "Model": "FLUX.1-dev",
+  "Optimization": "lightly juiced",
   "URL": "https://replicate.com/prunaai/flux.1-lightly-juiced",
   "GenEval": 69.12,
   "HPS (v2.1)": 30.36,
@@ -83,7 +101,10 @@
   "Price per Image": 0.0054
 }
 {
-  "Model": "Replicate [go_fast]",
+  "Provider": "Black Forest Labs",
+  "Device": "H100",
+  "Model": "FLUX.1-dev",
+  "Optimization": "go_fast",
   "URL": "https://replicate.com/black-forest-labs/flux-dev",
   "GenEval": 67.41,
   "HPS (v2.1)": 29.25,
@@ -97,7 +118,10 @@
   "Price per Image": 0.025
 }
 {
-  "Model": "Together AI",
+  "Provider": "Together AI",
+  "Device": "Serverless",
+  "Model": "FLUX.1-dev",
+  "Optimization": "unsure",
   "URL": "https://www.together.ai/models/flux-1-dev",
   "GenEval": 64.61,
   "HPS (v2.1)": 30.22,
@@ -110,3 +134,20 @@
   "Median Inference Time": 3.38,
   "Price per Image": 0.025
 }
+{
+  "Provider": "AWS",
+  "Device": "Serverless",
+  "Model": "AWS Nova Canvas",
+  "Optimization": "unsure",
+  "URL": "https://aws.amazon.com/ai/generative-ai/nova/creative/",
+  "GenEval": null,
+  "HPS (v2.1)": null,
+  "GenAI-Bench (VQA)": null,
+  "DrawBench (Image Reward)": 1.07,
+  "PartiPromts (ARNIQA)": 0.65,
+  "PartiPromts (ClipIQA)": 0.954,
+  "PartiPromts (ClipScore)": 28.1,
+  "PartiPromts (Sharpness - Laplacian Variance)": 10514,
+  "Median Inference Time": 3.65,
+  "Price per Image": null
+}
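Written out as a type, the new record shape in data/text_to_image.jsonl is roughly the sketch below. The TypedDict name is made up for illustration; Optional marks metrics that can be null (as in the AWS Nova Canvas entry), and the misspelled "PartiPromts" keys are copied verbatim from the file.

```python
from typing import Optional, TypedDict

# Functional TypedDict syntax, since the keys contain spaces and parentheses.
TextToImageRecord = TypedDict(
    "TextToImageRecord",
    {
        "Provider": str,
        "Device": str,
        "Model": str,
        "Optimization": str,
        "URL": str,
        "GenEval": Optional[float],
        "HPS (v2.1)": Optional[float],
        "GenAI-Bench (VQA)": Optional[float],
        "DrawBench (Image Reward)": Optional[float],
        "PartiPromts (ARNIQA)": Optional[float],
        "PartiPromts (ClipIQA)": Optional[float],
        "PartiPromts (ClipScore)": Optional[float],
        "PartiPromts (Sharpness - Laplacian Variance)": Optional[float],
        "Median Inference Time": float,
        "Price per Image": Optional[float],
    },
)
```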