sayakpaul (HF Staff) committed
Commit d891944 · verified · 1 Parent(s): ef0e180

Sync from GitHub

Files changed (3):
  1. app.py +7 -7
  2. prompts.py +5 -0
  3. utils/pipeline_utils.py +1 -0
app.py CHANGED
@@ -66,8 +66,8 @@ with gr.Blocks() as demo:
         value="black-forest-labs/FLUX.1-dev",
     )
     gemini_model_to_use = gr.Dropdown(
-        ["gemini-2.5-flash", "gemini-2.5-pro"],
-        value="gemini-2.5-flash",
+        ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.5-pro"],
+        value="gemini-2.5-flash-lite",
         label="Gemini Model",
         info="Select the model to generate the analysis.",
     )
@@ -92,7 +92,7 @@ with gr.Blocks() as demo:
     )

     with gr.Column(scale=1):
-        submit_btn = gr.Button("Estimate Memory ☁", variant="primary", scale=1)
+        submit_btn = gr.Button("Get Code ☁", variant="primary", scale=1)

     # --- Start of New Code Block ---
     all_inputs = [
@@ -164,9 +164,9 @@ with gr.Blocks() as demo:
     prompt_output = gr.Textbox(label="Prompt", show_copy_button=True, lines=10, interactive=False)

     gr.Markdown("---")
-    gr.Markdown("### Generated Code")
-
-    output_markdown = gr.Markdown(label="LLM Output", value="*Your results will appear here...*")
+
+    with gr.Accordion("Generated Code (expand)", open=False):
+        code_output = gr.Code(interactive=True, language="python")

     gr.Markdown(
         """
@@ -178,7 +178,7 @@ with gr.Blocks() as demo:
     )

     # --- Event Handling ---
-    submit_btn.click(fn=get_output_code, inputs=all_inputs, outputs=[output_markdown, prompt_output])
+    submit_btn.click(fn=get_output_code, inputs=all_inputs, outputs=[code_output, prompt_output])


 if __name__ == "__main__":
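For context, here is a minimal self-contained sketch of the UI pattern this commit moves to: the generated code now lands in an editable `gr.Code` widget inside a collapsed `gr.Accordion` rather than a `gr.Markdown`. `fake_generate` below is a hypothetical stand-in for the Space's real `get_output_code`:

```python
import gradio as gr

# Hypothetical stand-in for the Space's `get_output_code`; it returns
# (code, prompt) in the same order the commit wires into `outputs=`.
def fake_generate(prompt):
    code = f'# generated for: {prompt!r}\nprint("hello")'
    return code, prompt

with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    submit_btn = gr.Button("Get Code ☁", variant="primary")
    prompt_output = gr.Textbox(label="Prompt", show_copy_button=True, interactive=False)

    # The commit's new output widget: collapsed by default, editable code.
    with gr.Accordion("Generated Code (expand)", open=False):
        code_output = gr.Code(interactive=True, language="python")

    submit_btn.click(fn=fake_generate, inputs=[prompt_box], outputs=[code_output, prompt_output])

if __name__ == "__main__":
    demo.launch()
```

Compared with rendering into `gr.Markdown`, `gr.Code` gives syntax highlighting and lets the user edit the snippet in place, which matches the button's rename from "Estimate Memory" to "Get Code".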
prompts.py CHANGED
@@ -171,6 +171,11 @@ that this is different from using FP8. In FP8, we use quantization like shown ab
 * Add a comment before the `pipe` call, saying "Modify the pipe call arguments as needed."
 * Do NOT add any serialization step after the pipe call.

+## Specific guidelines on output format
+
+* When returning the outputs, your thinking/reasoning traces should be within comments.
+* You don't have to put the actual code snippet within a ```python ...``` block.
+
 Please think about these guidelines carefully before producing the outputs.
 """

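To illustrate what the new output-format guidelines ask of the model, a response would look roughly like the snippet below: reasoning folded into comments, the code emitted bare rather than wrapped in a ```python fence, and no serialization step after the pipe call. The specific pipeline setup here is hypothetical, not a real model output:

```python
# Reasoning (as comments, per the new guideline): the checkpoint is stored in
# BF16 and fits in the available VRAM, so the pipeline is loaded directly
# without quantization or offloading.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")

# Modify the pipe call arguments as needed.
image = pipe("a photo of a cat").images[0]
```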
utils/pipeline_utils.py CHANGED
@@ -19,6 +19,7 @@ DTYPE_MAP = {"F32": torch.float32, "F16": torch.float16, "BF16": torch.bfloat16}
 def _parse_single_file(url):
     print(f"{url=}")
     token = os.getenv("HF_TOKEN")
+    assert token, "HF_TOKEN must be set"
     headers = {"Range": "bytes=0-7", "Authorization": f"Bearer {token}"}
     response = requests.get(url, headers=headers)
     length_of_header = struct.unpack("<Q", response.content)[0]
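The hunk ends mid-function, so for context here is a hedged sketch of how a safetensors header parse like `_parse_single_file` typically continues (the actual body past this point is not shown in the diff). A `.safetensors` file begins with an 8-byte little-endian u64 giving the length of a JSON header that maps tensor names to dtype/shape metadata, which is what `DTYPE_MAP` translates into torch dtypes; `parse_safetensors_header` is an illustrative name, not the Space's function:

```python
import json
import os
import struct

import requests
import torch

DTYPE_MAP = {"F32": torch.float32, "F16": torch.float16, "BF16": torch.bfloat16}

def parse_safetensors_header(url):
    # Same preamble as the patched `_parse_single_file`.
    token = os.getenv("HF_TOKEN")
    assert token, "HF_TOKEN must be set"
    auth = {"Authorization": f"Bearer {token}"}

    # Bytes 0-7: JSON header length as a little-endian unsigned 64-bit int.
    response = requests.get(url, headers={"Range": "bytes=0-7", **auth})
    length_of_header = struct.unpack("<Q", response.content)[0]

    # The JSON header occupies the next `length_of_header` bytes.
    response = requests.get(url, headers={"Range": f"bytes=8-{7 + length_of_header}", **auth})
    header = json.loads(response.content)

    # Example use: estimate parameter memory from dtypes and shapes.
    total_bytes = 0
    for name, meta in header.items():
        if name == "__metadata__":  # bookkeeping entry, not a tensor
            continue
        dtype = DTYPE_MAP[meta["dtype"]]
        numel = 1
        for dim in meta["shape"]:
            numel *= dim
        total_bytes += numel * (torch.finfo(dtype).bits // 8)
    return header, total_bytes
```

The new `assert` surfaces a missing token immediately, instead of sending `Authorization: Bearer None` and failing later with a less obvious HTTP error.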