Spaces:

multimodalart
/

self-forcing

Running on Zero

App Files Files Community

multimodalart HF Staff committed 10 days ago

Commit

8732b40

verified ·

1 Parent(s): 6daf741

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -0

app.py CHANGED Viewed

@@ -40,6 +40,27 @@ from demo_utils.constant import ZERO_VAE_CACHE
 from demo_utils.vae_block3 import VAEDecoderWrapper
 from utils.wan_wrapper import WanDiffusionWrapper, WanTextEncoder
 # --- Argument Parsing ---
 parser = argparse.ArgumentParser(description="Gradio Demo for Self-Forcing with Frame Streaming")
 parser.add_argument('--port', type=int, default=7860, help="Port to run the Gradio app on.")
@@ -458,6 +479,7 @@ with gr.Blocks(title="Self-Forcing Frame Streaming Demo") as demo:
                 placeholder="A stylish woman walks down a Tokyo street...",
                 lines=4,
             )
             gr.Examples(
                 examples=[
                     "A close-up shot of a ceramic teacup slowly pouring water into a glass mug. The water flows smoothly from the spout of the teacup into the mug, creating gentle ripples as it fills up. Both cups have detailed textures, with the teacup having a matte finish and the glass mug showcasing clear transparency. The background is a blurred kitchen countertop, adding context without distracting from the central action. The pouring motion is fluid and natural, emphasizing the interaction between the two cups.",
@@ -500,6 +522,11 @@ with gr.Blocks(title="Self-Forcing Frame Streaming Demo") as demo:
         inputs=[prompt, seed, fps],
         outputs=[frame_display, final_video, status_html]
     )
 # --- Launch App ---
 if __name__ == "__main__":

 from demo_utils.vae_block3 import VAEDecoderWrapper
 from utils.wan_wrapper import WanDiffusionWrapper, WanTextEncoder
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_checkpoint = "gokaygokay/Flux-Prompt-Enhance"
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
+enhancer = pipeline('text2text-generation',
+                    model=model,
+                    tokenizer=tokenizer,
+                    repetition_penalty= 1.2)
+enhancer.to(device)
+max_target_length = 256
+@spaces.GPU
+def enhance_prompt(prompt):
+    prefix = "enhance prompt: "
+    short_prompt = prompt
+    answer = enhancer(prefix + short_prompt, max_length=max_target_length)
+    final_answer = answer[0]['generated_text']
+    return final_answer
 # --- Argument Parsing ---
 parser = argparse.ArgumentParser(description="Gradio Demo for Self-Forcing with Frame Streaming")
 parser.add_argument('--port', type=int, default=7860, help="Port to run the Gradio app on.")
                 placeholder="A stylish woman walks down a Tokyo street...",
                 lines=4,
             )
+            enhance_button = gr.Button("Enhance prompt")
             gr.Examples(
                 examples=[
                     "A close-up shot of a ceramic teacup slowly pouring water into a glass mug. The water flows smoothly from the spout of the teacup into the mug, creating gentle ripples as it fills up. Both cups have detailed textures, with the teacup having a matte finish and the glass mug showcasing clear transparency. The background is a blurred kitchen countertop, adding context without distracting from the central action. The pouring motion is fluid and natural, emphasizing the interaction between the two cups.",
         inputs=[prompt, seed, fps],
         outputs=[frame_display, final_video, status_html]
     )
+    enhance_button.click(
+        fn=enhance_prompt,
+        inputs=[prompt],
+        outputs=[prompt]
+    )
 # --- Launch App ---
 if __name__ == "__main__":