multimodalart HF Staff committed on
Commit
8732b40
·
verified ·
1 Parent(s): 6daf741

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -0
app.py CHANGED
@@ -40,6 +40,27 @@ from demo_utils.constant import ZERO_VAE_CACHE
40
  from demo_utils.vae_block3 import VAEDecoderWrapper
41
  from utils.wan_wrapper import WanDiffusionWrapper, WanTextEncoder
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # --- Argument Parsing ---
44
  parser = argparse.ArgumentParser(description="Gradio Demo for Self-Forcing with Frame Streaming")
45
  parser.add_argument('--port', type=int, default=7860, help="Port to run the Gradio app on.")
@@ -458,6 +479,7 @@ with gr.Blocks(title="Self-Forcing Frame Streaming Demo") as demo:
458
  placeholder="A stylish woman walks down a Tokyo street...",
459
  lines=4,
460
  )
 
461
  gr.Examples(
462
  examples=[
463
  "A close-up shot of a ceramic teacup slowly pouring water into a glass mug. The water flows smoothly from the spout of the teacup into the mug, creating gentle ripples as it fills up. Both cups have detailed textures, with the teacup having a matte finish and the glass mug showcasing clear transparency. The background is a blurred kitchen countertop, adding context without distracting from the central action. The pouring motion is fluid and natural, emphasizing the interaction between the two cups.",
@@ -500,6 +522,11 @@ with gr.Blocks(title="Self-Forcing Frame Streaming Demo") as demo:
500
  inputs=[prompt, seed, fps],
501
  outputs=[frame_display, final_video, status_html]
502
  )
 
 
 
 
 
503
 
504
  # --- Launch App ---
505
  if __name__ == "__main__":
 
40
  from demo_utils.vae_block3 import VAEDecoderWrapper
41
  from utils.wan_wrapper import WanDiffusionWrapper, WanTextEncoder
42
 
43
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
44
+
45
# --- Prompt Enhancer Setup ---
# Run the enhancer on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_checkpoint = "gokaygokay/Flux-Prompt-Enhance"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
# Device placement goes through the `device` argument: a transformers
# Pipeline object has no `.to()` method, so `enhancer.to(device)` would raise
# AttributeError as soon as this module is imported.
enhancer = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    repetition_penalty=1.2,
    device=device,
)
# Passed as `max_length` to the enhancer when generating the enhanced prompt.
max_target_length = 256
55
+
56
@spaces.GPU
def enhance_prompt(prompt):
    """Rewrite a short prompt into a more detailed one with the enhancer pipeline.

    Args:
        prompt: The prompt text to enhance.

    Returns:
        The text generated by the enhancer for ``"enhance prompt: " + prompt``.
        An empty, whitespace-only, or ``None`` prompt is returned unchanged.
    """
    # Nothing to enhance: do not send the bare task prefix to the model
    # (and do not fail on `prefix + None`).
    if not prompt or not prompt.strip():
        return prompt

    prefix = "enhance prompt: "
    answer = enhancer(prefix + prompt, max_length=max_target_length)
    return answer[0]["generated_text"]
63
+
64
  # --- Argument Parsing ---
65
  parser = argparse.ArgumentParser(description="Gradio Demo for Self-Forcing with Frame Streaming")
66
  parser.add_argument('--port', type=int, default=7860, help="Port to run the Gradio app on.")
 
479
  placeholder="A stylish woman walks down a Tokyo street...",
480
  lines=4,
481
  )
482
+ enhance_button = gr.Button("Enhance prompt")
483
  gr.Examples(
484
  examples=[
485
  "A close-up shot of a ceramic teacup slowly pouring water into a glass mug. The water flows smoothly from the spout of the teacup into the mug, creating gentle ripples as it fills up. Both cups have detailed textures, with the teacup having a matte finish and the glass mug showcasing clear transparency. The background is a blurred kitchen countertop, adding context without distracting from the central action. The pouring motion is fluid and natural, emphasizing the interaction between the two cups.",
 
522
  inputs=[prompt, seed, fps],
523
  outputs=[frame_display, final_video, status_html]
524
  )
525
+ enhance_button.click(
526
+ fn=enhance_prompt,
527
+ inputs=[prompt],
528
+ outputs=[prompt]
529
+ )
530
 
531
  # --- Launch App ---
532
  if __name__ == "__main__":