Rex Cheng
committed on
Commit
·
6ab1a8e
1
Parent(s):
c58ca4b
faster encode/decode with a longer GPU duration
Browse files
app.py
CHANGED
|
@@ -58,7 +58,7 @@ def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
|
|
| 58 |
net, feature_utils, seq_cfg = get_model()
|
| 59 |
|
| 60 |
|
| 61 |
-
@spaces.GPU
|
| 62 |
@torch.inference_mode()
|
| 63 |
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
|
| 64 |
cfg_strength: float, duration: float):
|
|
@@ -95,7 +95,7 @@ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int
|
|
| 95 |
return video_save_path
|
| 96 |
|
| 97 |
|
| 98 |
-
@spaces.GPU
|
| 99 |
@torch.inference_mode()
|
| 100 |
def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
|
| 101 |
duration: float):
|
|
@@ -126,6 +126,12 @@ def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int,
|
|
| 126 |
|
| 127 |
video_to_audio_tab = gr.Interface(
|
| 128 |
fn=video_to_audio,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
inputs=[
|
| 130 |
gr.Video(),
|
| 131 |
gr.Text(label='Prompt'),
|
|
|
|
| 58 |
net, feature_utils, seq_cfg = get_model()
|
| 59 |
|
| 60 |
|
| 61 |
+
@spaces.GPU(duration=120)
|
| 62 |
@torch.inference_mode()
|
| 63 |
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
|
| 64 |
cfg_strength: float, duration: float):
|
|
|
|
| 95 |
return video_save_path
|
| 96 |
|
| 97 |
|
| 98 |
+
@spaces.GPU(duration=120)
|
| 99 |
@torch.inference_mode()
|
| 100 |
def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
|
| 101 |
duration: float):
|
|
|
|
| 126 |
|
| 127 |
video_to_audio_tab = gr.Interface(
|
| 128 |
fn=video_to_audio,
|
| 129 |
+
description="""
|
| 130 |
+
Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
|
| 131 |
+
Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
|
| 132 |
+
|
| 133 |
+
NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side) and does not improve results.
|
| 134 |
+
""",
|
| 135 |
inputs=[
|
| 136 |
gr.Video(),
|
| 137 |
gr.Text(label='Prompt'),
|