Spaces:
Running
on
Zero
Running
on
Zero
IceClear
committed on
Commit
·
512f3c8
1
Parent(s):
17caf25
update
Browse files
app.py
CHANGED
@@ -11,8 +11,6 @@
|
|
11 |
# // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# // See the License for the specific language governing permissions and
|
13 |
# // limitations under the License.
|
14 |
-
import spaces
|
15 |
-
|
16 |
import os
|
17 |
import torch
|
18 |
import mediapy
|
@@ -128,7 +126,6 @@ def configure_sequence_parallel(sp_size):
|
|
128 |
if sp_size > 1:
|
129 |
init_sequence_parallel(sp_size)
|
130 |
|
131 |
-
@spaces.GPU(duration=120)
|
132 |
def configure_runner(sp_size):
|
133 |
config_path = os.path.join('./configs_3b', 'main.yaml')
|
134 |
config = load_config(config_path)
|
@@ -144,10 +141,9 @@ def configure_runner(sp_size):
|
|
144 |
runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
|
145 |
return runner
|
146 |
|
147 |
-
@spaces.GPU(duration=120)
|
148 |
def generation_step(runner, text_embeds_dict, cond_latents):
|
149 |
def _move_to_cuda(x):
|
150 |
-
return [i.to(
|
151 |
|
152 |
noises = [torch.randn_like(latent) for latent in cond_latents]
|
153 |
aug_noises = [torch.randn_like(latent) for latent in cond_latents]
|
@@ -160,10 +156,10 @@ def generation_step(runner, text_embeds_dict, cond_latents):
|
|
160 |
|
161 |
def _add_noise(x, aug_noise):
|
162 |
t = (
|
163 |
-
torch.tensor([1000.0], device=
|
164 |
* cond_noise_scale
|
165 |
)
|
166 |
-
shape = torch.tensor(x.shape[1:], device=
|
167 |
t = runner.timestep_transform(t, shape)
|
168 |
print(
|
169 |
f"Timestep shifting from"
|
@@ -201,7 +197,6 @@ def generation_step(runner, text_embeds_dict, cond_latents):
|
|
201 |
|
202 |
return samples
|
203 |
|
204 |
-
@spaces.GPU(duration=120)
|
205 |
def generation_loop(video_path='./test_videos', output_dir='./results', seed=666, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
|
206 |
runner = configure_runner(1)
|
207 |
output_dir = 'output/out.mp4'
|
@@ -322,7 +317,7 @@ def generation_loop(video_path='./test_videos', output_dir='./results', seed=666
|
|
322 |
/ 255.0
|
323 |
)
|
324 |
print(f"Read video size: {video.size()}")
|
325 |
-
cond_latents.append(video_transform(video.to(
|
326 |
|
327 |
ori_lengths = [video.size(1) for video in cond_latents]
|
328 |
input_videos = cond_latents
|
@@ -330,15 +325,15 @@ def generation_loop(video_path='./test_videos', output_dir='./results', seed=666
|
|
330 |
|
331 |
runner.dit.to("cpu")
|
332 |
print(f"Encoding videos: {list(map(lambda x: x.size(), cond_latents))}")
|
333 |
-
runner.vae.to(
|
334 |
cond_latents = runner.vae_encode(cond_latents)
|
335 |
runner.vae.to("cpu")
|
336 |
-
runner.dit.to(
|
337 |
|
338 |
for i, emb in enumerate(text_embeds["texts_pos"]):
|
339 |
-
text_embeds["texts_pos"][i] = emb.to(
|
340 |
for i, emb in enumerate(text_embeds["texts_neg"]):
|
341 |
-
text_embeds["texts_neg"][i] = emb.to(
|
342 |
|
343 |
samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
|
344 |
runner.dit.to("cpu")
|
|
|
11 |
# // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# // See the License for the specific language governing permissions and
|
13 |
# // limitations under the License.
|
|
|
|
|
14 |
import os
|
15 |
import torch
|
16 |
import mediapy
|
|
|
126 |
if sp_size > 1:
|
127 |
init_sequence_parallel(sp_size)
|
128 |
|
|
|
129 |
def configure_runner(sp_size):
|
130 |
config_path = os.path.join('./configs_3b', 'main.yaml')
|
131 |
config = load_config(config_path)
|
|
|
141 |
runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
|
142 |
return runner
|
143 |
|
|
|
144 |
def generation_step(runner, text_embeds_dict, cond_latents):
|
145 |
def _move_to_cuda(x):
|
146 |
+
return [i.to(torch.device("cuda")) for i in x]
|
147 |
|
148 |
noises = [torch.randn_like(latent) for latent in cond_latents]
|
149 |
aug_noises = [torch.randn_like(latent) for latent in cond_latents]
|
|
|
156 |
|
157 |
def _add_noise(x, aug_noise):
|
158 |
t = (
|
159 |
+
torch.tensor([1000.0], device=torch.device("cuda"))
|
160 |
* cond_noise_scale
|
161 |
)
|
162 |
+
shape = torch.tensor(x.shape[1:], device=torch.device("cuda"))[None]
|
163 |
t = runner.timestep_transform(t, shape)
|
164 |
print(
|
165 |
f"Timestep shifting from"
|
|
|
197 |
|
198 |
return samples
|
199 |
|
|
|
200 |
def generation_loop(video_path='./test_videos', output_dir='./results', seed=666, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
|
201 |
runner = configure_runner(1)
|
202 |
output_dir = 'output/out.mp4'
|
|
|
317 |
/ 255.0
|
318 |
)
|
319 |
print(f"Read video size: {video.size()}")
|
320 |
+
cond_latents.append(video_transform(video.to(torch.device("cuda"))))
|
321 |
|
322 |
ori_lengths = [video.size(1) for video in cond_latents]
|
323 |
input_videos = cond_latents
|
|
|
325 |
|
326 |
runner.dit.to("cpu")
|
327 |
print(f"Encoding videos: {list(map(lambda x: x.size(), cond_latents))}")
|
328 |
+
runner.vae.to(torch.device("cuda"))
|
329 |
cond_latents = runner.vae_encode(cond_latents)
|
330 |
runner.vae.to("cpu")
|
331 |
+
runner.dit.to(torch.device("cuda"))
|
332 |
|
333 |
for i, emb in enumerate(text_embeds["texts_pos"]):
|
334 |
+
text_embeds["texts_pos"][i] = emb.to(torch.device("cuda"))
|
335 |
for i, emb in enumerate(text_embeds["texts_neg"]):
|
336 |
+
text_embeds["texts_neg"][i] = emb.to(torch.device("cuda"))
|
337 |
|
338 |
samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
|
339 |
runner.dit.to("cpu")
|