Spaces:
Running
on
Zero
Running
on
Zero
IceClear
committed on
Commit
·
512f3c8
1
Parent(s):
17caf25
update
Browse files
app.py
CHANGED
@@ -11,8 +11,6 @@
|
|
11 |
# // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# // See the License for the specific language governing permissions and
|
13 |
# // limitations under the License.
|
14 |
-
import spaces
|
15 |
-
|
16 |
import os
|
17 |
import torch
|
18 |
import mediapy
|
@@ -128,7 +126,6 @@ def configure_sequence_parallel(sp_size):
|
|
128 |
if sp_size > 1:
|
129 |
init_sequence_parallel(sp_size)
|
130 |
|
131 |
-
@spaces.GPU(duration=120)
|
132 |
def configure_runner(sp_size):
|
133 |
config_path = os.path.join('./configs_3b', 'main.yaml')
|
134 |
config = load_config(config_path)
|
@@ -144,10 +141,9 @@ def configure_runner(sp_size):
|
|
144 |
runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
|
145 |
return runner
|
146 |
|
147 |
-
@spaces.GPU(duration=120)
|
148 |
def generation_step(runner, text_embeds_dict, cond_latents):
|
149 |
def _move_to_cuda(x):
|
150 |
-
return [i.to(
|
151 |
|
152 |
noises = [torch.randn_like(latent) for latent in cond_latents]
|
153 |
aug_noises = [torch.randn_like(latent) for latent in cond_latents]
|
@@ -160,10 +156,10 @@ def generation_step(runner, text_embeds_dict, cond_latents):
|
|
160 |
|
161 |
def _add_noise(x, aug_noise):
|
162 |
t = (
|
163 |
-
torch.tensor([1000.0], device=
|
164 |
* cond_noise_scale
|
165 |
)
|
166 |
-
shape = torch.tensor(x.shape[1:], device=
|
167 |
t = runner.timestep_transform(t, shape)
|
168 |
print(
|
169 |
f"Timestep shifting from"
|
@@ -201,7 +197,6 @@ def generation_step(runner, text_embeds_dict, cond_latents):
|
|
201 |
|
202 |
return samples
|
203 |
|
204 |
-
@spaces.GPU(duration=120)
|
205 |
def generation_loop(video_path='./test_videos', output_dir='./results', seed=666, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
|
206 |
runner = configure_runner(1)
|
207 |
output_dir = 'output/out.mp4'
|
@@ -322,7 +317,7 @@ def generation_loop(video_path='./test_videos', output_dir='./results', seed=666
|
|
322 |
/ 255.0
|
323 |
)
|
324 |
print(f"Read video size: {video.size()}")
|
325 |
-
cond_latents.append(video_transform(video.to(
|
326 |
|
327 |
ori_lengths = [video.size(1) for video in cond_latents]
|
328 |
input_videos = cond_latents
|
@@ -330,15 +325,15 @@ def generation_loop(video_path='./test_videos', output_dir='./results', seed=666
|
|
330 |
|
331 |
runner.dit.to("cpu")
|
332 |
print(f"Encoding videos: {list(map(lambda x: x.size(), cond_latents))}")
|
333 |
-
runner.vae.to(
|
334 |
cond_latents = runner.vae_encode(cond_latents)
|
335 |
runner.vae.to("cpu")
|
336 |
-
runner.dit.to(
|
337 |
|
338 |
for i, emb in enumerate(text_embeds["texts_pos"]):
|
339 |
-
text_embeds["texts_pos"][i] = emb.to(
|
340 |
for i, emb in enumerate(text_embeds["texts_neg"]):
|
341 |
-
text_embeds["texts_neg"][i] = emb.to(
|
342 |
|
343 |
samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
|
344 |
runner.dit.to("cpu")
|
|
|
11 |
# // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# // See the License for the specific language governing permissions and
|
13 |
# // limitations under the License.
|
|
|
|
|
14 |
import os
|
15 |
import torch
|
16 |
import mediapy
|
|
|
126 |
if sp_size > 1:
|
127 |
init_sequence_parallel(sp_size)
|
128 |
|
|
|
129 |
def configure_runner(sp_size):
|
130 |
config_path = os.path.join('./configs_3b', 'main.yaml')
|
131 |
config = load_config(config_path)
|
|
|
141 |
runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
|
142 |
return runner
|
143 |
|
|
|
144 |
def generation_step(runner, text_embeds_dict, cond_latents):
|
145 |
def _move_to_cuda(x):
|
146 |
+
return [i.to(torch.device("cuda")) for i in x]
|
147 |
|
148 |
noises = [torch.randn_like(latent) for latent in cond_latents]
|
149 |
aug_noises = [torch.randn_like(latent) for latent in cond_latents]
|
|
|
156 |
|
157 |
def _add_noise(x, aug_noise):
|
158 |
t = (
|
159 |
+
torch.tensor([1000.0], device=torch.device("cuda"))
|
160 |
* cond_noise_scale
|
161 |
)
|
162 |
+
shape = torch.tensor(x.shape[1:], device=torch.device("cuda"))[None]
|
163 |
t = runner.timestep_transform(t, shape)
|
164 |
print(
|
165 |
f"Timestep shifting from"
|
|
|
197 |
|
198 |
return samples
|
199 |
|
|
|
200 |
def generation_loop(video_path='./test_videos', output_dir='./results', seed=666, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
|
201 |
runner = configure_runner(1)
|
202 |
output_dir = 'output/out.mp4'
|
|
|
317 |
/ 255.0
|
318 |
)
|
319 |
print(f"Read video size: {video.size()}")
|
320 |
+
cond_latents.append(video_transform(video.to(torch.device("cuda"))))
|
321 |
|
322 |
ori_lengths = [video.size(1) for video in cond_latents]
|
323 |
input_videos = cond_latents
|
|
|
325 |
|
326 |
runner.dit.to("cpu")
|
327 |
print(f"Encoding videos: {list(map(lambda x: x.size(), cond_latents))}")
|
328 |
+
runner.vae.to(torch.device("cuda"))
|
329 |
cond_latents = runner.vae_encode(cond_latents)
|
330 |
runner.vae.to("cpu")
|
331 |
+
runner.dit.to(torch.device("cuda"))
|
332 |
|
333 |
for i, emb in enumerate(text_embeds["texts_pos"]):
|
334 |
+
text_embeds["texts_pos"][i] = emb.to(torch.device("cuda"))
|
335 |
for i, emb in enumerate(text_embeds["texts_neg"]):
|
336 |
+
text_embeds["texts_neg"][i] = emb.to(torch.device("cuda"))
|
337 |
|
338 |
samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
|
339 |
runner.dit.to("cpu")
|