from text_to_video.tuneavideo.pipelines.pipeline_text_to_video import TuneAVideoPipeline
from text_to_video.tuneavideo.models.unet import UNet3DConditionModel
import torch
from diffusers import AutoencoderKL, DDIMScheduler
from transformers import CLIPTextModel, CLIPTokenizer


class TextToVideo:
    def __init__(self, sd_path=None, motion_field_strength=12, video_length=8,
                 t0=881, t1=941, use_cf_attn=True, use_motion_field=True) -> None:
        # Fixed-seed CUDA generator so sampling is reproducible across runs.
        g = torch.Generator(device='cuda')
        g.manual_seed(22)
        self.g = g

        assert sd_path is not None
        print(f"Loading SD model files from {sd_path}")
        self.dtype = torch.float16

        # Load the Stable Diffusion components individually, then load the
        # pretrained 2D UNet weights into the 3D UNet (with cross-frame
        # attention if requested).
        noise_scheduler = DDIMScheduler.from_pretrained(
            sd_path, subfolder="scheduler")
        tokenizer = CLIPTokenizer.from_pretrained(
            sd_path, subfolder="tokenizer")
        text_encoder = CLIPTextModel.from_pretrained(
            sd_path, subfolder="text_encoder")
        vae = AutoencoderKL.from_pretrained(sd_path, subfolder="vae")
        unet = UNet3DConditionModel.from_pretrained_2d(
            sd_path, subfolder="unet", use_cf_attn=use_cf_attn)
        self.pipe = TuneAVideoPipeline(
            vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, unet=unet,
            scheduler=DDIMScheduler.from_pretrained(
                sd_path, subfolder="scheduler")
        ).to('cuda').to(self.dtype)

        noise_scheduler.set_timesteps(50, device='cuda')

        # t0 parameter (DDIM backward from noise until t0)
        self.t0 = t0
        # from t0 apply DDPM forward until t1
        self.t1 = t1
        self.use_foreground_motion_field = False  # apply motion field on foreground object (not used)
        # strength of motion field (delta_x = delta_y in Sect. 3.3.1)
        self.motion_field_strength = motion_field_strength
        self.use_motion_field = use_motion_field  # apply general motion field
        self.smooth_bg = False  # temporally smooth background
        self.smooth_bg_strength = 0.4  # alpha = (1 - self.smooth_bg_strength) in Eq. (9)
        self.video_length = video_length

    def inference(self, prompt):
        prompt_compute = [prompt]
        # Initial noise latent: batch 1, 4 latent channels, 1 frame,
        # 64x64 latent resolution (for 512x512 output).
        xT = torch.randn((1, 4, 1, 64, 64), dtype=self.dtype, device="cuda")
        result = self.pipe(prompt_compute,
                           video_length=self.video_length,
                           height=512,
                           width=512,
                           num_inference_steps=50,
                           guidance_scale=7.5,
                           guidance_stop_step=1.0,
                           t0=self.t0,
                           t1=self.t1,
                           xT=xT,
                           use_foreground_motion_field=self.use_foreground_motion_field,
                           motion_field_strength=self.motion_field_strength,
                           use_motion_field=self.use_motion_field,
                           smooth_bg=self.smooth_bg,
                           smooth_bg_strength=self.smooth_bg_strength,
                           generator=self.g)
        return result.videos[0]
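
# Minimal usage sketch (an illustrative addition, not part of the original
# file): it assumes `sd_path` points at a Stable Diffusion 1.x checkpoint in
# diffusers layout (scheduler/, tokenizer/, text_encoder/, vae/, unet/
# subfolders) and that a CUDA GPU is available. The path and prompt below
# are placeholders.
if __name__ == "__main__":
    t2v = TextToVideo(sd_path="path/to/stable-diffusion-checkpoint")
    video = t2v.inference("a horse galloping on the street")
    print(video.shape)  # frame tensor for the generated 512x512 clip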

