File size: 2,098 Bytes
9451ca9
 
 
 
 
 
 
 
 
 
 
 
 
3f8cfad
9451ca9
 
 
 
 
 
 
 
 
3f8cfad
9451ca9
 
 
3f8cfad
9451ca9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import torch
from PIL import Image
import numpy as np
from config.settings import DEVICE, MODEL_CONFIG, PROCESSING_CONFIG

class VideoGenerator:
    """Image-to-video generator backed by Stable Video Diffusion (SVD).

    The diffusers pipeline is loaded once at construction time, and only when
    ``DEVICE`` is ``"cuda"``. If the device is anything else, or loading
    fails, ``self.pipeline`` stays ``None`` and ``generate_video`` returns
    ``None`` instead of raising.
    """

    def __init__(self) -> None:
        # None means "no usable pipeline"; generate_video checks for this.
        self.pipeline = None
        self._load_models()

    def _load_models(self) -> None:
        """Load the SVD pipeline in fp16 with model CPU offload enabled.

        Does nothing unless ``DEVICE == "cuda"``. Any error raised while
        importing diffusers, loading the weights, or enabling offload is
        printed to stdout and leaves ``self.pipeline`` as ``None``.
        """
        if DEVICE != "cuda":
            return

        try:
            # Imported here rather than at module level, so diffusers is only
            # required when CUDA is selected.
            from diffusers import StableVideoDiffusionPipeline

            pipeline = StableVideoDiffusionPipeline.from_pretrained(
                MODEL_CONFIG['svd_model'],
                torch_dtype=torch.float16,
                variant="fp16",
            )
            # Deliberately no pipeline.to("cuda") before this call: model CPU
            # offload manages device placement itself, and moving the whole
            # pipeline to the GPU first defeats the memory saving.
            pipeline.enable_model_cpu_offload()
        except Exception as e:
            print(f"Failed to load SVD: {e}")
            self.pipeline = None
        else:
            self.pipeline = pipeline

    def generate_video(self, image, prompt, duration=2):
        """Generate video frames from a single conditioning image.

        Args:
            image: A PIL image or a NumPy array. Floating-point arrays are
                assumed to be normalised to [0, 1]; they are clipped to that
                range and converted to uint8 before being wrapped in a PIL
                image.
            prompt: Not used. It is accepted so existing callers keep
                working, but it is never forwarded to the pipeline.
            duration: Multiplied by ``PROCESSING_CONFIG['fps']`` to obtain the
                requested frame count (so presumably seconds). The count is
                capped at ``PROCESSING_CONFIG['max_frames']``.

        Returns:
            A list with one NumPy array per generated frame, or ``None`` when
            no pipeline is loaded or generation fails for any reason (the
            failure is printed to stdout).
        """
        if self.pipeline is None:
            print("Pipeline not available, skipping video generation")
            return None

        try:
            if isinstance(image, np.ndarray):
                if np.issubdtype(image.dtype, np.floating):
                    # Handle every float dtype, and clip first so values that
                    # stray outside [0, 1] do not wrap around in the uint8 cast.
                    image = (np.clip(image, 0.0, 1.0) * 255).astype(np.uint8)
                image = Image.fromarray(image)

            # Grayscale / RGBA inputs are normalised to three channels before
            # they reach the pipeline.
            if isinstance(image, Image.Image) and image.mode != "RGB":
                image = image.convert("RGB")

            # default_resolution is indexed as (width, height) below, which is
            # also the order PIL's resize() expects.
            width, height = PROCESSING_CONFIG['default_resolution']
            image = image.resize((width, height))

            num_frames = min(
                PROCESSING_CONFIG['max_frames'],
                int(duration * PROCESSING_CONFIG['fps']),
            )
            if num_frames < 1:
                # Fail fast through the handler below instead of handing the
                # pipeline a frame count it cannot satisfy.
                raise ValueError(
                    f"duration={duration!r} yields {num_frames} frames; "
                    "at least 1 is required"
                )

            # NOTE(review): fps=7 is fixed here while the frame count above is
            # derived from PROCESSING_CONFIG['fps'] - confirm this is intended.
            result = self.pipeline(
                image,
                height=height,
                width=width,
                num_frames=num_frames,
                decode_chunk_size=8,
                motion_bucket_id=127,
                fps=7,
                noise_aug_strength=0.02,
                num_inference_steps=PROCESSING_CONFIG['generation_steps'],
            )
            # .frames holds one frame sequence per input image; only one image
            # is passed, so take the first sequence.
            frames = result.frames[0]

            return [np.array(frame) for frame in frames]

        except Exception as e:
            # Best-effort API: report the problem and signal failure with None.
            print(f"Video generation failed: {e}")
            return None