"""Generate a video with Cosmos-1.0 Text2World using 4-bit quantized weights.

The script loads the T5 text encoder and the Cosmos 3D transformer with
bitsandbytes 4-bit quantization, assembles them into a
``CosmosTextToWorldPipeline``, runs a single text prompt through it, and
writes the resulting frames to ``output.mp4``.
"""

import torch
from diffusers import CosmosTextToWorldPipeline, CosmosTransformer3DModel
from diffusers.utils import export_to_video
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from transformers import AutoModel, T5EncoderModel
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

model_id = "nvidia/Cosmos-1.0-Diffusion-7B-Text2World"

# 4-bit settings for the text encoder.
# The text encoder is a `transformers` model, so it takes the `transformers`
# flavour of BitsAndBytesConfig (the `diffusers` one is for diffusers models).
bnb_config = TransformersBitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bf16
    bnb_4bit_quant_type="nf4",  # author's note: NF4 is more precise
    bnb_4bit_use_double_quant=True,  # author's note: double quant recommended
)

# Load the text encoder in 4-bit.
text_encoder = T5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder",
    device_map="auto",  # automatic device placement
    quantization_config=bnb_config,
)

# 4-bit settings for the diffusion transformer (a `diffusers` model).
# No quant type is given here, so the library default applies.
quant_config = DiffusersBitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

transformer = CosmosTransformer3DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)

# The pipeline component is named `transformer` (singular). Passing it under
# any other keyword would leave the quantized model unused and make the
# pipeline load the full-precision transformer instead.
pipe = CosmosTextToWorldPipeline.from_pretrained(
    model_id,
    text_encoder=text_encoder,
    transformer=transformer,
    torch_dtype=torch.bfloat16,  # optional
).to("cuda")

# NOTE: if GPU memory is tight, `pipe.enable_model_cpu_offload()` and
# `pipe.enable_vae_slicing()` are possible alternatives to keeping the whole
# pipeline on the GPU (untested here).

prompt = (
    "A sleek, humanoid robot stands in a vast warehouse filled with neatly "
    "stacked cardboard boxes on industrial shelves. "
    "The robot's metallic body gleams under the bright, even lighting, "
    "highlighting its futuristic design and intricate joints. "
    "A glowing blue light emanates from its chest, adding a touch of "
    "advanced technology. "
    "The background is dominated by rows of boxes, suggesting a highly "
    "organized storage system. "
    "The floor is lined with wooden pallets, enhancing the industrial "
    "setting. "
    "The camera remains static, capturing the robot's poised stance amidst "
    "the orderly environment, with a shallow depth of field that keeps the "
    "focus on the robot while subtly blurring the background for a "
    "cinematic effect."
)

# Take the first (only) generated video and write it out at 30 fps.
output = pipe(prompt=prompt, width=960, height=704).frames[0]
export_to_video(output, "output.mp4", fps=30)