harry900000 committed on
Commit
6d7fc1c
·
1 Parent(s): f54e7d4

fix environment problem

Browse files
app.py CHANGED
@@ -1,11 +1,34 @@
1
  import os
2
  from typing import List, Tuple
3
 
 
 
 
4
  PWD = os.path.dirname(__file__)
5
 
6
  import subprocess
7
 
 
 
 
 
 
 
 
 
 
 
8
  subprocess.run("pip install flash-attn --no-build-isolation", env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, shell=True)
 
 
 
 
 
 
 
 
 
 
9
 
10
  try:
11
  import os
@@ -25,8 +48,8 @@ except Exception as e:
25
  # download checkpoints
26
  from download_checkpoints import main as download_checkpoints
27
 
28
- os.makedirs("./checkpoints", exist_ok=True)
29
- download_checkpoints(hf_token="", output_dir="./checkpoints", model="7b_av")
30
 
31
  os.environ["TOKENIZERS_PARALLELISM"] = "false" # Workaround to suppress MP warning
32
 
@@ -34,8 +57,6 @@ import copy
34
  import json
35
  import random
36
  from io import BytesIO
37
-
38
- import gradio as gr
39
  import torch
40
  from cosmos_transfer1.checkpoints import (
41
  BASE_7B_CHECKPOINT_AV_SAMPLE_PATH,
@@ -251,6 +272,7 @@ def inference(cfg, control_inputs) -> Tuple[List[str], List[str]]:
251
  return video_paths, prompt_paths
252
 
253
 
 
254
  def generate_video(
255
  hdmap_video_input,
256
  lidar_video_input,
@@ -270,7 +292,7 @@ def generate_video(
270
  "hdmap": {"control_weight": 0.3, "input_control": hdmap_video_input},
271
  "lidar": {"control_weight": 0.7, "input_control": lidar_video_input},
272
  },
273
- checkpoint_dir="./cosmos-transfer1/checkpoints",
274
  prompt=prompt,
275
  negative_prompt=negative_prompt,
276
  sigma_max=80,
@@ -300,19 +322,21 @@ with gr.Blocks() as demo:
300
  prompt_input = gr.Textbox(
301
  label="Prompt",
302
  lines=5,
303
- value="A close-up shot captures a vibrant yellow scrubber vigorously working on a grimy plate, its bristles moving in circular motions to lift stubborn grease and food residue. The dish, once covered in remnants of a hearty meal, gradually reveals its original glossy surface. Suds form and bubble around the scrubber, creating a satisfying visual of cleanliness in progress. The sound of scrubbing fills the air, accompanied by the gentle clinking of the dish against the sink. As the scrubber continues its task, the dish transforms, gleaming under the bright kitchen lights, symbolizing the triumph of cleanliness over mess.", # noqa: E501
 
304
  placeholder="Enter your descriptive prompt here...",
305
  )
306
 
307
  negative_prompt_input = gr.Textbox(
308
  label="Negative Prompt",
309
  lines=3,
310
- value="The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality.", # noqa: E501
 
311
  placeholder="Enter what you DON'T want to see in the image...",
312
  )
313
 
314
  with gr.Row():
315
- randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
316
  seed_input = gr.Slider(minimum=0, maximum=1000000, value=1, step=1, label="Seed")
317
 
318
  generate_button = gr.Button("Generate Image")
 
1
  import os
2
  from typing import List, Tuple
3
 
4
+ import gradio as gr
5
+ import spaces
6
+
7
  PWD = os.path.dirname(__file__)
8
 
9
  import subprocess
10
 
11
+ # copy cudnn files
12
+ subprocess.run("cp /usr/local/lib/python3.10/site-packages/nvidia/cudnn/include/*.h /usr/local/cuda/include", env={}, shell=True)
13
+ subprocess.run("cp /usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib/*.so* /usr/local/cuda/lib64", env={}, shell=True)
14
+
15
# setup env
# NOTE: os.environ stores values verbatim; shell-style "$VAR" references are
# never expanded. Build the search paths from real values so that child
# processes (the pip builds below) actually see the CUDA directories and the
# search path inherited from the parent process.
os.environ["CUDA_HOME"] = "/usr/local/cuda"
os.environ["LD_LIBRARY_PATH"] = os.pathsep.join(
    entry
    for entry in (
        os.environ["CUDA_HOME"] + "/lib",
        os.environ["CUDA_HOME"] + "/lib64",
        # Keep whatever was already configured; skipped when unset/empty so no
        # empty path component (which would mean "current directory") is added.
        os.environ.get("LD_LIBRARY_PATH", ""),
    )
    if entry
)
os.environ["PATH"] = os.pathsep.join(
    entry
    for entry in (
        os.environ["CUDA_HOME"] + "/bin",
        "/bin",
        "/usr/local/sbin",
        "/usr/local/bin",
        "/usr/sbin",
        "/usr/bin",
        "/sbin",
        os.environ.get("PATH", ""),
    )
    if entry
)
19
+
20
+ # install packages
21
  subprocess.run("pip install flash-attn --no-build-isolation", env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, shell=True)
22
+ subprocess.run("pip install vllm==0.9.0", env={"VLLM_ATTENTION_BACKEND": "FLASHINFER"}, shell=True)
23
+ subprocess.run(
24
+ "pip install transformer-engine[pytorch] --no-build-isolation",
25
+ env={"PATH": os.environ["PATH"], "LD_LIBRARY_PATH": os.environ["LD_LIBRARY_PATH"], "CUDA_HOME": os.environ["CUDA_HOME"]},
26
+ shell=True,
27
+ )
28
+
29
+ from test_environment import main as check_environment
30
+
31
+ check_environment()
32
 
33
  try:
34
  import os
 
48
  # download checkpoints
49
  from download_checkpoints import main as download_checkpoints
50
 
51
+ os.makedirs(os.path.join(PWD, "checkpoints"), exist_ok=True)
52
+ download_checkpoints(hf_token="", output_dir=os.path.join(PWD, "checkpoints"), model="7b_av")
53
 
54
  os.environ["TOKENIZERS_PARALLELISM"] = "false" # Workaround to suppress MP warning
55
 
 
57
  import json
58
  import random
59
  from io import BytesIO
 
 
60
  import torch
61
  from cosmos_transfer1.checkpoints import (
62
  BASE_7B_CHECKPOINT_AV_SAMPLE_PATH,
 
272
  return video_paths, prompt_paths
273
 
274
 
275
+ @spaces.GPU()
276
  def generate_video(
277
  hdmap_video_input,
278
  lidar_video_input,
 
292
  "hdmap": {"control_weight": 0.3, "input_control": hdmap_video_input},
293
  "lidar": {"control_weight": 0.7, "input_control": lidar_video_input},
294
  },
295
+ checkpoint_dir=os.path.join(PWD, "checkpoints"),
296
  prompt=prompt,
297
  negative_prompt=negative_prompt,
298
  sigma_max=80,
 
322
  prompt_input = gr.Textbox(
323
  label="Prompt",
324
  lines=5,
325
+ # value="A close-up shot captures a vibrant yellow scrubber vigorously working on a grimy plate, its bristles moving in circular motions to lift stubborn grease and food residue. The dish, once covered in remnants of a hearty meal, gradually reveals its original glossy surface. Suds form and bubble around the scrubber, creating a satisfying visual of cleanliness in progress. The sound of scrubbing fills the air, accompanied by the gentle clinking of the dish against the sink. As the scrubber continues its task, the dish transforms, gleaming under the bright kitchen lights, symbolizing the triumph of cleanliness over mess.", # noqa: E501
326
+ value="The video is captured from a camera mounted on a car. The camera is facing forward. The video showcases a scenic golden-hour drive through a suburban area, bathed in the warm, golden hues of the setting sun. The dashboard camera captures the play of light and shadow as the sun’s rays filter through the trees, casting elongated patterns onto the road. The streetlights remain off, as the golden glow of the late afternoon sun provides ample illumination. The two-lane road appears to shimmer under the soft light, while the concrete barrier on the left side of the road reflects subtle warm tones. The stone wall on the right, adorned with lush greenery, stands out vibrantly under the golden light, with the palm trees swaying gently in the evening breeze. Several parked vehicles, including white sedans and vans, are seen on the left side of the road, their surfaces reflecting the amber hues of the sunset. The trees, now highlighted in a golden halo, cast intricate shadows onto the pavement. Further ahead, houses with red-tiled roofs glow warmly in the fading light, standing out against the sky, which transitions from deep orange to soft pastel blue. As the vehicle continues, a white sedan is seen driving in the same lane, while a black sedan and a white van move further ahead. The road markings are crisp, and the entire setting radiates a peaceful, almost cinematic beauty. The golden light, combined with the quiet suburban landscape, creates an atmosphere of tranquility and warmth, making for a mesmerizing and soothing drive.", # noqa: E501
327
  placeholder="Enter your descriptive prompt here...",
328
  )
329
 
330
  negative_prompt_input = gr.Textbox(
331
  label="Negative Prompt",
332
  lines=3,
333
+ # value="The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality.", # noqa: E501
334
+ value="The video captures a game playing, with bad crappy graphics and cartoonish frames. It represents a recording of old outdated games. The lighting looks very fake. The textures are very raw and basic. The geometries are very primitive. The images are very pixelated and of poor CG quality. There are many subtitles in the footage. Overall, the video is unrealistic at all.", # noqa: E501
335
  placeholder="Enter what you DON'T want to see in the image...",
336
  )
337
 
338
  with gr.Row():
339
+ randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=False)
340
  seed_input = gr.Slider(minimum=0, maximum=1000000, value=1, step=1, label="Seed")
341
 
342
  generate_button = gr.Button("Generate Image")
cosmos_transfer1/diffusion/config/transfer/config.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from typing import Any, List
17
+
18
+ import attrs
19
+
20
+ from cosmos_transfer1.diffusion.config.transfer.model import CtrlModelConfig
21
+ from cosmos_transfer1.diffusion.config.transfer.registry import register_configs
22
+ from cosmos_transfer1.diffusion.model.model_ctrl import VideoDiffusionModelWithCtrl
23
+ from cosmos_transfer1.utils import config
24
+ from cosmos_transfer1.utils.config_helper import import_all_modules_from_package
25
+ from cosmos_transfer1.utils.lazy_config import PLACEHOLDER
26
+ from cosmos_transfer1.utils.lazy_config import LazyCall as L
27
+ from cosmos_transfer1.utils.lazy_config import LazyDict
28
+
29
+
30
@attrs.define(slots=False)
class Config(config.Config):
    """Configuration object built on ``config.Config`` for ``VideoDiffusionModelWithCtrl``.

    Declares the default config groups and the lazily described model object.
    """

    # Config groups applied by default unless they are overridden; the
    # available groups are listed in registry.py.
    defaults: List[Any] = attrs.field(
        default=attrs.Factory(
            lambda: [
                "_self_",
                dict(net=None),
                dict(net_ctrl=None),
                dict(hint_key="control_input_edge"),
                dict(conditioner="ctrlnet_add_fps_image_size_padding_mask"),
                dict(tokenizer="vae1"),
                dict(experiment=None),
            ]
        )
    )
    # Lazy call describing the model; its ``config`` argument is left as
    # PLACEHOLDER here (presumably filled in later -- TODO confirm where).
    model_obj: LazyDict = L(VideoDiffusionModelWithCtrl)(config=PLACEHOLDER)
48
+
49
+
50
def make_config():
    """Build the ``Config`` instance and register the config groups.

    The steps run in the same order as before: construct the config with a
    ``CtrlModelConfig`` model section, call ``register_configs()``, then import
    every module of the ``cosmos_transfer1.diffusion.config.inference`` package.

    Returns:
        The newly created ``Config`` object.
    """
    cfg = Config(model=CtrlModelConfig())
    register_configs()

    inference_package = "cosmos_transfer1.diffusion.config.inference"
    import_all_modules_from_package(inference_package)
    return cfg
download_checkpoints.py CHANGED
@@ -114,6 +114,7 @@ def main(hf_token: str = os.environ.get("HF_TOKEN"), output_dir: str = "./checkp
114
  checkpoint_vars.append(obj)
115
 
116
  print(f"Found {len(checkpoint_vars)} checkpoints to download")
 
117
 
118
  # Download each checkpoint
119
  for checkpoint in checkpoint_vars:
 
114
  checkpoint_vars.append(obj)
115
 
116
  print(f"Found {len(checkpoint_vars)} checkpoints to download")
117
+ print(checkpoint_vars)
118
 
119
  # Download each checkpoint
120
  for checkpoint in checkpoint_vars:
requirements.txt CHANGED
@@ -1,10 +1,15 @@
1
- git+https://github.com/huggingface/diffusers.git
2
- transformers
3
- accelerate
4
- sentencepiece
5
- safetensors
6
- torchvision
7
- git+https://github.com/yiyixuxu/cosmos-guardrail.git
8
- peft
9
 
 
 
 
 
 
 
 
10
  git+https://github.com/nvidia-cosmos/cosmos-transfer1
 
 
 
 
 
1
+ # essentials
2
+ ninja
 
 
 
 
 
 
3
 
4
+ # torch
5
+ # torch==2.7.0
6
+ # torchvision==0.22.0
7
+ # torch==2.5.1
8
+ # torchvision==0.20.1
9
+
10
+ # cosmos-transfer1
11
  git+https://github.com/nvidia-cosmos/cosmos-transfer1
12
+
13
+ # other packages
14
+ https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl
15
+ decord==0.6.0
test_environment.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import argparse
17
+ import importlib
18
+ import os
19
+ import sys
20
+
21
+
22
def parse_args():
    """Parse the command-line options of the environment check.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``; its ``training``
        attribute is True only when ``--training`` was passed.
    """
    cli = argparse.ArgumentParser()
    training_option = {
        "action": "store_true",
        "help": "Whether to check training-specific dependencies",
    }
    cli.add_argument("--training", **training_option)
    namespace = cli.parse_args()
    return namespace
30
+
31
+
32
def check_packages(package_list):
    """Try to import each named package and print a per-package result line.

    Args:
        package_list: Iterable of importable module names.

    Returns:
        True when every package imported without raising, False otherwise
        (True for an empty list).
    """
    # The module-level flag is still cleared on failure for any caller that
    # inspects it, but the return value is the reliable result.
    global all_success
    every_import_ok = True
    for package in package_list:
        try:
            importlib.import_module(package)
        except Exception:
            # Any failure while importing (not only ImportError) counts as a
            # broken package, e.g. a missing shared library.
            print(f"\033[91m[ERROR]\033[0m Package not successfully imported: \033[93m{package}\033[0m")
            every_import_ok = False
            all_success = False
        else:
            print(f"\033[92m[SUCCESS]\033[0m {package} found")
    return every_import_ok


def main():
    """Check the Python version and the importability of the critical packages.

    Exits with status 1 when the interpreter is not Python 3.10+. Prints a
    warning when ``CONDA_PREFIX`` is unset. The final success banner is printed
    only when every checked package imported; failed imports are reported but
    do not terminate the process.
    """
    args = parse_args()

    if not (sys.version_info.major == 3 and sys.version_info.minor >= 10):
        detected = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
        print(f"\033[91m[ERROR]\033[0m Python 3.10+ is required. You have: \033[93m{detected}\033[0m")
        sys.exit(1)

    if "CONDA_PREFIX" not in os.environ:
        print(
            "\033[93m[WARNING]\033[0m CONDA_PREFIX is not set. When manually installed, Cosmos should run under the cosmos-transfer1 conda environment (see INSTALL.md). This warning can be ignored when running in the container."
        )

    print("Attempting to import critical packages...")

    packages = ["torch", "torchvision", "transformers", "megatron.core", "transformer_engine", "vllm", "pandas"]
    packages_training = [
        "apex.multi_tensor_apply",
    ]

    # Aggregate the results returned by check_packages. Previously this was a
    # local that check_packages could never update, so the success banner was
    # printed even after failed imports.
    packages_ok = check_packages(packages)
    if args.training:
        # Call first so the training packages are always checked and reported.
        packages_ok = check_packages(packages_training) and packages_ok

    if packages_ok:
        print("-----------------------------------------------------------")
        print("\033[92m[SUCCESS]\033[0m Cosmos environment setup is successful!")