harry900000 committed on
Commit
817cd1e
·
1 Parent(s): 16293fe

add function to watch current vram

Browse files
Files changed (2) hide show
  1. app.py +15 -0
  2. gpu_info.py +67 -0
app.py CHANGED
@@ -6,6 +6,8 @@ from typing import List, Tuple
6
  import gradio as gr
7
  import spaces
8
 
 
 
9
  PWD = os.path.dirname(__file__)
10
  CHECKPOINTS_PATH = "/data/checkpoints"
11
  # CHECKPOINTS_PATH = os.path.join(PWD, "checkpoints")
@@ -285,7 +287,10 @@ def generate_video(
285
 
286
  log.info(f"actual_seed: {actual_seed}")
287
 
 
288
  start_time = time.time()
 
 
289
  args, control_inputs = parse_arguments(
290
  controlnet_specs_in={
291
  "hdmap": {"control_weight": 0.3, "input_control": hdmap_video_input},
@@ -301,10 +306,20 @@ def generate_video(
301
  num_gpus=1,
302
  seed=seed,
303
  )
 
 
 
 
 
304
  videos, prompts = inference(args, control_inputs)
 
 
305
  end_time = time.time()
306
  log.info(f"Time taken: {end_time - start_time} s")
307
 
 
 
 
308
  video = videos[0]
309
  return video, video, actual_seed
310
 
 
6
  import gradio as gr
7
  import spaces
8
 
9
+ from gpu_info import watch_gpu_memory
10
+
11
  PWD = os.path.dirname(__file__)
12
  CHECKPOINTS_PATH = "/data/checkpoints"
13
  # CHECKPOINTS_PATH = os.path.join(PWD, "checkpoints")
 
287
 
288
  log.info(f"actual_seed: {actual_seed}")
289
 
290
+ # add timer to calculate the generation time
291
  start_time = time.time()
292
+
293
+ # parse generation configs
294
  args, control_inputs = parse_arguments(
295
  controlnet_specs_in={
296
  "hdmap": {"control_weight": 0.3, "input_control": hdmap_video_input},
 
306
  num_gpus=1,
307
  seed=seed,
308
  )
309
+
310
+ # watch gpu memory
311
+ watcher = watch_gpu_memory(10)
312
+
313
+ # start inference
314
  videos, prompts = inference(args, control_inputs)
315
+
316
+ # print the generation time
317
  end_time = time.time()
318
  log.info(f"Time taken: {end_time - start_time} s")
319
 
320
+ # stop the watcher
321
+ watcher.cancel()
322
+
323
  video = videos[0]
324
  return video, video, actual_seed
325
 
gpu_info.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from subprocess import check_output
2
+ from threading import Timer
3
+ from typing import Callable, List
4
+
5
+
6
+ def get_gpu_memory() -> List[int]:
7
+ """
8
+ Get the free GPU memory (VRAM) in MiB
9
+
10
+ :return memory_free_values: List of free GPU memory (VRAM) in MiB
11
+ """
12
+
13
+ command = "nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits"
14
+ memory_free_info = check_output(command.split()).decode("ascii").replace("\r", "").split("\n")[:-1]
15
+ memory_free_values = list(map(int, memory_free_info))
16
+ return memory_free_values
17
+
18
+
19
+ class RepeatingTimer(Timer):
20
+ def run(self):
21
+ self.finished.wait(self.interval)
22
+ while not self.finished.is_set():
23
+ self.function(*self.args, **self.kwargs)
24
+ self.finished.wait(self.interval)
25
+
26
+
27
+ gpu_memory_watcher: RepeatingTimer = None
28
+
29
+
30
+ def watch_gpu_memory(interval: int = 1, callback: Callable[[List[int]], None] = None) -> RepeatingTimer:
31
+ """
32
+ Start a repeating timer to watch the GPU memory usage
33
+
34
+ :param interval: Interval in seconds
35
+ :return timer: RepeatingTimer object
36
+ """
37
+ global gpu_memory_watcher
38
+ if gpu_memory_watcher is not None:
39
+ raise RuntimeError("GPU memory watcher is already running")
40
+
41
+ if callback is None:
42
+ callback = print
43
+
44
+ gpu_memory_watcher = RepeatingTimer(interval, lambda: callback(get_gpu_memory()))
45
+ gpu_memory_watcher.start()
46
+
47
+ return gpu_memory_watcher
48
+
49
+
50
+ if __name__ == "__main__":
51
+ from time import sleep
52
+
53
+ t = watch_gpu_memory()
54
+
55
+ counter = 0
56
+ while True:
57
+ sleep(1)
58
+ counter += 1
59
+ if counter == 10:
60
+ try:
61
+ watch_gpu_memory()
62
+ except RuntimeError:
63
+ print("Got exception")
64
+ pass
65
+ elif counter >= 20:
66
+ gpu_memory_watcher.cancel()
67
+ break