harry900000 committed on
Commit
817cd1e
·
1 Parent(s): 16293fe

add function to watch current vram

Browse files
Files changed (2) hide show
  1. app.py +15 -0
  2. gpu_info.py +67 -0
app.py CHANGED
@@ -6,6 +6,8 @@ from typing import List, Tuple
6
  import gradio as gr
7
  import spaces
8
 
 
 
9
  PWD = os.path.dirname(__file__)
10
  CHECKPOINTS_PATH = "/data/checkpoints"
11
  # CHECKPOINTS_PATH = os.path.join(PWD, "checkpoints")
@@ -285,7 +287,10 @@ def generate_video(
285
 
286
  log.info(f"actual_seed: {actual_seed}")
287
 
 
288
  start_time = time.time()
 
 
289
  args, control_inputs = parse_arguments(
290
  controlnet_specs_in={
291
  "hdmap": {"control_weight": 0.3, "input_control": hdmap_video_input},
@@ -301,10 +306,20 @@ def generate_video(
301
  num_gpus=1,
302
  seed=seed,
303
  )
 
 
 
 
 
304
  videos, prompts = inference(args, control_inputs)
 
 
305
  end_time = time.time()
306
  log.info(f"Time taken: {end_time - start_time} s")
307
 
 
 
 
308
  video = videos[0]
309
  return video, video, actual_seed
310
 
 
6
  import gradio as gr
7
  import spaces
8
 
9
+ from gpu_info import watch_gpu_memory
10
+
11
  PWD = os.path.dirname(__file__)
12
  CHECKPOINTS_PATH = "/data/checkpoints"
13
  # CHECKPOINTS_PATH = os.path.join(PWD, "checkpoints")
 
287
 
288
  log.info(f"actual_seed: {actual_seed}")
289
 
290
+ # add timer to calculate the generation time
291
  start_time = time.time()
292
+
293
+ # parse generation configs
294
  args, control_inputs = parse_arguments(
295
  controlnet_specs_in={
296
  "hdmap": {"control_weight": 0.3, "input_control": hdmap_video_input},
 
306
  num_gpus=1,
307
  seed=seed,
308
  )
309
+
310
+ # watch gpu memory
311
+ watcher = watch_gpu_memory(10)
312
+
313
+ # start inference
314
  videos, prompts = inference(args, control_inputs)
315
+
316
+ # print the generation time
317
  end_time = time.time()
318
  log.info(f"Time taken: {end_time - start_time} s")
319
 
320
+ # stop the watcher
321
+ watcher.cancel()
322
+
323
  video = videos[0]
324
  return video, video, actual_seed
325
 
gpu_info.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from subprocess import check_output
2
+ from threading import Timer
3
+ from typing import Callable, List
4
+
5
+
6
+ def get_gpu_memory() -> List[int]:
7
+ """
8
+ Get the free GPU memory (VRAM) in MiB
9
+
10
+ :return memory_free_values: List of free GPU memory (VRAM) in MiB
11
+ """
12
+
13
+ command = "nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits"
14
+ memory_free_info = check_output(command.split()).decode("ascii").replace("\r", "").split("\n")[:-1]
15
+ memory_free_values = list(map(int, memory_free_info))
16
+ return memory_free_values
17
+
18
+
19
+ class RepeatingTimer(Timer):
20
+ def run(self):
21
+ self.finished.wait(self.interval)
22
+ while not self.finished.is_set():
23
+ self.function(*self.args, **self.kwargs)
24
+ self.finished.wait(self.interval)
25
+
26
+
27
+ gpu_memory_watcher: RepeatingTimer = None
28
+
29
+
30
+ def watch_gpu_memory(interval: int = 1, callback: Callable[[List[int]], None] = None) -> RepeatingTimer:
31
+ """
32
+ Start a repeating timer to watch the GPU memory usage
33
+
34
+ :param interval: Interval in seconds
35
+ :return timer: RepeatingTimer object
36
+ """
37
+ global gpu_memory_watcher
38
+ if gpu_memory_watcher is not None:
39
+ raise RuntimeError("GPU memory watcher is already running")
40
+
41
+ if callback is None:
42
+ callback = print
43
+
44
+ gpu_memory_watcher = RepeatingTimer(interval, lambda: callback(get_gpu_memory()))
45
+ gpu_memory_watcher.start()
46
+
47
+ return gpu_memory_watcher
48
+
49
+
50
+ if __name__ == "__main__":
51
+ from time import sleep
52
+
53
+ t = watch_gpu_memory()
54
+
55
+ counter = 0
56
+ while True:
57
+ sleep(1)
58
+ counter += 1
59
+ if counter == 10:
60
+ try:
61
+ watch_gpu_memory()
62
+ except RuntimeError:
63
+ print("Got exception")
64
+ pass
65
+ elif counter >= 20:
66
+ gpu_memory_watcher.cancel()
67
+ break