ankandrew committed on
Commit f17ef4c · 1 Parent(s): c4f32fc

Add Info toast message w/ time taken

Files changed (1)
app.py  +27 -4
app.py CHANGED
@@ -1,4 +1,8 @@
+import contextlib
 import subprocess
+import time
+from typing import Iterator, Callable
+
 import gradio as gr
 import spaces
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
@@ -20,6 +24,21 @@ MODEL_NAMES = {
 }


+@contextlib.contextmanager
+def measure_time() -> Iterator[Callable[[], float]]:
+    """
+    A context manager for measuring execution time (in seconds) within its code block.
+
+    usage:
+        with measure_time() as timer:
+            # Code snippet to be timed
+        print(f"Code took: {timer()} seconds")
+    """
+    start_time = end_time = time.perf_counter()
+    yield lambda: end_time - start_time
+    end_time = time.perf_counter()
+
+
 @spaces.GPU(duration=300)
 def run_inference(model_key, input_type, text, image, video, fps, system_prompt, add_vision_id):
     """
@@ -78,10 +97,14 @@ def run_inference(model_key, input_type, text, image, video, fps, system_prompt,
     )
     inputs = inputs.to(model.device)

-    gen_ids = model.generate(**inputs, max_new_tokens=512)
-    # Trim the prompt tokens
-    trimmed = [out_ids[len(inp_ids):] for inp_ids, out_ids in zip(inputs.input_ids, gen_ids)]
-    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]
+    with measure_time() as timer:
+        gen_ids = model.generate(**inputs, max_new_tokens=512)
+        # Trim the prompt tokens
+        trimmed = [out_ids[len(inp_ids):] for inp_ids, out_ids in zip(inputs.input_ids, gen_ids)]
+        result = processor.batch_decode(trimmed, skip_special_tokens=True)[0]
+
+    gr.Info(f"Finished in {timer():.2f}s", title="Success", duration=5)  # Info toast with time taken
+    return result


 # Build Gradio interface
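
Since the diff only shows the timing helper inside app.py, here is a minimal standalone sketch of the same pattern: a contextlib context manager that yields a callable which reports 0.0 while the block is running and the elapsed seconds once the block has exited. The dummy workload and the print() call are assumptions for the sake of a self-contained example; in app.py the timed block is model.generate(...) and the result is surfaced via gr.Info(...).

# Minimal, self-contained sketch of the timing pattern (assumed stand-ins:
# the arithmetic workload replaces model.generate(), print() replaces gr.Info()).
import contextlib
import time
from typing import Callable, Iterator


@contextlib.contextmanager
def measure_time() -> Iterator[Callable[[], float]]:
    # Yield a callable; it returns 0.0 until the with-block exits,
    # after which it returns the final wall-clock delta in seconds.
    start_time = end_time = time.perf_counter()
    yield lambda: end_time - start_time
    end_time = time.perf_counter()


if __name__ == "__main__":
    with measure_time() as timer:
        total = sum(i * i for i in range(1_000_000))  # stand-in for model.generate(...)
    print(f"Finished in {timer():.2f}s (result: {total})")  # app.py shows this via a gr.Info toast

The lambda closes over start_time and end_time in the generator's frame, so the elapsed time only becomes available after the block finishes, which is exactly when the toast in run_inference reads it.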