Stremly committed on
Commit 15c1569 · verified · 1 Parent(s): 4c75728

Update app.py

Files changed (1)
app.py +24 -40
app.py CHANGED
@@ -11,10 +11,30 @@ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info  # include this file in your repo if not pip-installable
 
 # ---- model & processor loaded on CPU ----
+from transformers import BitsAndBytesConfig
 
-# ─── lazy-load cache ──────────────────────────────────────────
-_MODEL = None      # will hold the quantised weights
-_PROCESSOR = None  # will hold the resized processor
+# 4-bit quantisation (~6 GB on H200)
+bnb_cfg = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
+)
+
+_MODEL = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    "ByteDance-Seed/UI-TARS-1.5-7B",
+    quantization_config=bnb_cfg,
+    device_map="auto",
+    torch_dtype=torch.float16
+)
+
+_PROCESSOR = AutoProcessor.from_pretrained(
+    "ByteDance-Seed/UI-TARS-1.5-7B",
+    size={"shortest_edge": 512, "longest_edge": 1344},  # sane res
+    use_fast=True,
+)
+
+model = _MODEL
+processor = _PROCESSOR
 
 
 def draw_point(image: Image.Image, point=None, radius: int = 5):
@@ -39,38 +59,7 @@ def navigate(screenshot, task: str, platform: str, history):
     history (list | str | None): Previous messages list. Accepts either an
         actual Python list (via gr.JSON) or a JSON/Python‑literal string.
     """
-    global _MODEL, _PROCESSOR
-    # ------- on-demand model / processor load -------------------------
-    if _MODEL is None:
-        from transformers import BitsAndBytesConfig
-
-        # 4-bit quantisation (~6 GB on H200)
-        bnb_cfg = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_use_double_quant=True,
-        )
-
-        _MODEL = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-            "ByteDance-Seed/UI-TARS-1.5-7B",
-            quantization_config=bnb_cfg,
-            device_map="auto",
-            torch_dtype=torch.float16,
-            low_cpu_mem_usage=True,
-        )
-
-        _PROCESSOR = AutoProcessor.from_pretrained(
-            "ByteDance-Seed/UI-TARS-1.5-7B",
-            size={"shortest_edge": 512, "longest_edge": 1344},  # sane res
-            use_fast=True,
-        )
-
-    # use mem-efficient attention kernels
-    torch.backends.cuda.enable_flash_sdp(False)
-    torch.backends.cuda.enable_mem_efficient_sdp(True)
-
-    model = _MODEL
-    processor = _PROCESSOR
+
 
     # ───────────────────── normalise history input ──────────────────────────
     try:
@@ -139,11 +128,6 @@ def navigate(screenshot, task: str, platform: str, history):
         pass
 
         return screenshot, raw_out, messages
-
-    finally:                      # ← always executed
-        torch.cuda.empty_cache()  # free unused blocks
-        torch.cuda.ipc_collect()  # defrag for next call
-
 
 # ────────────────────────── Gradio interface ───────────────────────────────
 
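Net effect: the 4-bit model and processor are now built once at import time, and navigate() simply uses the module-level objects; the lazy-load cache, the SDP-kernel toggles, and the per-call empty_cache()/ipc_collect() cleanup are all gone. For context, below is a minimal sketch of how such an eagerly loaded pair is typically driven for a single turn, following the standard Qwen2.5-VL inference recipe; the run_once helper and its prompt are illustrative, not part of app.py.

import torch
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import (
    AutoProcessor,
    BitsAndBytesConfig,
    Qwen2_5_VLForConditionalGeneration,
)

MODEL_ID = "ByteDance-Seed/UI-TARS-1.5-7B"

# Eager module-level load, mirroring the committed code.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_cfg,
    device_map="auto",
    torch_dtype=torch.float16,
)
processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    size={"shortest_edge": 512, "longest_edge": 1344},
    use_fast=True,
)


def run_once(screenshot: Image.Image, prompt: str) -> str:
    """Run one model call on one screenshot (hypothetical helper)."""
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": screenshot},
            {"type": "text", "text": prompt},
        ],
    }]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        out_ids = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, not the echoed prompt.
    new_ids = out_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(new_ids, skip_special_tokens=True)[0]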
 
 
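The deleted lazy-load cache is still a reasonable shape when cold-start time matters more than first-request latency (for example, on Spaces where a GPU is attached per request). Below is a sketch of the same idea using functools.lru_cache instead of module globals, assuming the settings from the removed block; the function name is illustrative.

from functools import lru_cache

import torch
from transformers import (
    AutoProcessor,
    BitsAndBytesConfig,
    Qwen2_5_VLForConditionalGeneration,
)


@lru_cache(maxsize=1)
def get_model_and_processor():
    """Load on first call, then reuse; stands in for the removed _MODEL/_PROCESSOR globals."""
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        "ByteDance-Seed/UI-TARS-1.5-7B",
        quantization_config=bnb_cfg,
        device_map="auto",
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,  # kept from the removed code; dropped by this commit
    )
    processor = AutoProcessor.from_pretrained(
        "ByteDance-Seed/UI-TARS-1.5-7B",
        size={"shortest_edge": 512, "longest_edge": 1344},
        use_fast=True,
    )
    return model, processor

Whether the dropped low_cpu_mem_usage=True flag and the finally-block empty_cache()/ipc_collect() calls will be missed depends on how fragmented GPU memory gets across calls; both are cheap to reinstate if out-of-memory errors reappear.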