Commit a9fd6b4 · Parent(s): 557adf7
app.py CHANGED
@@ -9,7 +9,6 @@ import requests
 from transformers import AutoModelForImageTextToText, AutoProcessor
 from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
 import torch
-from torch.ao.quantization import quantize_dynamic
 import re
 import traceback
 
@@ -18,32 +17,6 @@ MODEL_ID = "Hcompany/Holo1-3B"
 
 # --- Helpers (robust across different transformers versions) ---
 
-def locate_text_backbone(model):
-    """
-    Tries common attribute names used by VLMs to find the LLM/text stack.
-    Falls back to the whole model if unknown.
-    """
-    # common in Qwen-like / custom repos
-    for name in [
-        "language_model",  # e.g., model.language_model
-        "text_model",      # e.g., model.text_model
-        "model",           # sometimes the text core is 'model'
-        "llm",             # generic
-        "transformer",     # some repos expose raw transformer as 'transformer'
-    ]:
-        m = getattr(model, name, None)
-        if m is not None:
-            return m, name
-
-    # last resort: look for a child that has an lm_head or tied weights
-    for name, child in model.named_children():
-        if hasattr(child, "lm_head") or hasattr(child, "get_input_embeddings"):
-            return child, name
-
-    # if still not found, return the model itself
-    return model, None
-
-
 def pick_device() -> str:
     # Force CPU per request
     return "cpu"
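
Note on this removal: the deleted `quantize_dynamic` import (first hunk) and the deleted `locate_text_backbone` helper were presumably two halves of one CPU-quantization path: find the text stack inside the VLM, then dynamically quantize its Linear layers. The actual call site is not visible in this diff, so the sketch below is an assumed reconstruction of that pattern, not the commit's original code:

    import torch
    from torch.ao.quantization import quantize_dynamic

    # Assumed usage of the removed pair (the call site is not shown in this
    # diff): locate the LLM/text stack, int8-quantize its nn.Linear layers
    # for CPU inference, then swap the quantized module back into the model.
    backbone, attr_name = locate_text_backbone(model)
    quantized = quantize_dynamic(backbone, {torch.nn.Linear}, dtype=torch.qint8)
    if attr_name is not None:
        setattr(model, attr_name, quantized)

Dropping this in favor of a plain float32 load (next hunk) trades memory for simplicity, which fits the commit's overall direction.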
@@ -105,10 +78,10 @@ model_loaded = False
 load_error_message = ""
 
 try:
-    # CPU-friendly dtype; bf16 on CPU is spotty, so prefer
+    # CPU-friendly dtype; bf16 on CPU is spotty, so prefer float32
     model = AutoModelForImageTextToText.from_pretrained(
         MODEL_ID,
-        torch_dtype=torch.
+        torch_dtype=torch.float32,
         trust_remote_code=True
     ).to(pick_device())
     processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
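
The added lines pin the weights to float32, matching the comment that bf16 on CPU is spotty. An illustrative sanity check (not part of the commit) to confirm the load landed as intended:

    import torch

    # Illustrative check, not in the commit: after .to(pick_device()) the
    # parameters should report cpu / torch.float32.
    p = next(model.parameters())
    assert p.device.type == "cpu" and p.dtype == torch.float32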
@@ -266,7 +239,7 @@ except Exception as e:
     pass
 
 # --- Gradio UI ---
-title = "Holo1-
+title = "Holo1-7B: Action VLM Localization Demo (CPU)"
 article = f"""
 <p style='text-align: center'>
 Model: <a href='https://huggingface.co/{MODEL_ID}' target='_blank'>{MODEL_ID}</a> by HCompany |
@@ -325,4 +298,4 @@ else:
 
 if __name__ == "__main__":
     # CPU Spaces can be slow; keep debug True for logs
-    demo.launch(debug=True)
+    demo.launch(debug=True)