Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ import requests
|
|
9 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
10 |
from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
|
11 |
import torch
|
|
|
12 |
import re
|
13 |
import traceback
|
14 |
|
@@ -17,6 +18,32 @@ MODEL_ID = "Hcompany/Holo1-3B"
|
|
17 |
|
18 |
# --- Helpers (robust across different transformers versions) ---
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def pick_device() -> str:
    """Return the torch device string; hard-pinned to CPU for this Space."""
    return "cpu"
|
@@ -239,7 +266,7 @@ except Exception as e:
|
|
239 |
pass
|
240 |
|
241 |
# --- Gradio UI ---
|
242 |
-
title = "Holo1-
|
243 |
article = f"""
|
244 |
<p style='text-align: center'>
|
245 |
Model: <a href='https://huggingface.co/{MODEL_ID}' target='_blank'>{MODEL_ID}</a> by HCompany |
|
|
|
9 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
10 |
from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
|
11 |
import torch
|
12 |
+
from torch.ao.quantization import quantize_dynamic
|
13 |
import re
|
14 |
import traceback
|
15 |
|
|
|
18 |
|
19 |
# --- Helpers (robust across different transformers versions) ---
|
20 |
|
21 |
def locate_text_backbone(model):
    """
    Find the LLM/text stack inside a vision-language model.

    Probes the attribute names that common VLM implementations use for
    their text backbone; if none match, scans the model's direct children
    for one that looks like a language model (exposes ``lm_head`` or
    ``get_input_embeddings``).

    Returns:
        tuple: ``(module, attribute_name)`` on a match, or
        ``(model, None)`` when nothing better is found.
    """
    # Attribute names seen in Qwen-like and custom VLM repos.
    candidate_attrs = (
        "language_model",  # e.g., model.language_model
        "text_model",      # e.g., model.text_model
        "model",           # sometimes the text core is 'model'
        "llm",             # generic
        "transformer",     # some repos expose the raw transformer directly
    )
    for attr in candidate_attrs:
        backbone = getattr(model, attr, None)
        if backbone is not None:
            return backbone, attr

    # Fallback: first child that carries an lm_head or tied input embeddings.
    for child_name, child in model.named_children():
        if hasattr(child, "lm_head") or hasattr(child, "get_input_embeddings"):
            return child, child_name

    # Nothing matched; hand back the whole model unchanged.
    return model, None
|
45 |
+
|
46 |
+
|
47 |
def pick_device() -> str:
    # Deployment is CPU-only, so the device is always "cpu".
    return "cpu"
|
|
|
266 |
pass
|
267 |
|
268 |
# --- Gradio UI ---
|
269 |
+
title = "Holo1-3B: Action VLM Localization Demo (CPU)"
|
270 |
article = f"""
|
271 |
<p style='text-align: center'>
|
272 |
Model: <a href='https://huggingface.co/{MODEL_ID}' target='_blank'>{MODEL_ID}</a> by HCompany |
|