Spaces:
Sleeping
Sleeping
Commit
·
a9fd6b4
1
Parent(s):
557adf7
app.py
CHANGED
|
@@ -9,7 +9,6 @@ import requests
|
|
| 9 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 10 |
from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
|
| 11 |
import torch
|
| 12 |
-
from torch.ao.quantization import quantize_dynamic
|
| 13 |
import re
|
| 14 |
import traceback
|
| 15 |
|
|
@@ -18,32 +17,6 @@ MODEL_ID = "Hcompany/Holo1-3B"
|
|
| 18 |
|
| 19 |
# --- Helpers (robust across different transformers versions) ---
|
| 20 |
|
| 21 |
-
def locate_text_backbone(model):
|
| 22 |
-
"""
|
| 23 |
-
Tries common attribute names used by VLMs to find the LLM/text stack.
|
| 24 |
-
Falls back to the whole model if unknown.
|
| 25 |
-
"""
|
| 26 |
-
# common in Qwen-like / custom repos
|
| 27 |
-
for name in [
|
| 28 |
-
"language_model", # e.g., model.language_model
|
| 29 |
-
"text_model", # e.g., model.text_model
|
| 30 |
-
"model", # sometimes the text core is 'model'
|
| 31 |
-
"llm", # generic
|
| 32 |
-
"transformer", # some repos expose raw transformer as 'transformer'
|
| 33 |
-
]:
|
| 34 |
-
m = getattr(model, name, None)
|
| 35 |
-
if m is not None:
|
| 36 |
-
return m, name
|
| 37 |
-
|
| 38 |
-
# last resort: look for a child that has an lm_head or tied weights
|
| 39 |
-
for name, child in model.named_children():
|
| 40 |
-
if hasattr(child, "lm_head") or hasattr(child, "get_input_embeddings"):
|
| 41 |
-
return child, name
|
| 42 |
-
|
| 43 |
-
# if still not found, return the model itself
|
| 44 |
-
return model, None
|
| 45 |
-
|
| 46 |
-
|
| 47 |
def pick_device() -> str:
|
| 48 |
# Force CPU per request
|
| 49 |
return "cpu"
|
|
@@ -105,10 +78,10 @@ model_loaded = False
|
|
| 105 |
load_error_message = ""
|
| 106 |
|
| 107 |
try:
|
| 108 |
-
# CPU-friendly dtype; bf16 on CPU is spotty, so prefer
|
| 109 |
model = AutoModelForImageTextToText.from_pretrained(
|
| 110 |
MODEL_ID,
|
| 111 |
-
torch_dtype=torch.
|
| 112 |
trust_remote_code=True
|
| 113 |
).to(pick_device())
|
| 114 |
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
|
|
@@ -266,7 +239,7 @@ except Exception as e:
|
|
| 266 |
pass
|
| 267 |
|
| 268 |
# --- Gradio UI ---
|
| 269 |
-
title = "Holo1-
|
| 270 |
article = f"""
|
| 271 |
<p style='text-align: center'>
|
| 272 |
Model: <a href='https://huggingface.co/{MODEL_ID}' target='_blank'>{MODEL_ID}</a> by HCompany |
|
|
@@ -325,4 +298,4 @@ else:
|
|
| 325 |
|
| 326 |
if __name__ == "__main__":
|
| 327 |
# CPU Spaces can be slow; keep debug True for logs
|
| 328 |
-
demo.launch(debug=True)
|
|
|
|
| 9 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 10 |
from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
|
| 11 |
import torch
|
|
|
|
| 12 |
import re
|
| 13 |
import traceback
|
| 14 |
|
|
|
|
| 17 |
|
| 18 |
# --- Helpers (robust across different transformers versions) ---
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def pick_device() -> str:
|
| 21 |
# Force CPU per request
|
| 22 |
return "cpu"
|
|
|
|
| 78 |
load_error_message = ""
|
| 79 |
|
| 80 |
try:
|
| 81 |
+
# CPU-friendly dtype; bf16 on CPU is spotty, so prefer float32
|
| 82 |
model = AutoModelForImageTextToText.from_pretrained(
|
| 83 |
MODEL_ID,
|
| 84 |
+
torch_dtype=torch.float32,
|
| 85 |
trust_remote_code=True
|
| 86 |
).to(pick_device())
|
| 87 |
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
|
|
|
|
| 239 |
pass
|
| 240 |
|
| 241 |
# --- Gradio UI ---
|
| 242 |
+
title = "Holo1-7B: Action VLM Localization Demo (CPU)"
|
| 243 |
article = f"""
|
| 244 |
<p style='text-align: center'>
|
| 245 |
Model: <a href='https://huggingface.co/{MODEL_ID}' target='_blank'>{MODEL_ID}</a> by HCompany |
|
|
|
|
| 298 |
|
| 299 |
if __name__ == "__main__":
|
| 300 |
# CPU Spaces can be slow; keep debug True for logs
|
| 301 |
+
demo.launch(debug=True)
|