johnbridges committed on
Commit
a9fd6b4
·
1 Parent(s): 557adf7
Files changed (1) hide show
  1. app.py +4 -31
app.py CHANGED
@@ -9,7 +9,6 @@ import requests
9
  from transformers import AutoModelForImageTextToText, AutoProcessor
10
  from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
11
  import torch
12
- from torch.ao.quantization import quantize_dynamic
13
  import re
14
  import traceback
15
 
@@ -18,32 +17,6 @@ MODEL_ID = "Hcompany/Holo1-3B"
18
 
19
  # --- Helpers (robust across different transformers versions) ---
20
 
21
- def locate_text_backbone(model):
22
- """
23
- Tries common attribute names used by VLMs to find the LLM/text stack.
24
- Falls back to the whole model if unknown.
25
- """
26
- # common in Qwen-like / custom repos
27
- for name in [
28
- "language_model", # e.g., model.language_model
29
- "text_model", # e.g., model.text_model
30
- "model", # sometimes the text core is 'model'
31
- "llm", # generic
32
- "transformer", # some repos expose raw transformer as 'transformer'
33
- ]:
34
- m = getattr(model, name, None)
35
- if m is not None:
36
- return m, name
37
-
38
- # last resort: look for a child that has an lm_head or tied weights
39
- for name, child in model.named_children():
40
- if hasattr(child, "lm_head") or hasattr(child, "get_input_embeddings"):
41
- return child, name
42
-
43
- # if still not found, return the model itself
44
- return model, None
45
-
46
-
47
  def pick_device() -> str:
48
  # Force CPU per request
49
  return "cpu"
@@ -105,10 +78,10 @@ model_loaded = False
105
  load_error_message = ""
106
 
107
  try:
108
- # CPU-friendly dtype; bf16 on CPU is spotty, so prefer bfloat16
109
  model = AutoModelForImageTextToText.from_pretrained(
110
  MODEL_ID,
111
- torch_dtype=torch.bfloat16,
112
  trust_remote_code=True
113
  ).to(pick_device())
114
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
@@ -266,7 +239,7 @@ except Exception as e:
266
  pass
267
 
268
  # --- Gradio UI ---
269
- title = "Holo1-3B: Action VLM Localization Demo (CPU)"
270
  article = f"""
271
  <p style='text-align: center'>
272
  Model: <a href='https://huggingface.co/{MODEL_ID}' target='_blank'>{MODEL_ID}</a> by HCompany |
@@ -325,4 +298,4 @@ else:
325
 
326
  if __name__ == "__main__":
327
  # CPU Spaces can be slow; keep debug True for logs
328
- demo.launch(debug=True)
 
9
  from transformers import AutoModelForImageTextToText, AutoProcessor
10
  from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
11
  import torch
 
12
  import re
13
  import traceback
14
 
 
17
 
18
  # --- Helpers (robust across different transformers versions) ---
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def pick_device() -> str:
21
  # Force CPU per request
22
  return "cpu"
 
78
  load_error_message = ""
79
 
80
  try:
81
+ # CPU-friendly dtype; bf16 on CPU is spotty, so prefer float32
82
  model = AutoModelForImageTextToText.from_pretrained(
83
  MODEL_ID,
84
+ torch_dtype=torch.float32,
85
  trust_remote_code=True
86
  ).to(pick_device())
87
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 
239
  pass
240
 
241
  # --- Gradio UI ---
242
+ title = "Holo1-7B: Action VLM Localization Demo (CPU)"
243
  article = f"""
244
  <p style='text-align: center'>
245
  Model: <a href='https://huggingface.co/{MODEL_ID}' target='_blank'>{MODEL_ID}</a> by HCompany |
 
298
 
299
  if __name__ == "__main__":
300
  # CPU Spaces can be slow; keep debug True for logs
301
+ demo.launch(debug=True)