multimodalart HF Staff committed on
Commit
14b9963
·
verified ·
1 Parent(s): b46c8a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -25,17 +25,12 @@ model_loaded = False
25
  load_error_message = ""
26
 
27
  try:
28
- # We recommend enabling flash_attention_2 for better acceleration and memory saving if available.
29
- # For broader compatibility, we'll try without it first, or conditionally enable it.
30
- # attn_implementation = "flash_attention_2" if torch.cuda.is_available() and hasattr(torch.nn.functional, 'scaled_dot_product_attention') else None
31
-
32
  model = AutoModelForImageTextToText.from_pretrained(
33
  MODEL_ID,
34
  torch_dtype=torch.bfloat16,
35
  attn_implementation="flash_attention_2",
36
- device_map="auto",
37
  trust_remote_code=True
38
- )
39
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
40
  model_loaded = True
41
  print("Model and processor loaded successfully.")
@@ -75,6 +70,7 @@ def run_inference_localization(
75
  messages_for_template: List[dict[str, Any]],
76
  pil_image_for_processing: Image.Image
77
  ) -> str:
 
78
  """
79
  Runs inference using the Holo1 model.
80
  - messages_for_template: The prompt structure, potentially including the PIL image object
 
25
  load_error_message = ""
26
 
27
  try:
 
 
 
 
28
  model = AutoModelForImageTextToText.from_pretrained(
29
  MODEL_ID,
30
  torch_dtype=torch.bfloat16,
31
  attn_implementation="flash_attention_2",
 
32
  trust_remote_code=True
33
+ ).to("cuda")
34
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
35
  model_loaded = True
36
  print("Model and processor loaded successfully.")
 
70
  messages_for_template: List[dict[str, Any]],
71
  pil_image_for_processing: Image.Image
72
  ) -> str:
73
+ torch.cuda.set_device("cuda")
74
  """
75
  Runs inference using the Holo1 model.
76
  - messages_for_template: The prompt structure, potentially including the PIL image object