multimodalart HF Staff committed on
Commit
14b9963
·
verified ·
1 Parent(s): b46c8a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -25,17 +25,12 @@ model_loaded = False
25
  load_error_message = ""
26
 
27
  try:
28
- # We recommend enabling flash_attention_2 for better acceleration and memory saving if available.
29
- # For broader compatibility, we'll try without it first, or conditionally enable it.
30
- # attn_implementation = "flash_attention_2" if torch.cuda.is_available() and hasattr(torch.nn.functional, 'scaled_dot_product_attention') else None
31
-
32
  model = AutoModelForImageTextToText.from_pretrained(
33
  MODEL_ID,
34
  torch_dtype=torch.bfloat16,
35
  attn_implementation="flash_attention_2",
36
- device_map="auto",
37
  trust_remote_code=True
38
- )
39
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
40
  model_loaded = True
41
  print("Model and processor loaded successfully.")
@@ -75,6 +70,7 @@ def run_inference_localization(
75
  messages_for_template: List[dict[str, Any]],
76
  pil_image_for_processing: Image.Image
77
  ) -> str:
 
78
  """
79
  Runs inference using the Holo1 model.
80
  - messages_for_template: The prompt structure, potentially including the PIL image object
 
25
  load_error_message = ""
26
 
27
  try:
 
 
 
 
28
  model = AutoModelForImageTextToText.from_pretrained(
29
  MODEL_ID,
30
  torch_dtype=torch.bfloat16,
31
  attn_implementation="flash_attention_2",
 
32
  trust_remote_code=True
33
+ ).to("cuda")
34
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
35
  model_loaded = True
36
  print("Model and processor loaded successfully.")
 
70
  messages_for_template: List[dict[str, Any]],
71
  pil_image_for_processing: Image.Image
72
  ) -> str:
73
+ torch.cuda.set_device("cuda")
74
  """
75
  Runs inference using the Holo1 model.
76
  - messages_for_template: The prompt structure, potentially including the PIL image object