prithivMLmods committed on
Commit
c337e98
·
verified ·
1 Parent(s): 819aa9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -4
app.py CHANGED
@@ -17,7 +17,6 @@ import requests
17
  from transformers import (
18
  Qwen2VLForConditionalGeneration,
19
  Qwen2_5_VLForConditionalGeneration,
20
- AutoModelForImageTextToText,
21
  AutoProcessor,
22
  TextIteratorStreamer,
23
  AutoModel,
@@ -30,8 +29,20 @@ MAX_MAX_NEW_TOKENS = 4096
30
  DEFAULT_MAX_NEW_TOKENS = 2048
31
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
32
 
33
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
34
 
 
 
 
 
 
 
 
 
 
 
35
  # --- Model Loading ---
36
 
37
  # To address the warnings, we add `use_fast=False` to ensure we use the
@@ -81,7 +92,9 @@ model_v4 = AutoModel.from_pretrained(
81
  MODEL_ID_V4,
82
  trust_remote_code=True,
83
  torch_dtype=torch.bfloat16,
84
- attn_implementation='sdpa'
 
 
85
  ).eval().to(device)
86
  tokenizer_v4 = AutoTokenizer.from_pretrained(MODEL_ID_V4, trust_remote_code=True, use_fast=False)
87
 
@@ -312,4 +325,4 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
312
  )
313
 
314
  if __name__ == "__main__":
315
- demo.queue(max_size=50).launch(share=True, show_error=True)
 
17
  from transformers import (
18
  Qwen2VLForConditionalGeneration,
19
  Qwen2_5_VLForConditionalGeneration,
 
20
  AutoProcessor,
21
  TextIteratorStreamer,
22
  AutoModel,
 
29
  DEFAULT_MAX_NEW_TOKENS = 2048
30
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
31
 
32
+ # Let the environment (e.g., Hugging Face Spaces) determine the device.
33
+ # This avoids conflicts with the CUDA environment set up by the platform.
34
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
35
 
36
+ print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
37
+ print("torch.__version__ =", torch.__version__)
38
+ print("torch.version.cuda =", torch.version.cuda)
39
+ print("cuda available:", torch.cuda.is_available())
40
+ print("cuda device count:", torch.cuda.device_count())
41
+ if torch.cuda.is_available():
42
+ print("current device:", torch.cuda.current_device())
43
+ print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
44
+
45
+ print("Using device:", device)
46
  # --- Model Loading ---
47
 
48
  # To address the warnings, we add `use_fast=False` to ensure we use the
 
92
  MODEL_ID_V4,
93
  trust_remote_code=True,
94
  torch_dtype=torch.bfloat16,
95
+ # Using 'sdpa' can sometimes cause issues in certain environments,
96
+ # letting transformers choose the default is safer.
97
+ # attn_implementation='sdpa'
98
  ).eval().to(device)
99
  tokenizer_v4 = AutoTokenizer.from_pretrained(MODEL_ID_V4, trust_remote_code=True, use_fast=False)
100
 
 
325
  )
326
 
327
  if __name__ == "__main__":
328
+ demo.queue(max_size=50).launch(share=True, ssr_mode=False, show_error=True)