Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -17,7 +17,6 @@ import requests
 from transformers import (
     Qwen2VLForConditionalGeneration,
     Qwen2_5_VLForConditionalGeneration,
-    AutoModelForImageTextToText,
     AutoProcessor,
     TextIteratorStreamer,
     AutoModel,
@@ -30,8 +29,20 @@ MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 2048
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
-
+# Let the environment (e.g., Hugging Face Spaces) determine the device.
+# This avoids conflicts with the CUDA environment setup by the platform.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
+print("torch.__version__ =", torch.__version__)
+print("torch.version.cuda =", torch.version.cuda)
+print("cuda available:", torch.cuda.is_available())
+print("cuda device count:", torch.cuda.device_count())
+if torch.cuda.is_available():
+    print("current device:", torch.cuda.current_device())
+    print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
+
+print("Using device:", device)
 # --- Model Loading ---
 
 # To address the warnings, we add `use_fast=False` to ensure we use the
@@ -81,7 +92,9 @@ model_v4 = AutoModel.from_pretrained(
     MODEL_ID_V4,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
-    attn_implementation='sdpa'
+    # Using 'sdpa' can sometimes cause issues in certain environments,
+    # letting transformers choose the default is safer.
+    # attn_implementation='sdpa'
 ).eval().to(device)
 tokenizer_v4 = AutoTokenizer.from_pretrained(MODEL_ID_V4, trust_remote_code=True, use_fast=False)
 
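This hunk stops forcing the 'sdpa' attention backend and falls back to the library default. As a hedged illustration (not the Space's code): recent transformers releases accept attn_implementation in from_pretrained, and "prajjwal1/bert-tiny" below is an arbitrary small public checkpoint standing in for MODEL_ID_V4.

    from transformers import AutoModel

    # Valid values include "eager" and "sdpa"; omitting the argument,
    # as the commit now does, lets transformers pick its default backend.
    model = AutoModel.from_pretrained(
        "prajjwal1/bert-tiny",
        attn_implementation="eager",
    )
    print(model.config._attn_implementation)  # reports the selected backend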
@@ -312,4 +325,4 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     )
 
 if __name__ == "__main__":
-    demo.queue(max_size=50).launch(share=True, show_error=True)
+    demo.queue(max_size=50).launch(share=True, ssr_mode=False, show_error=True)
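The only functional change in the final hunk is the added ssr_mode=False, which opts out of Gradio 5's server-side rendering path. A minimal sketch of the same launch configuration, with a placeholder UI standing in for the Space's Blocks app:

    import gradio as gr

    # Placeholder interface; the real Space builds its UI with css and a theme.
    with gr.Blocks() as demo:
        gr.Markdown("placeholder UI")

    if __name__ == "__main__":
        # ssr_mode=False disables server-side rendering (Gradio 5.x);
        # share=True requests a public link, show_error=True surfaces
        # tracebacks in the UI instead of failing silently.
        demo.queue(max_size=50).launch(share=True, ssr_mode=False, show_error=True)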