Spaces:
Running
on
Zero
Running
on
Zero
Update vlm.py
Browse files
vlm.py
CHANGED
@@ -27,7 +27,7 @@ processor = AutoProcessor.from_pretrained(model_id)
|
|
27 |
model = Mistral3ForConditionalGeneration.from_pretrained(
|
28 |
model_id,
|
29 |
#_attn_implementation="flash_attention_2",
|
30 |
-
torch_dtype=torch.
|
31 |
).eval().to(device)
|
32 |
|
33 |
#
|
@@ -122,7 +122,7 @@ def stream_response(
|
|
122 |
tokenize=True,
|
123 |
return_dict=True,
|
124 |
return_tensors="pt",
|
125 |
-
).to(model.device, dtype=torch.
|
126 |
|
127 |
# Generate
|
128 |
streamer = TextIteratorStreamer(
|
|
|
27 |
model = Mistral3ForConditionalGeneration.from_pretrained(
|
28 |
model_id,
|
29 |
#_attn_implementation="flash_attention_2",
|
30 |
+
torch_dtype=torch.float16
|
31 |
).eval().to(device)
|
32 |
|
33 |
#
|
|
|
122 |
tokenize=True,
|
123 |
return_dict=True,
|
124 |
return_tensors="pt",
|
125 |
+
).to(model.device, dtype=torch.float16)
|
126 |
|
127 |
# Generate
|
128 |
streamer = TextIteratorStreamer(
|