Spaces:
Sleeping
Sleeping
Commit · 5822c0a
1 Parent(s): 8a7a9dd
Move to vLLM server
Browse files
- Dockerfile +4 -1
- evaluate_demo.py +2 -2
- launch_space.sh +10 -0
- vllm_serve.sh +12 -0
Dockerfile
CHANGED
@@ -1,5 +1,8 @@
|
|
1 |
FROM syzygianinfern0/stormbase:latest
|
2 |
|
|
|
|
|
|
|
3 |
# Set up a new user named "user" with user ID 1000
|
4 |
RUN useradd -m -u 1000 user
|
5 |
|
@@ -20,4 +23,4 @@ COPY --chown=user . $HOME/app
|
|
20 |
EXPOSE 7860
|
21 |
|
22 |
# Run your Gradio app
|
23 |
-
CMD ["
|
|
|
1 |
FROM syzygianinfern0/stormbase:latest
|
2 |
|
3 |
+
# Install vllm (move it to Dockerfile.stormbase and remove this line)
|
4 |
+
RUN pip install vllm matplotlib
|
5 |
+
|
6 |
# Set up a new user named "user" with user ID 1000
|
7 |
RUN useradd -m -u 1000 user
|
8 |
|
|
|
23 |
EXPOSE 7860
|
24 |
|
25 |
# Run your Gradio app
|
26 |
+
CMD ["./launch_space.sh"]
|
evaluate_demo.py
CHANGED
@@ -8,7 +8,7 @@ from neus_v.smooth_scoring import smooth_confidence_scores
|
|
8 |
from neus_v.utils import clear_gpu_memory
|
9 |
from neus_v.veval.eval import evaluate_video_with_sequence_of_images
|
10 |
from neus_v.veval.parse import parse_proposition_set, parse_tl_specification
|
11 |
-
from neus_v.vlm.
|
12 |
|
13 |
# Suppress specific warnings
|
14 |
warnings.filterwarnings(
|
@@ -24,7 +24,7 @@ num_of_frame_in_sequence = 3
|
|
24 |
model = "InternVL2-8B"
|
25 |
device = 0
|
26 |
# Load the vision-language model
|
27 |
-
vision_language_model =
|
28 |
# Load distributions
|
29 |
print(f"Loading distributions from {pickle_path}")
|
30 |
with open(pickle_path, "rb") as f:
|
|
|
8 |
from neus_v.utils import clear_gpu_memory
|
9 |
from neus_v.veval.eval import evaluate_video_with_sequence_of_images
|
10 |
from neus_v.veval.parse import parse_proposition_set, parse_tl_specification
|
11 |
+
from neus_v.vlm.vllm_client import VLLMClient
|
12 |
|
13 |
# Suppress specific warnings
|
14 |
warnings.filterwarnings(
|
|
|
24 |
model = "InternVL2-8B"
|
25 |
device = 0
|
26 |
# Load the vision-language model
|
27 |
+
vision_language_model = VLLMClient(api_base="http://localhost:8000/v1", model="OpenGVLab/InternVL2_5-8B")
|
28 |
# Load distributions
|
29 |
print(f"Loading distributions from {pickle_path}")
|
30 |
with open(pickle_path, "rb") as f:
|
launch_space.sh
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Start vLLM server in background
|
4 |
+
./vllm_serve.sh &
|
5 |
+
|
6 |
+
# Give vLLM a fixed head start before Gradio tries to connect (a plain delay; it does not verify the server is actually ready)
|
7 |
+
sleep 60
|
8 |
+
|
9 |
+
# Start Gradio app
|
10 |
+
python3 evaluate_demo.py
|
vllm_serve.sh
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
MODEL="OpenGVLab/InternVL2_5-8B"
|
4 |
+
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
|
5 |
+
export NCCL_P2P_DISABLE=1
|
6 |
+
export CUDA_VISIBLE_DEVICES="0"
|
7 |
+
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
8 |
+
PORT=8000
|
9 |
+
vllm serve $MODEL \
|
10 |
+
--port $PORT \
|
11 |
+
--trust-remote-code \
|
12 |
+
--limit-mm-per-prompt image=4
|