Spaces:

Syzygianinfern0
/

NeuS-V

Sleeping

Syzygianinfern0 committed on Apr 6

Commit

5822c0a

1 Parent(s): 8a7a9dd

Move to vLLM server

Files changed (4) hide show

Dockerfile CHANGED Viewed

@@ -1,5 +1,8 @@
 FROM syzygianinfern0/stormbase:latest
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
@@ -20,4 +23,4 @@ COPY --chown=user . $HOME/app
 EXPOSE 7860
 # Run your Gradio app
-CMD ["python3", "evaluate_demo.py"]

 FROM syzygianinfern0/stormbase:latest
+# Install vllm (move it to Dockerfile.stormbase and remove this line)
+# --no-cache-dir keeps pip's download cache out of the image layer
+RUN pip install --no-cache-dir vllm matplotlib
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
 EXPOSE 7860
 # Run your Gradio app
+CMD ["./launch_space.sh"]

evaluate_demo.py CHANGED Viewed

@@ -8,7 +8,7 @@ from neus_v.smooth_scoring import smooth_confidence_scores
 from neus_v.utils import clear_gpu_memory
 from neus_v.veval.eval import evaluate_video_with_sequence_of_images
 from neus_v.veval.parse import parse_proposition_set, parse_tl_specification
-from neus_v.vlm.internvl import InternVL
 # Suppress specific warnings
 warnings.filterwarnings(
@@ -24,7 +24,7 @@ num_of_frame_in_sequence = 3
 model = "InternVL2-8B"
 device = 0
 # Load the vision-language model
-vision_language_model = InternVL(model_name=model, device=device)
 # Load distributions
 print(f"Loading distributions from {pickle_path}")
 with open(pickle_path, "rb") as f:

 from neus_v.utils import clear_gpu_memory
 from neus_v.veval.eval import evaluate_video_with_sequence_of_images
 from neus_v.veval.parse import parse_proposition_set, parse_tl_specification
+from neus_v.vlm.vllm_client import VLLMClient
 # Suppress specific warnings
 warnings.filterwarnings(
 model = "InternVL2-8B"
 device = 0
 # Load the vision-language model
+vision_language_model = VLLMClient(api_base="http://localhost:8000/v1", model="OpenGVLab/InternVL2_5-8B")
 # Load distributions
 print(f"Loading distributions from {pickle_path}")
 with open(pickle_path, "rb") as f:

launch_space.sh ADDED Viewed

+#!/bin/bash
+# Start vLLM server in background
+./vllm_serve.sh &
+# Wait briefly to ensure vLLM is up before Gradio tries to connect
+sleep 60
+# Start Gradio app
+python3 evaluate_demo.py

vllm_serve.sh ADDED Viewed

+#!/bin/bash
+MODEL="OpenGVLab/InternVL2_5-8B"
+export CUDA_DEVICE_ORDER="PCI_BUS_ID"
+export NCCL_P2P_DISABLE=1
+export CUDA_VISIBLE_DEVICES="0"
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+PORT=8000
+vllm serve $MODEL \
+    --port $PORT \
+    --trust-remote-code \
+    --limit-mm-per-prompt image=4