Syzygianinfern0 committed
Commit 5822c0a · 1 Parent(s): 8a7a9dd

Move to vLLM server

Files changed (4)
  1. Dockerfile +4 -1
  2. evaluate_demo.py +2 -2
  3. launch_space.sh +10 -0
  4. vllm_serve.sh +12 -0
Dockerfile CHANGED
@@ -1,5 +1,8 @@
 FROM syzygianinfern0/stormbase:latest
 
+# Install vllm (move it to Dockerfile.stormbase and remove this line)
+RUN pip install vllm matplotlib
+
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
 
@@ -20,4 +23,4 @@ COPY --chown=user . $HOME/app
 EXPOSE 7860
 
 # Run your Gradio app
-CMD ["python3", "evaluate_demo.py"]
+CMD ["./launch_space.sh"]
evaluate_demo.py CHANGED
@@ -8,7 +8,7 @@ from neus_v.smooth_scoring import smooth_confidence_scores
 from neus_v.utils import clear_gpu_memory
 from neus_v.veval.eval import evaluate_video_with_sequence_of_images
 from neus_v.veval.parse import parse_proposition_set, parse_tl_specification
-from neus_v.vlm.internvl import InternVL
+from neus_v.vlm.vllm_client import VLLMClient
 
 # Suppress specific warnings
 warnings.filterwarnings(
@@ -24,7 +24,7 @@ num_of_frame_in_sequence = 3
 model = "InternVL2-8B"
 device = 0
 # Load the vision-language model
-vision_language_model = InternVL(model_name=model, device=device)
+vision_language_model = VLLMClient(api_base="http://localhost:8000/v1", model="OpenGVLab/InternVL2_5-8B")
 # Load distributions
 print(f"Loading distributions from {pickle_path}")
 with open(pickle_path, "rb") as f:
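The neus_v.vlm.vllm_client module itself is not part of this commit, so only its call site is visible here. A minimal sketch of what such a client could look like, assuming the openai Python package talking to vLLM's OpenAI-compatible API; the ask method name and its parameters are hypothetical, not the project's actual interface:

# Sketch of an OpenAI-compatible client wrapper; the real implementation
# in neus_v.vlm.vllm_client is not shown in this diff, and the `ask`
# method below is a hypothetical interface.
import base64

from openai import OpenAI


class VLLMClient:
    def __init__(self, api_base: str, model: str):
        # vLLM's OpenAI-compatible server does not check the API key by default.
        self.client = OpenAI(base_url=api_base, api_key="EMPTY")
        self.model = model

    def ask(self, prompt: str, image_paths: list[str]) -> str:
        # Pack the prompt and base64-encoded images into one user message,
        # using the OpenAI vision message format that vLLM accepts.
        content = [{"type": "text", "text": prompt}]
        for path in image_paths:
            with open(path, "rb") as f:
                encoded = base64.b64encode(f.read()).decode()
            content.append(
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}
            )
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": content}],
        )
        return response.choices[0].message.content

Note that vllm_serve.sh below starts the server with --limit-mm-per-prompt image=4, so any single request through such a client can carry at most four images.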
launch_space.sh ADDED
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Start vLLM server in background
+./vllm_serve.sh &
+
+# Wait briefly to ensure vLLM is up before Gradio tries to connect
+sleep 60
+
+# Start Gradio app
+python3 evaluate_demo.py
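The fixed sleep 60 works but is fragile: loading an 8B model can take longer on a cold start, and the Gradio app would then hit a dead endpoint. An alternative is to poll the /health endpoint that vLLM's OpenAI-compatible server exposes; a sketch in Python, where the wait_for_vllm helper is illustrative rather than part of this commit:

# Sketch of a readiness poll that could replace the fixed `sleep 60`;
# vLLM's OpenAI-compatible server answers /health with HTTP 200 once
# it is ready to accept requests.
import time
import urllib.request


def wait_for_vllm(url: str = "http://localhost:8000/health", timeout: float = 600.0) -> None:
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return  # Server is up; safe to start Gradio.
        except OSError:
            pass  # Not listening yet; keep polling.
        time.sleep(5)
    raise TimeoutError(f"vLLM did not become healthy within {timeout:.0f}s")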
vllm_serve.sh ADDED
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+MODEL="OpenGVLab/InternVL2_5-8B"
+export CUDA_DEVICE_ORDER="PCI_BUS_ID"
+export NCCL_P2P_DISABLE=1
+export CUDA_VISIBLE_DEVICES="0"
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+PORT=8000
+vllm serve $MODEL \
+    --port $PORT \
+    --trust-remote-code \
+    --limit-mm-per-prompt image=4
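Once the server is up, a quick smoke test against the same endpoint and model name hard-coded in these scripts, again assuming the openai package is installed:

# Smoke test for the server started by vllm_serve.sh; the endpoint and
# model name mirror the values used in the scripts above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Confirm the model is registered, then run a one-line completion.
print([m.id for m in client.models.list().data])
response = client.chat.completions.create(
    model="OpenGVLab/InternVL2_5-8B",
    messages=[{"role": "user", "content": "Say 'ready' if you can read this."}],
)
print(response.choices[0].message.content)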