Gül Sena Altıntaş committed
Commit 889a42a · 1 parent: 1d3a5fe

Improved serving script


Added Gemma supertoken model.
A small bug persists with reading HF_TOKEN.

Files changed (2)
  1. app.py +6 -2
  2. serve_on_killarney.sh +62 -31
app.py CHANGED
```diff
@@ -9,11 +9,13 @@ import re
 import logging
 from typing import List, Dict, Any
 import gc
+import os
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+print("hf_toke_fromglobal", os.environ.get("HF_TOKEN"))
 # Model configurations - maps display names to HF model paths
 PREDEFINED_MODELS = [
     "meta-llama/Llama-3.2-1B",
@@ -25,8 +27,8 @@ PREDEFINED_MODELS = [
     "CohereForAI/aya-expanse-8b",
     "common-pile/comma-v0.1-2t",
     "google/byt5-small",
-    "google/byt5-small",
     "gsaltintas/supertoken_models-llama_gpt2",
+    "gsaltintas/supertoken_models-llama_google-gemma-2-2b"
 ]
 # Global cache for loaded models
 model_cache = {}
@@ -104,10 +106,10 @@ def load_model_and_tokenizer(model_path, use_cache=True, progress_callback=None)
     if progress_callback:
         progress_callback(0.1, f"🔄 Starting to load model: {model_path}")
 
-    logger.info(f"Loading model: {model_path}")
 
     # Check if CUDA is available
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    logger.info(f"Loading model: {model_path} using device: {device}")
 
     if progress_callback:
         progress_callback(0.2, f"📥 Loading tokenizer for {model_path}...")
@@ -122,6 +124,8 @@ def load_model_and_tokenizer(model_path, use_cache=True, progress_callback=None)
     if progress_callback:
         progress_callback(0.5, f"🧠 Loading model weights for {model_path}... (this may take a while)")
 
+    logger.info(os.getcwd())
+    logger.info("hf token", os.environ.get("HF_TOKEN"))
     # Load model with appropriate settings
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
```
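The commit note says the HF_TOKEN read still misbehaves. Two likely contributors in the added lines, sketched below under the assumption of a recent transformers release (the `token=` keyword and the `load_gated_model` helper are illustrative, not part of this commit): `logging` uses lazy %-style formatting, so `logger.info("hf token", os.environ.get("HF_TOKEN"))` never renders its second argument and instead reports a formatting error; and a token exported in an interactive shell is only visible here if it is set in this process's environment or passed explicitly.

```python
# Minimal sketch, not the committed code: make the token check observable and
# pass the token explicitly. Assumes a recent transformers release where
# from_pretrained() accepts token=; load_gated_model is a hypothetical helper.
import logging
import os

from transformers import AutoModelForCausalLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

HF_TOKEN = os.environ.get("HF_TOKEN")  # None if unset in this process

# logger.info takes a %-format string; log presence only, never the secret itself.
logger.info("HF_TOKEN present: %s", HF_TOKEN is not None)


def load_gated_model(model_path: str):
    # Passing token= avoids depending on the environment being inherited by
    # the serving job; token=None falls back to the library's default lookup.
    tokenizer = AutoTokenizer.from_pretrained(model_path, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(model_path, token=HF_TOKEN)
    return model, tokenizer
```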
serve_on_killarney.sh CHANGED
```diff
@@ -5,21 +5,24 @@ CLUSTER_HOST="killarney"
 CLUSTER_USER="gsa"
 
 # Job configuration
+ACCOUNT="aip-craffel"
 SCRIPT_NAME="gradio_job.slurm"
-APP_PATH="/home/gsa/quick-tokenizer-accuracy/app.py"
+APP_DIR="/project/$ACCOUNT/$CLUSTER_USER/quick-tokenizer-accuracy"
+APP_PATH="app.py"
 JOB_NAME="gradio-app"
-PARTITION="l40s"
+GPU_TYPE="l40s"
+NUM_GPUS=1
 NODES=1
 NTASKS_PER_NODE=1
 CPUS_PER_TASK=4
 MEM="8G"
 TIME="02:00:00"
-GRADIO_PORT=7860
-ACCOUNT="aip-craffel"
-script_location="/project/aip-craffel/gsa/$SCRIPT_NAME"
+GRADIO_PORT=7861
+LOCAL_PORT=7861
+script_location="$APP_DIR/$SCRIPT_NAME"
 
-ENV_PATH="/home/gsa/tokenizers/.venv/bin/activate"
-OUTPUT_PATH="/project/aip-craffel/gsa/.slurm"
+ENV_PATH="/home/$CLUSTER_USER/tokenizers/.venv/bin/activate"
+OUTPUT_DIR="/project/$ACCOUNT/$CLUSTER_USER/.slurm"
 
 # Function to cleanup temporary files
 cleanup() {
@@ -37,14 +40,14 @@ trap cleanup EXIT INT TERM
 cat > "$SCRIPT_NAME" << EOF
 #!/bin/bash
 #SBATCH --job-name=$JOB_NAME
-#SBATCH --partition=$PARTITION
+#SBATCH --gres=gpu:$GPU_TYPE:$NUM_GPUS
 #SBATCH --nodes=$NODES
 #SBATCH --ntasks-per-node=$NTASKS_PER_NODE
 #SBATCH --cpus-per-task=$CPUS_PER_TASK
 #SBATCH --mem=$MEM
 #SBATCH --time=$TIME
 #SBATCH --account=$ACCOUNT
-#SBATCH --output=$OUTPUT_PATH/%j.out
+#SBATCH --output=$OUTPUT_DIR/%j.out
 
 # Print job info
 echo "Job started on node: \$(hostname)"
@@ -57,15 +60,15 @@ echo "Starting time: \$(date)"
 module load slurm/killarney/24.05.7 StdEnv/2023 gcc/13.3 openmpi/5.0.3 cuda/12.6 python/3.10.13
 
 # Activate virtual environment
-source $ENV_PATH
+source "${ENV_PATH}"
 
 # Set up environment
 export GRADIO_SERVER_NAME="0.0.0.0"
 export GRADIO_SERVER_PORT=$GRADIO_PORT
 
 # Start Gradio app
-echo "Starting Gradio app on port $GRADIO_PORT..."
-python $APP_PATH --no-browser
+echo "Starting Gradio app on port ${GRADIO_PORT}..."
+gradio "${APP_PATH}" --watch-dirs "${APP_DIR}"
 
 # Keep the job alive
 echo "Gradio app finished at: \$(date)"
@@ -81,7 +84,7 @@ if [ $? -ne 0 ]; then
 fi
 
 echo "Submitting job to cluster..."
-JOB_ID=$(ssh -t "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'cd /project && sbatch --parsable $script_location'")
+JOB_ID=$(ssh -t "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'cd $APP_DIR && sbatch --parsable $script_location'")
 
 if [ $? -ne 0 ]; then
     echo "Error: Failed to submit job to cluster"
@@ -122,7 +125,7 @@ done
 
 # Get the allocated node
 NODE=$(ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'squeue -j $JOB_ID -h -o \"%N\"'")
-echo "Job is running on node: $NODE"
+echo "Job (${JOB_ID}) is running on node: ${NODE}"
 
 # Wait a moment for the Gradio app to start
 echo "Waiting for Gradio app to initialize..."
@@ -147,10 +150,50 @@ if [ -n "$GRADIO_CHECK" ]; then
 else
     echo "⚠ Warning: Gradio app may not have started properly"
     echo "Check the job output:"
-    ssh "$CLUSTER_USER@$CLUSTER_HOST" \
-        "bash -l -c 'tail ${JOB_ID}.out'"
+    ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'tail \"${OUTPUT_DIR}/${JOB_ID}.out\"'"
 fi
 
+
+cancel_job() {
+    read -p "Would you like to cancel the job? (y/n): " -n 1 -r
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+        ## job id known only remotely
+        # ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel \${JOB_ID}'"
+        ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel ${JOB_ID} '"
+        # ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel ${JOB_ID}'"
+    fi
+}
+
+# Optional port forwarding
+read -p "Would you like to set up port forwarding now? (y/n): " -n 1 -r
+echo ""
+if [[ $REPLY =~ ^[Yy]$ ]]; then
+    # ssh -L "${GRADIO_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
+    #     -t "echo 'Port forwarding active: localhost:${GRADIO_PORT} -> ${NODE}:${GRADIO_PORT}'; bash
+    # If GRADIO_PORT is in use locally, pick a random free port
+    if lsof -iTCP:"$GRADIO_PORT" -sTCP:LISTEN >/dev/null 2>&1; then
+        echo "Port $GRADIO_PORT is already in use locally — selecting a free one..."
+        LOCAL_PORT=$(comm -23 \
+            <(seq 1024 65535 | sort) \
+            <(lsof -nP -iTCP -sTCP:LISTEN | awk 'NR>1 {print $9}' | awk -F: '{print $NF}' | sort -u) \
+            | awk 'BEGIN{srand()} {ports[NR]=$0} END{print ports[int(rand()*NR)+1]}')
+    else
+        LOCAL_PORT="$GRADIO_PORT"
+    fi
+
+    echo "Using local port: $LOCAL_PORT"
+
+    echo "Setting up port forwarding... Open https://localhost:${LOCAL_PORT} in your browser to access the app."
+    ssh -L "${LOCAL_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
+        -t "echo 'Port forwarding active: localhost:${LOCAL_PORT} -> ${NODE}:${GRADIO_PORT}'; bash"
+
+
+    echo ""
+    echo "Port forwarding ended."
+    cancel_job
+else
+    echo "Skipping port forwarding."
+
 # Connection info
 cat <<EOF
 
@@ -168,29 +211,17 @@ Alternative direct SSH with forwarding:
 ssh -L $GRADIO_PORT:localhost:$GRADIO_PORT $CLUSTER_USER@$NODE.$CLUSTER_HOST
 
 Check job status:
-ssh $CLUSTER_USER@$CLUSTER_HOST 'squeue -j $JOB_ID '
+ssh $CLUSTER_USER@$CLUSTER_HOST \"'squeue -j $JOB_ID '\"
 
 Cancel job:
-ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID '
+ssh $CLUSTER_USER@$CLUSTER_HOST \"'scancel $JOB_ID '\"
 =========================================
 
 EOF
-
-# Optional port forwarding
-read -p "Would you like to set up port forwarding now? (y/n): " -n 1 -r
-echo ""
-if [[ $REPLY =~ ^[Yy]$ ]]; then
-    echo "Setting up port forwarding..."
-    ssh -L "${GRADIO_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
-        -t "echo 'Port forwarding active: localhost:${GRADIO_PORT} -> ${NODE}:${GRADIO_PORT}'; bash"
-    echo ""
-    echo "Port forwarding ended."
-else
-    echo "Skipping port forwarding."
     echo "Later you can run: ssh -L $GRADIO_PORT:$NODE:$GRADIO_PORT $CLUSTER_USER@$CLUSTER_HOST"
 fi
 
 echo ""
 echo "Job $JOB_ID is still running on $CLUSTER_HOST:$NODE"
-echo "Don't forget to cancel it when done: ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID'"
+# echo "Don't forget to cancel it when done: ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID'"
```
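Because the Gradio app is launched from an sbatch job submitted over SSH, an HF_TOKEN exported in a local or interactive shell is not necessarily present in the job's environment, which may be why the bug noted above persists. Below is a hedged sketch (not part of this commit) of how the generated SLURM script could export the token on the compute node; it reuses the $SCRIPT_NAME and $JOB_NAME variables defined earlier in serve_on_killarney.sh, and the token file path is assumed to be the default location written by `huggingface-cli login`.

```bash
# Speculative addition: have the generated SLURM script export HF_TOKEN on the
# compute node before starting the app.
cat > "$SCRIPT_NAME" << EOF
#!/bin/bash
#SBATCH --job-name=$JOB_NAME
# ... same #SBATCH directives as in the script above ...

# Fall back to the token file written by 'huggingface-cli login' (assumed
# default path) when HF_TOKEN is not already set in the job environment.
if [ -z "\${HF_TOKEN:-}" ] && [ -f "\$HOME/.cache/huggingface/token" ]; then
    export HF_TOKEN="\$(cat "\$HOME/.cache/huggingface/token")"
fi
echo "HF_TOKEN visible inside job: \$([ -n "\${HF_TOKEN:-}" ] && echo yes || echo no)"

# ... module load, source the venv, and start Gradio as above ...
EOF
```

An alternative would be to forward the variable at submission time, e.g. `sbatch --export=ALL,HF_TOKEN="$HF_TOKEN" $script_location`, at the cost of exposing the token on the remote command line.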