Gül Sena Altıntaş committed
Commit: 889a42a · Parent(s): 1d3a5fe

Improved serving script

Added gemma supertoken model
Small bug persists with reading HF_TOKEN

Files changed:
- app.py +6 -2
- serve_on_killarney.sh +62 -31
app.py
CHANGED
@@ -9,11 +9,13 @@ import re
 import logging
 from typing import List, Dict, Any
 import gc
+import os
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+print("hf_toke_fromglobal", os.environ.get("HF_TOKEN"))
 # Model configurations - maps display names to HF model paths
 PREDEFINED_MODELS = [
     "meta-llama/Llama-3.2-1B",
@@ -25,8 +27,8 @@ PREDEFINED_MODELS = [
     "CohereForAI/aya-expanse-8b",
     "common-pile/comma-v0.1-2t",
     "google/byt5-small",
-    "google/byt5-small",
     "gsaltintas/supertoken_models-llama_gpt2",
+    "gsaltintas/supertoken_models-llama_google-gemma-2-2b"
 ]
 # Global cache for loaded models
 model_cache = {}
@@ -104,10 +106,10 @@ def load_model_and_tokenizer(model_path, use_cache=True, progress_callback=None)
     if progress_callback:
         progress_callback(0.1, f"🔄 Starting to load model: {model_path}")
 
-    logger.info(f"Loading model: {model_path}")
 
     # Check if CUDA is available
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    logger.info(f"Loading model: {model_path} using device: {device}")
 
     if progress_callback:
         progress_callback(0.2, f"📥 Loading tokenizer for {model_path}...")
@@ -122,6 +124,8 @@ def load_model_and_tokenizer(model_path, use_cache=True, progress_callback=None)
     if progress_callback:
         progress_callback(0.5, f"🧠 Loading model weights for {model_path}... (this may take a while)")
 
+    logger.info(os.getcwd())
+    logger.info("hf token", os.environ.get("HF_TOKEN"))
     # Load model with appropriate settings
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
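Note on the lingering HF_TOKEN bug mentioned in the commit message. One plausible cause, an assumption rather than something this diff confirms, is the new call logger.info("hf token", os.environ.get("HF_TOKEN")): the logging module treats extra positional arguments as %-style formatting arguments, and since the message has no placeholder the call fails inside the handler instead of logging the value. The module-level print does run, but it writes the secret itself to the logs. A minimal sketch of a safer pattern, using a hypothetical helper name and the token= keyword accepted by recent transformers releases:

import logging
import os

from transformers import AutoModelForCausalLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def load_gated_model(model_path: str):
    """Hypothetical helper, not part of app.py; illustrates token handling only."""
    hf_token = os.environ.get("HF_TOKEN")

    # Log only whether the token is present, never its value.
    # logger.info("hf token", hf_token) would fail: logging expects a
    # "%s"-style placeholder for each extra argument.
    logger.info("HF_TOKEN set: %s", hf_token is not None)

    # Passing token= explicitly covers gated/private repos; None keeps
    # public models working unchanged.
    tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(model_path, token=hf_token)
    return model, tokenizer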
serve_on_killarney.sh
CHANGED
@@ -5,21 +5,24 @@ CLUSTER_HOST="killarney"
 CLUSTER_USER="gsa"
 
 # Job configuration
+ACCOUNT="aip-craffel"
 SCRIPT_NAME="gradio_job.slurm"
-
+APP_DIR="/project/$ACCOUNT/$CLUSTER_USER/quick-tokenizer-accuracy"
+APP_PATH="app.py"
 JOB_NAME="gradio-app"
-
+GPU_TYPE="l40s"
+NUM_GPUS=1
 NODES=1
 NTASKS_PER_NODE=1
 CPUS_PER_TASK=4
 MEM="8G"
 TIME="02:00:00"
-GRADIO_PORT=
-
-script_location="
+GRADIO_PORT=7861
+LOCAL_PORT=7861
+script_location="$APP_DIR/$SCRIPT_NAME"
 
-ENV_PATH="/home/
-
+ENV_PATH="/home/$CLUSTER_USER/tokenizers/.venv/bin/activate"
+OUTPUT_DIR="/project/$ACCOUNT/$CLUSTER_USER/.slurm"
 
 # Function to cleanup temporary files
 cleanup() {
@@ -37,14 +40,14 @@ trap cleanup EXIT INT TERM
 cat > "$SCRIPT_NAME" << EOF
 #!/bin/bash
 #SBATCH --job-name=$JOB_NAME
-#SBATCH --
+#SBATCH --gres=gpu:$GPU_TYPE:$NUM_GPUS
 #SBATCH --nodes=$NODES
 #SBATCH --ntasks-per-node=$NTASKS_PER_NODE
 #SBATCH --cpus-per-task=$CPUS_PER_TASK
 #SBATCH --mem=$MEM
 #SBATCH --time=$TIME
 #SBATCH --account=$ACCOUNT
-#SBATCH --output=$
+#SBATCH --output=$OUTPUT_DIR/%j.out
 
 # Print job info
 echo "Job started on node: \$(hostname)"
@@ -57,15 +60,15 @@ echo "Starting time: \$(date)"
 module load slurm/killarney/24.05.7 StdEnv/2023 gcc/13.3 openmpi/5.0.3 cuda/12.6 python/3.10.13
 
 # Activate virtual environment
-source $ENV_PATH
+source "${ENV_PATH}"
 
 # Set up environment
 export GRADIO_SERVER_NAME="0.0.0.0"
 export GRADIO_SERVER_PORT=$GRADIO_PORT
 
 # Start Gradio app
-echo "Starting Gradio app on port $GRADIO_PORT..."
-
+echo "Starting Gradio app on port ${GRADIO_PORT}..."
+gradio "${APP_PATH}" --watch-dirs "${APP_DIR}"
 
 # Keep the job alive
 echo "Gradio app finished at: \$(date)"
@@ -81,7 +84,7 @@ if [ $? -ne 0 ]; then
 fi
 
 echo "Submitting job to cluster..."
-JOB_ID=$(ssh -t "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'cd
+JOB_ID=$(ssh -t "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'cd $APP_DIR && sbatch --parsable $script_location'")
 
 if [ $? -ne 0 ]; then
     echo "Error: Failed to submit job to cluster"
@@ -122,7 +125,7 @@ done
 
 # Get the allocated node
 NODE=$(ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'squeue -j $JOB_ID -h -o \"%N\"'")
-echo "Job is running on node: $NODE"
+echo "Job (${JOB_ID}) is running on node: ${NODE}"
 
 # Wait a moment for the Gradio app to start
 echo "Waiting for Gradio app to initialize..."
@@ -147,10 +150,50 @@ if [ -n "$GRADIO_CHECK" ]; then
 else
     echo "⚠ Warning: Gradio app may not have started properly"
     echo "Check the job output:"
-    ssh "$CLUSTER_USER@$CLUSTER_HOST" \
-        "bash -l -c 'tail ${JOB_ID}.out'"
+    ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'tail \"${OUTPUT_DIR}/${JOB_ID}.out\"'"
 fi
 
+
+cancel_job() {
+    read -p "Would you like to cancel the job? (y/n): " -n 1 -r
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+        ## job id known only remotely
+        # ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel \${JOB_ID}'"
+        ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel ${JOB_ID} '"
+        # ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel ${JOB_ID}'"
+    fi
+}
+
+# Optional port forwarding
+read -p "Would you like to set up port forwarding now? (y/n): " -n 1 -r
+echo ""
+if [[ $REPLY =~ ^[Yy]$ ]]; then
+    # ssh -L "${GRADIO_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
+    #     -t "echo 'Port forwarding active: localhost:${GRADIO_PORT} -> ${NODE}:${GRADIO_PORT}'; bash
+    # If GRADIO_PORT is in use locally, pick a random free port
+    if lsof -iTCP:"$GRADIO_PORT" -sTCP:LISTEN >/dev/null 2>&1; then
+        echo "Port $GRADIO_PORT is already in use locally — selecting a free one..."
+        LOCAL_PORT=$(comm -23 \
+            <(seq 1024 65535 | sort) \
+            <(lsof -nP -iTCP -sTCP:LISTEN | awk 'NR>1 {print $9}' | awk -F: '{print $NF}' | sort -u) \
+            | awk 'BEGIN{srand()} {ports[NR]=$0} END{print ports[int(rand()*NR)+1]}')
+    else
+        LOCAL_PORT="$GRADIO_PORT"
+    fi
+
+    echo "Using local port: $LOCAL_PORT"
+
+    echo "Setting up port forwarding... Open https://localhost:${LOCAL_PORT} in your browser to access the app."
+    ssh -L "${LOCAL_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
+        -t "echo 'Port forwarding active: localhost:${LOCAL_PORT} -> ${NODE}:${GRADIO_PORT}'; bash"
+
+
+    echo ""
+    echo "Port forwarding ended."
+    cancel_job
+else
+    echo "Skipping port forwarding."
+
 # Connection info
 cat <<EOF
 
@@ -168,29 +211,17 @@ Alternative direct SSH with forwarding:
 ssh -L $GRADIO_PORT:localhost:$GRADIO_PORT $CLUSTER_USER@$NODE.$CLUSTER_HOST
 
 Check job status:
-ssh $CLUSTER_USER@$CLUSTER_HOST 'squeue -j $JOB_ID '
+ssh $CLUSTER_USER@$CLUSTER_HOST \"'squeue -j $JOB_ID '\"
 
 Cancel job:
-ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID '
+ssh $CLUSTER_USER@$CLUSTER_HOST \"'scancel $JOB_ID '\"
 =========================================
 
 EOF
-
-# Optional port forwarding
-read -p "Would you like to set up port forwarding now? (y/n): " -n 1 -r
-echo ""
-if [[ $REPLY =~ ^[Yy]$ ]]; then
-    echo "Setting up port forwarding..."
-    ssh -L "${GRADIO_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
-        -t "echo 'Port forwarding active: localhost:${GRADIO_PORT} -> ${NODE}:${GRADIO_PORT}'; bash"
-    echo ""
-    echo "Port forwarding ended."
-else
-    echo "Skipping port forwarding."
     echo "Later you can run: ssh -L $GRADIO_PORT:$NODE:$GRADIO_PORT $CLUSTER_USER@$CLUSTER_HOST"
 fi
 
 echo ""
 echo "Job $JOB_ID is still running on $CLUSTER_HOST:$NODE"
-echo "Don't forget to cancel it when done: ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID'"
+# echo "Don't forget to cancel it when done: ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID'"
 
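Since the Gradio app now runs inside the SLURM job, HF_TOKEN must also be present in that job's environment on the compute node, which may be why the app still cannot read it. A hedged sketch, not part of this commit, of lines that could be added to the generated gradio_job.slurm heredoc (hence the \$ escaping, matching the existing \$(hostname) lines); it assumes the token was saved on the cluster with huggingface-cli login, whose default location is ~/.cache/huggingface/token:

# Sketch only: export HF_TOKEN inside the generated job script so app.py can read it.
HF_TOKEN_FILE="\$HOME/.cache/huggingface/token"
if [ -f "\$HF_TOKEN_FILE" ]; then
    export HF_TOKEN="\$(cat "\$HF_TOKEN_FILE")"
else
    echo "Warning: no HF token found at \$HF_TOKEN_FILE; gated models may fail to load"
fi

Alternatively, sbatch --export=ALL,HF_TOKEN=... can inject the value at submission time, at the cost of the token appearing in the submit command.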