Spaces:
Runtime error
Runtime error
da03
committed on
Commit
·
64a144d
1
Parent(s):
888f299
- dispatcher.py +1 -1
- start_system.sh +1 -1
- start_workers.py +2 -2
- worker.py +2 -2
dispatcher.py
CHANGED
@@ -1210,7 +1210,7 @@ if __name__ == "__main__":
|
|
1210 |
import argparse
|
1211 |
|
1212 |
parser = argparse.ArgumentParser(description="Dispatcher for Neural OS")
|
1213 |
-
parser.add_argument("--port", type=int, default=
|
1214 |
args = parser.parse_args()
|
1215 |
|
1216 |
logger.info(f"🌐 Starting dispatcher on 0.0.0.0:{args.port}")
|
|
|
1210 |
import argparse
|
1211 |
|
1212 |
parser = argparse.ArgumentParser(description="Dispatcher for Neural OS")
|
1213 |
+
parser.add_argument("--port", type=int, default=7860, help="Port to run the dispatcher on")
|
1214 |
args = parser.parse_args()
|
1215 |
|
1216 |
logger.info(f"🌐 Starting dispatcher on 0.0.0.0:{args.port}")
|
start_system.sh
CHANGED
@@ -148,7 +148,7 @@ echo "✅ Dispatcher started (PID: $DISPATCHER_PID)"
|
|
148 |
|
149 |
# Start workers
|
150 |
echo "🔧 Starting $NUM_GPUS GPU workers..."
|
151 |
-
python start_workers.py --num-gpus $NUM_GPUS --no-monitor > workers.log 2>&1
|
152 |
WORKER_START_EXIT_CODE=$?
|
153 |
|
154 |
# Wait for workers to fully load models and register (60 seconds)
|
|
|
148 |
|
149 |
# Start workers
|
150 |
echo "🔧 Starting $NUM_GPUS GPU workers..."
|
151 |
+
python start_workers.py --num-gpus $NUM_GPUS --dispatcher-url "http://localhost:$DISPATCHER_PORT" --no-monitor > workers.log 2>&1
|
152 |
WORKER_START_EXIT_CODE=$?
|
153 |
|
154 |
# Wait for workers to fully load models and register (60 seconds)
|
start_workers.py
CHANGED
@@ -13,7 +13,7 @@ import os
|
|
13 |
from typing import List
|
14 |
|
15 |
class WorkerManager:
|
16 |
-
def __init__(self, num_gpus: int, dispatcher_url: str = "http://localhost:
|
17 |
self.num_gpus = num_gpus
|
18 |
self.dispatcher_url = dispatcher_url
|
19 |
self.processes: List[subprocess.Popen] = []
|
@@ -143,7 +143,7 @@ def main():
|
|
143 |
parser = argparse.ArgumentParser(description="Start multiple GPU workers")
|
144 |
parser.add_argument("--num-gpus", type=int, required=True,
|
145 |
help="Number of GPU workers to start")
|
146 |
-
parser.add_argument("--dispatcher-url", type=str, default="http://localhost:
|
147 |
help="URL of the dispatcher service")
|
148 |
parser.add_argument("--no-monitor", action="store_true",
|
149 |
help="Start workers but don't monitor them")
|
|
|
13 |
from typing import List
|
14 |
|
15 |
class WorkerManager:
|
16 |
+
def __init__(self, num_gpus: int, dispatcher_url: str = "http://localhost:7860"):
|
17 |
self.num_gpus = num_gpus
|
18 |
self.dispatcher_url = dispatcher_url
|
19 |
self.processes: List[subprocess.Popen] = []
|
|
|
143 |
parser = argparse.ArgumentParser(description="Start multiple GPU workers")
|
144 |
parser.add_argument("--num-gpus", type=int, required=True,
|
145 |
help="Number of GPU workers to start")
|
146 |
+
parser.add_argument("--dispatcher-url", type=str, default="http://localhost:7860",
|
147 |
help="URL of the dispatcher service")
|
148 |
parser.add_argument("--no-monitor", action="store_true",
|
149 |
help="Start workers but don't monitor them")
|
worker.py
CHANGED
@@ -27,7 +27,7 @@ torch.backends.cuda.matmul.allow_tf32 = True
|
|
27 |
torch.backends.cudnn.allow_tf32 = True
|
28 |
|
29 |
class GPUWorker:
|
30 |
-
def __init__(self, worker_address: str, dispatcher_url: str = "http://localhost:
|
31 |
self.worker_address = worker_address # e.g., "localhost:8001", "192.168.1.100:8002"
|
32 |
# Parse port from worker address
|
33 |
if ':' in worker_address:
|
@@ -771,7 +771,7 @@ if __name__ == "__main__":
|
|
771 |
# Parse command line arguments
|
772 |
parser = argparse.ArgumentParser(description="GPU Worker for Neural OS")
|
773 |
parser.add_argument("--worker-address", type=str, required=True, help="Worker address (e.g., 'localhost:8001', '192.168.1.100:8002')")
|
774 |
-
parser.add_argument("--dispatcher-url", type=str, default="http://localhost:
|
775 |
args = parser.parse_args()
|
776 |
|
777 |
# Parse port from worker address for validation
|
|
|
27 |
torch.backends.cudnn.allow_tf32 = True
|
28 |
|
29 |
class GPUWorker:
|
30 |
+
def __init__(self, worker_address: str, dispatcher_url: str = "http://localhost:7860"):
|
31 |
self.worker_address = worker_address # e.g., "localhost:8001", "192.168.1.100:8002"
|
32 |
# Parse port from worker address
|
33 |
if ':' in worker_address:
|
|
|
771 |
# Parse command line arguments
|
772 |
parser = argparse.ArgumentParser(description="GPU Worker for Neural OS")
|
773 |
parser.add_argument("--worker-address", type=str, required=True, help="Worker address (e.g., 'localhost:8001', '192.168.1.100:8002')")
|
774 |
+
parser.add_argument("--dispatcher-url", type=str, default="http://localhost:7860", help="Dispatcher URL")
|
775 |
args = parser.parse_args()
|
776 |
|
777 |
# Parse port from worker address for validation
|