# Page-capture residue (not part of the Compose file): "Spaces: Running / Running"
# Docker Compose definition of two model-serving workers built from the
# mlenergy/tgi image. Each worker is sharded across two GPUs, joins the
# pre-existing `leaderboard` network, and sends OTLP traces to the host
# named `jaeger` on port 4317.
services:
  # Llama 2 70B chat model, loaded with bitsandbytes quantization.
  Llama2-70B-INT8:
    container_name: worker4
    image: mlenergy/tgi:latest
    # Arguments passed to the image's entrypoint. Two shards are requested;
    # two GPU device ids are reserved under `deploy` below.
    command:
      - "--model-id"
      - "meta-llama/Llama-2-70b-chat-hf"
      - "--num-shard"
      - "2"
      - "--otlp-endpoint"
      - "http://jaeger:4317"
      - "--quantize"
      - "bitsandbytes"
    shm_size: 1g
    environment:
      # Interpolated by Compose from HF_TOKEN in the invoking environment.
      # NOTE(review): presumably required to download this model - confirm.
      HUGGING_FACE_HUB_TOKEN: "${HF_TOKEN}"
    networks:
      - leaderboard
    volumes:
      # Host directory mounted at /data; the MPT-30B service mounts the same
      # host path.
      - /data/leaderboard/tgi-data:/data
    deploy:
      restart_policy:
        # Restart the container regardless of how it exited.
        condition: any
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Quoted so the ids stay strings rather than integers.
              device_ids: ["0", "1"]
              capabilities: [gpu]

  # MosaicML MPT-30B chat model; no quantization flag and no hub token set.
  MPT-30B:
    container_name: worker5
    image: mlenergy/tgi:latest
    # Arguments passed to the image's entrypoint. Two shards are requested;
    # two GPU device ids are reserved under `deploy` below.
    command:
      - "--model-id"
      - "mosaicml/mpt-30b-chat"
      - "--num-shard"
      - "2"
      - "--otlp-endpoint"
      - "http://jaeger:4317"
    shm_size: 1g
    networks:
      - leaderboard
    volumes:
      # Host directory mounted at /data; the Llama2-70B-INT8 service mounts
      # the same host path.
      - /data/leaderboard/tgi-data:/data
    deploy:
      restart_policy:
        # Restart the container regardless of how it exited.
        condition: any
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Quoted so the ids stay strings rather than integers.
              device_ids: ["2", "3"]
              capabilities: [gpu]

networks:
  # `external: true` means Compose expects a network named `leaderboard` to
  # exist already and will not create it.
  leaderboard:
    name: leaderboard
    external: true