---
# Docker Compose stack: one controller and three workers on a dedicated
# bridge network with static IPv4 addresses.
# Compose file format version. Quoted so it is read as a string, not a float.
# NOTE(review): recent Docker Compose releases treat this key as obsolete;
# confirm whether older docker-compose tooling still needs it.
version: "3.8"

services:
  # Controller service - downloads models and runs API.
  # It is the only service that publishes a port to the host, and it is
  # started after the three workers listed in its --workers argument.
  controller:
    build:
      context: .
      dockerfile: Dockerfile.controller
    ports:
      # Quoted so the host:container pair is always read as a string.
      - "9999:9999"
    volumes:
      # Bind mount of the host ./models directory into the container.
      - ./models:/app/models
    networks:
      distributed-llama:
        ipv4_address: 172.20.0.10
    # Each value falls back to the default after ":-" when the host variable
    # is unset or empty.
    environment:
      MODEL_NAME: "${MODEL_NAME:-llama3_2_3b_instruct_q40}"
      NTHREADS: "${CONTROLLER_NTHREADS:-4}"
      MAX_SEQ_LEN: "${MAX_SEQ_LEN:-4096}"
      BUFFER_FLOAT_TYPE: "${BUFFER_FLOAT_TYPE:-q80}"
    # Folded scalar: the lines below are joined with spaces into one
    # argument string.
    command: >
      --model ${MODEL_NAME:-llama3_2_3b_instruct_q40}
      --port 9999
      --nthreads ${CONTROLLER_NTHREADS:-4}
      --max-seq-len ${MAX_SEQ_LEN:-4096}
      --buffer-float-type ${BUFFER_FLOAT_TYPE:-q80}
      --workers 172.20.0.11:9999 172.20.0.12:9999 172.20.0.13:9999
    # Short-form depends_on: controls start order only.
    depends_on:
      - worker1
      - worker2
      - worker3

  # Worker services.
  # The three workers share the same build, environment and command; only
  # the static address differs. Each address must stay in sync with the
  # controller's --workers list.
  worker1:
    build:
      context: .
      dockerfile: Dockerfile.worker
    networks:
      distributed-llama:
        ipv4_address: 172.20.0.11
    environment:
      NTHREADS: "${WORKER_NTHREADS:-4}"
    command: >
      --port 9999
      --nthreads ${WORKER_NTHREADS:-4}

  worker2:
    build:
      context: .
      dockerfile: Dockerfile.worker
    networks:
      distributed-llama:
        ipv4_address: 172.20.0.12
    environment:
      NTHREADS: "${WORKER_NTHREADS:-4}"
    command: >
      --port 9999
      --nthreads ${WORKER_NTHREADS:-4}

  worker3:
    build:
      context: .
      dockerfile: Dockerfile.worker
    networks:
      distributed-llama:
        ipv4_address: 172.20.0.13
    environment:
      NTHREADS: "${WORKER_NTHREADS:-4}"
    command: >
      --port 9999
      --nthreads ${WORKER_NTHREADS:-4}

networks:
  # Bridge network with an explicit subnet. The services in this file pin
  # static ipv4_address values (172.20.0.10 - 172.20.0.13) inside it.
  distributed-llama:
    driver: bridge
    ipam:
      config:
        - subnet: "172.20.0.0/16"

volumes:
  # Named volume declared with default settings. The empty mapping is
  # written explicitly instead of leaving the value bare.
  # NOTE(review): no service in this file mounts this volume - the
  # controller bind-mounts ./models instead - so confirm it is still needed.
  models: {}