Files
dllama/docker-compose.yml
Chris 42172cbb6f
Some checks failed
main / Linux (amd64, ubuntu-22.04) (push) Successful in 49s
main / Linux (arm64, ubuntu-24.04-arm) (push) Has been cancelled
main / Windows (push) Has been cancelled
init
2025-10-24 11:42:14 +02:00

81 lines
1.8 KiB
YAML

# Compose file format version. Quoted so YAML reads it as the string "3.8"
# rather than a float. Recent Docker Compose releases treat this top-level
# key as informational only; it is kept for older tooling.
version: "3.8"
services:
  # Controller service - downloads models and runs the API.
  # Built from Dockerfile.controller, publishes port 9999 to the host and
  # bind-mounts ./models from the project directory to /app/models.
  controller:
    build:
      context: .
      dockerfile: Dockerfile.controller
    ports:
      - "9999:9999"
    volumes:
      - ./models:/app/models
    networks:
      distributed-llama:
        # Static address inside the distributed-llama subnet.
        ipv4_address: 172.20.0.10
    # Each ${VAR:-default} is substituted by Compose; the default applies
    # when VAR is unset or empty in the invoking environment.
    environment:
      - MODEL_NAME=${MODEL_NAME:-llama3_2_3b_instruct_q40}
      - NTHREADS=${CONTROLLER_NTHREADS:-4}
      - MAX_SEQ_LEN=${MAX_SEQ_LEN:-4096}
      - BUFFER_FLOAT_TYPE=${BUFFER_FLOAT_TYPE:-q80}
    # Folded scalar: the lines below are joined with single spaces into one
    # command string. `>-` strips the trailing newline.
    # The --workers addresses must stay in sync with the ipv4_address
    # values assigned to worker1, worker2 and worker3 below.
    command: >-
      --model ${MODEL_NAME:-llama3_2_3b_instruct_q40}
      --port 9999
      --nthreads ${CONTROLLER_NTHREADS:-4}
      --max-seq-len ${MAX_SEQ_LEN:-4096}
      --buffer-float-type ${BUFFER_FLOAT_TYPE:-q80}
      --workers 172.20.0.11:9999 172.20.0.12:9999 172.20.0.13:9999
    # Short-form depends_on: the workers are started before the controller.
    # NOTE(review): this form only orders startup; it does not wait for a
    # worker to be accepting connections - confirm the controller tolerates
    # that.
    depends_on:
      - worker1
      - worker2
      - worker3

  # Worker services.
  # All three are built from Dockerfile.worker and started with the same
  # arguments; they differ only in their static address on the
  # distributed-llama network.
  worker1:
    build:
      context: .
      dockerfile: Dockerfile.worker
    networks:
      distributed-llama:
        ipv4_address: 172.20.0.11
    environment:
      - NTHREADS=${WORKER_NTHREADS:-4}
    command: >-
      --port 9999
      --nthreads ${WORKER_NTHREADS:-4}

  worker2:
    build:
      context: .
      dockerfile: Dockerfile.worker
    networks:
      distributed-llama:
        ipv4_address: 172.20.0.12
    environment:
      - NTHREADS=${WORKER_NTHREADS:-4}
    command: >-
      --port 9999
      --nthreads ${WORKER_NTHREADS:-4}

  worker3:
    build:
      context: .
      dockerfile: Dockerfile.worker
    networks:
      distributed-llama:
        ipv4_address: 172.20.0.13
    environment:
      - NTHREADS=${WORKER_NTHREADS:-4}
    command: >-
      --port 9999
      --nthreads ${WORKER_NTHREADS:-4}

# Bridge network with an explicit subnet, so that each service can be given
# a fixed ipv4_address inside 172.20.0.0/16.
networks:
  distributed-llama:
    driver: bridge
    ipam:
      config:
        - subnet: "172.20.0.0/16"

# Named volume declared with default settings (explicit empty mapping
# instead of a bare empty value).
# NOTE(review): no service in this file mounts the named volume `models`;
# the controller bind-mounts the host directory ./models instead. Confirm
# whether this declaration is still needed.
volumes:
  models: {}