# syntax=docker/dockerfile:1
# Dockerfile for Distributed Llama Controller (Raspberry Pi)
# This variant can download models and start the API server
# (the heredocs below require BuildKit, hence the syntax directive above)

FROM arm64v8/debian:bookworm-slim
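
# Note: this image targets arm64 only. Building on an x86_64 host is possible
# with buildx plus QEMU binfmt emulation for the RUN steps; a sketch (the
# image tag is a placeholder, not defined elsewhere in this file):
#   docker buildx build --platform linux/arm64 -t distributed-llama-controller .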

# Install dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    g++ \
    make \
    git \
    python3 \
    python3-pip \
    curl \
    wget \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy source code
COPY src/ ./src/
COPY Makefile ./
COPY launch.py ./

# Build the applications
RUN make dllama && make dllama-api

# Create models directory for volume mount
RUN mkdir -p /app/models

# Create a script to download models
COPY <<EOF /app/download-model.sh
#!/bin/bash
if [ -z "\$1" ]; then
    echo "Usage: download-model.sh <model_name>"
    echo "Available models:"
    python3 launch.py
    exit 1
fi

python3 launch.py "\$1" -skip-run -skip-script -y
EOF

RUN chmod +x /app/download-model.sh
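
# Sketch: to list the models launch.py knows about without starting the API,
# one option is to override the entrypoint and run the script with no
# argument, which prints the model list (image tag is a placeholder):
#   docker run --rm --entrypoint /app/download-model.sh distributed-llama-controller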

# Create entrypoint script
COPY <<EOF /app/entrypoint.sh
#!/bin/bash

# Default values
MODEL_NAME=""
API_PORT=9999
NTHREADS=4
MAX_SEQ_LEN=4096
WORKERS=""
BUFFER_FLOAT_TYPE="q80"

# Parse command line arguments
while [[ \$# -gt 0 ]]; do
    case \$1 in
        --model)
            MODEL_NAME="\$2"
            shift 2
            ;;
        --port)
            API_PORT="\$2"
            shift 2
            ;;
        --nthreads)
            NTHREADS="\$2"
            shift 2
            ;;
        --max-seq-len)
            MAX_SEQ_LEN="\$2"
            shift 2
            ;;
        --workers)
            # Consumes all remaining arguments, so --workers must come last
            shift
            WORKERS="\$@"
            break
            ;;
        --buffer-float-type)
            BUFFER_FLOAT_TYPE="\$2"
            shift 2
            ;;
        --download)
            MODEL_NAME="\$2"
            echo "Downloading model: \$MODEL_NAME"
            /app/download-model.sh "\$MODEL_NAME"
            exit 0
            ;;
        --help)
            echo "Usage: docker run distributed-llama-controller [OPTIONS]"
            echo ""
            echo "Options:"
            echo "  --download <model>          Download a model and exit"
            echo "  --model <model>             Model name to use"
            echo "  --port <port>               API server port (default: 9999)"
            echo "  --nthreads <n>              Number of threads (default: 4)"
            echo "  --max-seq-len <n>           Maximum sequence length (default: 4096)"
            echo "  --buffer-float-type <type>  Buffer float type (default: q80)"
            echo "  --workers <workers>         Space-separated worker addresses; must be the last option (e.g., 10.0.0.2:9999 10.0.0.3:9999)"
            echo ""
            echo "Examples:"
            echo "  # Download a model"
            echo "  docker run -v ./models:/app/models distributed-llama-controller --download llama3_2_3b_instruct_q40"
            echo ""
            echo "  # Run API server with workers"
            echo "  docker run -p 9999:9999 -v ./models:/app/models distributed-llama-controller --model llama3_2_3b_instruct_q40 --workers 10.0.0.2:9999 10.0.0.3:9999"
            exit 0
            ;;
        *)
            echo "Unknown option: \$1"
            exit 1
            ;;
    esac
done

if [ -z "\$MODEL_NAME" ]; then
    echo "Error: --model is required"
    echo "Use --help for usage information"
    exit 1
fi

MODEL_PATH="/app/models/\$MODEL_NAME/dllama_model_\$MODEL_NAME.m"
TOKENIZER_PATH="/app/models/\$MODEL_NAME/dllama_tokenizer_\$MODEL_NAME.t"

if [ ! -f "\$MODEL_PATH" ] || [ ! -f "\$TOKENIZER_PATH" ]; then
    echo "Error: Model files not found for \$MODEL_NAME"
    echo "Model path: \$MODEL_PATH"
    echo "Tokenizer path: \$TOKENIZER_PATH"
    echo ""
    echo "Please download the model first:"
    echo "docker run -v ./models:/app/models distributed-llama-controller --download \$MODEL_NAME"
    exit 1
fi

# Build the command
CMD="./dllama-api --port \$API_PORT --model \$MODEL_PATH --tokenizer \$TOKENIZER_PATH --buffer-float-type \$BUFFER_FLOAT_TYPE --nthreads \$NTHREADS --max-seq-len \$MAX_SEQ_LEN"

if [ ! -z "\$WORKERS" ]; then
    CMD="\$CMD --workers \$WORKERS"
fi

echo "Starting API server with command:"
echo "\$CMD"
echo ""

exec \$CMD
EOF

RUN chmod +x /app/entrypoint.sh

# Expose the default API port
EXPOSE 9999

# Use the entrypoint script
ENTRYPOINT ["/app/entrypoint.sh"]
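
# Typical workflow, as a sketch (the image tag is a placeholder; the model
# name is the one used in the --help text above):
#   docker build -t distributed-llama-controller .
#   docker run --rm distributed-llama-controller --help
#   docker run -v ./models:/app/models distributed-llama-controller --download llama3_2_3b_instruct_q40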