# scripts/entrypoint-controller.sh
# (recovered from web diff view: "new stuff", new file, 103 lines)
#!/bin/bash
# Entrypoint for the distributed-llama controller container.
#
# Parses CLI flags, optionally downloads a model (--download), validates that
# the model and tokenizer files exist under /app/models, then execs the
# dllama-api server with the assembled arguments.
#
# Exit codes: 0 on success (or after --download/--help), 1 on usage or
# missing-model errors.
set -euo pipefail

# Print an error to stderr and exit non-zero.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Default values
MODEL_NAME=""
API_PORT=9999
NTHREADS=4
MAX_SEQ_LEN=4096
WORKERS=()            # array: one element per worker address (was a flat string — SC2124)
BUFFER_FLOAT_TYPE="q80"

# Require that flag "$1" was given a value; used before consuming "$2".
require_arg() {
  [[ $# -ge 2 ]] || die "Error: $1 requires an argument"
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --model)
      require_arg "$@"
      MODEL_NAME="$2"
      shift 2
      ;;
    --port)
      require_arg "$@"
      API_PORT="$2"
      shift 2
      ;;
    --nthreads)
      require_arg "$@"
      NTHREADS="$2"
      shift 2
      ;;
    --max-seq-len)
      require_arg "$@"
      MAX_SEQ_LEN="$2"
      shift 2
      ;;
    --workers)
      # All remaining arguments are worker addresses; keep them as separate
      # array elements so addresses are passed through without re-splitting.
      shift
      WORKERS=("$@")
      break
      ;;
    --buffer-float-type)
      require_arg "$@"
      BUFFER_FLOAT_TYPE="$2"
      shift 2
      ;;
    --download)
      require_arg "$@"
      MODEL_NAME="$2"
      echo "Downloading model: $MODEL_NAME"
      # set -e ensures a failed download propagates a non-zero exit.
      /app/download-model.sh "$MODEL_NAME"
      exit 0
      ;;
    --help)
      echo "Usage: docker run distributed-llama-controller [OPTIONS]"
      echo ""
      echo "Options:"
      echo "  --download <model>     Download a model and exit"
      echo "  --model <model>        Model name to use"
      echo "  --port <port>          API server port (default: 9999)"
      echo "  --nthreads <n>         Number of threads (default: 4)"
      echo "  --max-seq-len <n>      Maximum sequence length (default: 4096)"
      echo "  --buffer-float-type <type>  Buffer float type (default: q80)"
      echo "  --workers <workers>    Space-separated list of worker addresses"
      echo ""
      echo "Examples:"
      echo "  # Download a model"
      echo "  docker run -v ./models:/app/models distributed-llama-controller --download llama3_2_3b_instruct_q40"
      echo ""
      echo "  # Run API server with workers"
      echo "  docker run -p 9999:9999 -v ./models:/app/models distributed-llama-controller \\"
      echo "    --model llama3_2_3b_instruct_q40 --workers 10.0.0.2:9999 10.0.0.3:9999"
      exit 0
      ;;
    *)
      die "Unknown option: $1"
      ;;
  esac
done

if [[ -z "$MODEL_NAME" ]]; then
  printf '%s\n' "Error: --model is required (unless using --download)" >&2
  printf '%s\n' "Use --help for usage information" >&2
  exit 1
fi

MODEL_PATH="/app/models/$MODEL_NAME/dllama_model_$MODEL_NAME.m"
TOKENIZER_PATH="/app/models/$MODEL_NAME/dllama_tokenizer_$MODEL_NAME.t"

if [[ ! -f "$MODEL_PATH" || ! -f "$TOKENIZER_PATH" ]]; then
  {
    echo "Error: Model files not found for $MODEL_NAME"
    echo "Model path: $MODEL_PATH"
    echo "Tokenizer path: $TOKENIZER_PATH"
    echo ""
    echo "Please download the model first:"
    echo "docker run -v ./models:/app/models distributed-llama-controller --download $MODEL_NAME"
  } >&2
  exit 1
fi

# Build the command as an array so arguments with spaces survive intact
# (the previous string-concatenation + unquoted exec re-split every word).
CMD=(./dllama-api
  --port "$API_PORT"
  --model "$MODEL_PATH"
  --tokenizer "$TOKENIZER_PATH"
  --buffer-float-type "$BUFFER_FLOAT_TYPE"
  --nthreads "$NTHREADS"
  --max-seq-len "$MAX_SEQ_LEN")

if [[ ${#WORKERS[@]} -gt 0 ]]; then
  CMD+=(--workers "${WORKERS[@]}")
fi

echo "Starting API server with command:"
printf '%q ' "${CMD[@]}"
echo ""
echo ""

exec "${CMD[@]}"
# (end of file — web-UI footer residue removed)