# Distributed Llama Docker Environment Configuration
# Copy this file to .env and customize as needed
#
# NOTE: dotenv format — one KEY=value per line, '#' lines are comments.
# (The previous revision had the whole file collapsed onto one commented
# line, so none of these settings actually took effect.)

# Model configuration
MODEL_NAME=llama3_2_3b_instruct_q40
MAX_SEQ_LEN=4096
BUFFER_FLOAT_TYPE=q80

# Thread configuration
CONTROLLER_NTHREADS=4
WORKER_NTHREADS=4

# To use a different model, change MODEL_NAME to one of:
# - llama3_1_8b_instruct_q40
# - llama3_1_405b_instruct_q40
# - llama3_2_1b_instruct_q40
# - llama3_2_3b_instruct_q40
# - llama3_3_70b_instruct_q40
# - deepseek_r1_distill_llama_8b_q40
# - qwen3_0.6b_q40
# - qwen3_1.7b_q40
# - qwen3_8b_q40
# - qwen3_14b_q40
# - qwen3_30b_a3b_q40

# Performance tuning:
# - Adjust CONTROLLER_NTHREADS and WORKER_NTHREADS based on your Pi's CPU cores
# - For Pi 4 (4 cores): use 4 threads
# - For Pi 3 (4 cores): use 2-4 threads
# - For Pi Zero 2 (4 cores): use 2 threads