diff --git a/README.md b/README.md
index 27debf7..5bd4444 100644
--- a/README.md
+++ b/README.md
@@ -22,4 +22,6 @@
 `llama-server -hf unsloth/gemma-4-E2B-it-GGUF:Q4_K_S --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 194 t/s
-`llama-server -hf unsloth/gemma-4-E4B-it-GGUF:Q3_K_XL --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 126 t/s
\ No newline at end of file
+`llama-server -hf unsloth/gemma-4-E4B-it-GGUF:Q3_K_XL --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 126 t/s
+
+`llama-server -hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:UD-Q4_K_XL --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 20 t/s
\ No newline at end of file
diff --git a/web/generator.php b/web/generator.php
index 79f1c68..51b77c2 100644
--- a/web/generator.php
+++ b/web/generator.php
@@ -1,9 +1,20 @@
 withBaseUri('http://localhost:8080/v1');
+$factory = $factory->withHttpClient(new Psr18Client(HttpClient::create([
+    'timeout' => $httpTimeoutSeconds,
+    'max_duration' => $httpMaxDurationSeconds,
+])));
 if ($apiKey !== '') {
     $factory = $factory->withApiKey($apiKey);
 }