refactor: enhance error logging and add HTTP timeout configuration in generator
This commit is contained in:
@@ -22,4 +22,6 @@
|
|||||||
|
|
||||||
`llama-server -hf unsloth/gemma-4-E2B-it-GGUF:Q4_K_S --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 194 t/s
|
`llama-server -hf unsloth/gemma-4-E2B-it-GGUF:Q4_K_S --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 194 t/s
|
||||||
|
|
||||||
`llama-server -hf unsloth/gemma-4-E4B-it-GGUF:Q3_K_XL --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 126 t/s
|
`llama-server -hf unsloth/gemma-4-E4B-it-GGUF:Q3_K_XL --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 126 t/s
|
||||||
|
|
||||||
|
`llama-server -hf unsloth/gemma-4-26B-A4B-it-qat-GGUF:UD-Q4_K_XL --reasoning off -fa on -ngl 99 -b 2048 -ub 2048 -c 4096 --temp 1.0 --top-p 0.95 --top-k 64 --no-mmap -t 4` -> 20 t/s
|
||||||
@@ -1,9 +1,20 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
declare(strict_types=1);
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
// Override PHP's logging configuration for this script: enable error logging,
// write the log to errors.log one directory above this file, keep errors out
// of the rendered output, and report every error level.
|
||||||
|
ini_set('log_errors', '1');
|
||||||
|
ini_set('error_log', __DIR__ . '/../errors.log');
|
||||||
|
ini_set('display_errors', '0');
|
||||||
|
error_reporting(E_ALL);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
require_once __DIR__ . '/../vendor/autoload.php';
|
require_once __DIR__ . '/../vendor/autoload.php';
|
||||||
|
|
||||||
|
use Symfony\Component\HttpClient\HttpClient;
|
||||||
|
use Symfony\Component\HttpClient\Psr18Client;
|
||||||
|
|
||||||
function readLocalEnvValue(string $envFilePath, string $key): string
|
function readLocalEnvValue(string $envFilePath, string $key): string
|
||||||
{
|
{
|
||||||
if (! is_file($envFilePath) || ! is_readable($envFilePath)) {
|
if (! is_file($envFilePath) || ! is_readable($envFilePath)) {
|
||||||
@@ -238,6 +249,12 @@ $apiKey = getenv('LLAMA_API_KEY') ?: '';
|
|||||||
$model = getenv('LLAMA_MODEL') ?: 'local-model';
|
$model = getenv('LLAMA_MODEL') ?: 'local-model';
|
||||||
$maxTokens = (int) (getenv('LLAMA_MAX_TOKENS') ?: 4096);
|
$maxTokens = (int) (getenv('LLAMA_MAX_TOKENS') ?: 4096);
|
||||||
$reasoningEffort = getenv('LLAMA_REASONING_EFFORT') ?: 'none';
|
$reasoningEffort = getenv('LLAMA_REASONING_EFFORT') ?: 'none';
|
||||||
|
// HTTP timeout in seconds, read from LLAMA_HTTP_TIMEOUT. Because `?:` tests
// truthiness, an unset, empty, or "0" value yields the default of 600.
$httpTimeoutSeconds = (float) (getenv('LLAMA_HTTP_TIMEOUT') ?: '600');
|
||||||
|
// Value for the HTTP client's max-duration setting, in seconds, read from
// LLAMA_HTTP_MAX_DURATION; defaults to 0 when the variable is unset or empty.
// NOTE(review): Symfony HttpClient appears to treat max_duration <= 0 as
// "no limit" — confirm against the library documentation.
$httpMaxDurationSeconds = (float) (getenv('LLAMA_HTTP_MAX_DURATION') ?: '0');
|
||||||
|
|
||||||
|
// Non-positive results (including non-numeric strings, which cast to 0.0)
// fall back to the 600-second default.
if ($httpTimeoutSeconds <= 0) {
|
||||||
|
$httpTimeoutSeconds = 600.0;
|
||||||
|
}
|
||||||
|
|
||||||
$topicHint = isset($_GET['topic']) ? trim((string) $_GET['topic']) : '';
|
$topicHint = isset($_GET['topic']) ? trim((string) $_GET['topic']) : '';
|
||||||
$topicHint = mb_substr($topicHint, 0, 120);
|
$topicHint = mb_substr($topicHint, 0, 120);
|
||||||
@@ -276,6 +293,10 @@ $bannedTerms = [
|
|||||||
$bannedTermsList = implode(', ', $bannedTerms);
|
$bannedTermsList = implode(', ', $bannedTerms);
|
||||||
|
|
||||||
$factory = OpenAI::factory()->withBaseUri('http://localhost:8080/v1');
|
$factory = OpenAI::factory()->withBaseUri('http://localhost:8080/v1');
|
||||||
|
// Give the factory an explicit HTTP client: a Symfony HttpClient wrapped in
// Psr18Client, created with the configured timeout and max-duration values.
$factory = $factory->withHttpClient(new Psr18Client(HttpClient::create([
|
||||||
|
'timeout' => $httpTimeoutSeconds,
|
||||||
|
'max_duration' => $httpMaxDurationSeconds,
|
||||||
|
])));
|
||||||
if ($apiKey !== '') {
|
if ($apiKey !== '') {
|
||||||
$factory = $factory->withApiKey($apiKey);
|
$factory = $factory->withApiKey($apiKey);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user