diff --git a/.gitea/workflows/autoupdate.yml b/.gitea/workflows/autoupdate.yml index bea568a..535ee32 100644 --- a/.gitea/workflows/autoupdate.yml +++ b/.gitea/workflows/autoupdate.yml @@ -14,6 +14,7 @@ jobs: uses: actions/checkout@v4 - name: run crawler run: | + sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php/7.4/cli/php.ini cd crawler php crawler.php git config --global user.email "gitea@haschek.at" diff --git a/crawler/crawler.php b/crawler/crawler.php index 3a0a11a..4f9e7fe 100644 --- a/crawler/crawler.php +++ b/crawler/crawler.php @@ -1,43 +1,41 @@ loadHTML($html); +// $dom = new DOMDocument; +// $dom->loadHTML($html); -$xpath = new DOMXPath($dom); -$query = '//ul[@class="pagination"]/child::*'; -$nodes = $xpath->query($query); +// $xpath = new DOMXPath($dom); +// $query = '//ul[@class="pagination"]/child::*'; +// $nodes = $xpath->query($query); -$GLOBALS['pdfs'] = 0; +// $GLOBALS['pdfs'] = 0; -// Loop through the selected nodes -foreach ($nodes as $node) { - // Do something with each node, for example, echo its content - $url = $node->getElementsByTagName('a')[0]->getAttribute('href'); - $number = intval($node->nodeValue); - if($number > $last_page){ - $last_page = $number; - } -} +// // Loop through the selected nodes +// foreach ($nodes as $node) { +// // Do something with each node, for example, echo its content +// $url = $node->getElementsByTagName('a')[0]->getAttribute('href'); +// $number = intval($node->nodeValue); +// if($number > $last_page){ +// $last_page = $number; +// } +// } -echo "[i] Found $last_page pages\n"; +// echo "[i] Found $last_page pages\n"; -//create an array with all pages -$pages = range(1,65); +// //create an array with all pages +// $pages = range(1,65); // foreach($pages as $page) // { @@ -174,6 +172,11 @@ function getResults($run,$event) $url = "https://www.dognow.at/ergebnisse/pdf.php?lauf=$run&event=$event"; if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf')) file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url)); + if($GLOBALS['db']->query("SELECT * FROM runs WHERE id = '$run' AND event = '$event'")->fetchArray() != false) + { + echo " [i] Skipping run $run in event $event\n"; + return; + } convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv'); analyzeResultCSV('tmp/csv/' . $event . '-' . $run . '.pdf.csv',$run,$event); } diff --git a/crawler/data.db b/crawler/data.db index c45634b..90880ab 100644 Binary files a/crawler/data.db and b/crawler/data.db differ