This commit is contained in:
piapassecker 2023-12-25 16:39:12 +01:00
commit 4de6df7175
3 changed files with 31 additions and 27 deletions

View File

@ -14,6 +14,7 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: run crawler - name: run crawler
run: | run: |
sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php/7.4/cli/php.ini
cd crawler cd crawler
php crawler.php php crawler.php
git config --global user.email "gitea@haschek.at" git config --global user.email "gitea@haschek.at"

View File

@ -1,43 +1,41 @@
<?php <?php
if(!file_exists('tmp/ergebnisse.html'))
{
$ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
file_put_contents('tmp/ergebnisse.html', $ergebnisse);
}
$GLOBALS['db'] = new SQLite3('data.db'); $GLOBALS['db'] = new SQLite3('data.db');
if(!$GLOBALS['db']) exit("Error loading database"); if(!$GLOBALS['db']) exit("Error loading database");
//389-7625.pdf.csv // if(!file_exists('tmp/ergebnisse.html'))
// analyzeResultCSV('tmp/csv/389-7637.pdf.csv',7637,389); // {
// exit(); // $ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
// file_put_contents('tmp/ergebnisse.html', $ergebnisse);
// }
$html = file_get_contents('tmp/ergebnisse.html'); // $html = file_get_contents('tmp/ergebnisse.html');
$dom = new DOMDocument; // $dom = new DOMDocument;
$dom->loadHTML($html); // $dom->loadHTML($html);
$xpath = new DOMXPath($dom); // $xpath = new DOMXPath($dom);
$query = '//ul[@class="pagination"]/child::*'; // $query = '//ul[@class="pagination"]/child::*';
$nodes = $xpath->query($query); // $nodes = $xpath->query($query);
$GLOBALS['pdfs'] = 0; // $GLOBALS['pdfs'] = 0;
// Loop through the selected nodes // // Loop through the selected nodes
foreach ($nodes as $node) { // foreach ($nodes as $node) {
// Do something with each node, for example, echo its content // // Do something with each node, for example, echo its content
$url = $node->getElementsByTagName('a')[0]->getAttribute('href'); // $url = $node->getElementsByTagName('a')[0]->getAttribute('href');
$number = intval($node->nodeValue); // $number = intval($node->nodeValue);
if($number > $last_page){ // if($number > $last_page){
$last_page = $number; // $last_page = $number;
} // }
} // }
echo "[i] Found $last_page pages\n"; // echo "[i] Found $last_page pages\n";
//create an array with all pages // //create an array with all pages
$pages = range(1,65); // $pages = range(1,65);
// foreach($pages as $page) // foreach($pages as $page)
// { // {
@ -174,6 +172,11 @@ function getResults($run,$event)
$url = "https://www.dognow.at/ergebnisse/pdf.php?lauf=$run&event=$event"; $url = "https://www.dognow.at/ergebnisse/pdf.php?lauf=$run&event=$event";
if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf')) if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf'))
file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url)); file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url));
if($GLOBALS['db']->query("SELECT * FROM runs WHERE id = '$run' AND event = '$event'")->fetchArray() != false)
{
echo " [i] Skipping run $run in event $event\n";
return;
}
convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv'); convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv');
analyzeResultCSV('tmp/csv/' . $event . '-' . $run . '.pdf.csv',$run,$event); analyzeResultCSV('tmp/csv/' . $event . '-' . $run . '.pdf.csv',$run,$event);
} }

Binary file not shown.