Merge branch 'master' of https://gitea.haschek.at/Crispi/dogstats
This commit is contained in:
commit
4de6df7175
@ -14,6 +14,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
- name: run crawler
|
||||
run: |
|
||||
sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php/7.4/cli/php.ini
|
||||
cd crawler
|
||||
php crawler.php
|
||||
git config --global user.email "gitea@haschek.at"
|
||||
|
@ -1,43 +1,41 @@
|
||||
<?php
|
||||
|
||||
if(!file_exists('tmp/ergebnisse.html'))
|
||||
{
|
||||
$ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
|
||||
file_put_contents('tmp/ergebnisse.html', $ergebnisse);
|
||||
}
|
||||
|
||||
|
||||
$GLOBALS['db'] = new SQLite3('data.db');
|
||||
if(!$GLOBALS['db']) exit("Error loading database");
|
||||
|
||||
//389-7625.pdf.csv
|
||||
// analyzeResultCSV('tmp/csv/389-7637.pdf.csv',7637,389);
|
||||
// exit();
|
||||
// if(!file_exists('tmp/ergebnisse.html'))
|
||||
// {
|
||||
// $ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
|
||||
// file_put_contents('tmp/ergebnisse.html', $ergebnisse);
|
||||
// }
|
||||
|
||||
$html = file_get_contents('tmp/ergebnisse.html');
|
||||
// $html = file_get_contents('tmp/ergebnisse.html');
|
||||
|
||||
$dom = new DOMDocument;
|
||||
$dom->loadHTML($html);
|
||||
// $dom = new DOMDocument;
|
||||
// $dom->loadHTML($html);
|
||||
|
||||
$xpath = new DOMXPath($dom);
|
||||
$query = '//ul[@class="pagination"]/child::*';
|
||||
$nodes = $xpath->query($query);
|
||||
// $xpath = new DOMXPath($dom);
|
||||
// $query = '//ul[@class="pagination"]/child::*';
|
||||
// $nodes = $xpath->query($query);
|
||||
|
||||
$GLOBALS['pdfs'] = 0;
|
||||
// $GLOBALS['pdfs'] = 0;
|
||||
|
||||
// Loop through the selected nodes
|
||||
foreach ($nodes as $node) {
|
||||
// Do something with each node, for example, echo its content
|
||||
$url = $node->getElementsByTagName('a')[0]->getAttribute('href');
|
||||
$number = intval($node->nodeValue);
|
||||
if($number > $last_page){
|
||||
$last_page = $number;
|
||||
}
|
||||
}
|
||||
// // Loop through the selected nodes
|
||||
// foreach ($nodes as $node) {
|
||||
// // Do something with each node, for example, echo its content
|
||||
// $url = $node->getElementsByTagName('a')[0]->getAttribute('href');
|
||||
// $number = intval($node->nodeValue);
|
||||
// if($number > $last_page){
|
||||
// $last_page = $number;
|
||||
// }
|
||||
// }
|
||||
|
||||
echo "[i] Found $last_page pages\n";
|
||||
// echo "[i] Found $last_page pages\n";
|
||||
|
||||
//create an array with all pages
|
||||
$pages = range(1,65);
|
||||
// //create an array with all pages
|
||||
// $pages = range(1,65);
|
||||
|
||||
// foreach($pages as $page)
|
||||
// {
|
||||
@ -174,6 +172,11 @@ function getResults($run,$event)
|
||||
$url = "https://www.dognow.at/ergebnisse/pdf.php?lauf=$run&event=$event";
|
||||
if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf'))
|
||||
file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url));
|
||||
if($GLOBALS['db']->query("SELECT * FROM runs WHERE id = '$run' AND event = '$event'")->fetchArray() != false)
|
||||
{
|
||||
echo " [i] Skipping run $run in event $event\n";
|
||||
return;
|
||||
}
|
||||
convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv');
|
||||
analyzeResultCSV('tmp/csv/' . $event . '-' . $run . '.pdf.csv',$run,$event);
|
||||
}
|
||||
|
BIN
crawler/data.db
BIN
crawler/data.db
Binary file not shown.
Reference in New Issue
Block a user