Merge branch 'master' of https://gitea.haschek.at/Crispi/dogstats
This commit is contained in:
commit
4de6df7175
@ -14,6 +14,7 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
- name: run crawler
|
- name: run crawler
|
||||||
run: |
|
run: |
|
||||||
|
sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php/7.4/cli/php.ini
|
||||||
cd crawler
|
cd crawler
|
||||||
php crawler.php
|
php crawler.php
|
||||||
git config --global user.email "gitea@haschek.at"
|
git config --global user.email "gitea@haschek.at"
|
||||||
|
@ -1,43 +1,41 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
if(!file_exists('tmp/ergebnisse.html'))
|
|
||||||
{
|
|
||||||
$ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
|
|
||||||
file_put_contents('tmp/ergebnisse.html', $ergebnisse);
|
|
||||||
}
|
|
||||||
|
|
||||||
$GLOBALS['db'] = new SQLite3('data.db');
|
$GLOBALS['db'] = new SQLite3('data.db');
|
||||||
if(!$GLOBALS['db']) exit("Error loading database");
|
if(!$GLOBALS['db']) exit("Error loading database");
|
||||||
|
|
||||||
//389-7625.pdf.csv
|
// if(!file_exists('tmp/ergebnisse.html'))
|
||||||
// analyzeResultCSV('tmp/csv/389-7637.pdf.csv',7637,389);
|
// {
|
||||||
// exit();
|
// $ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
|
||||||
|
// file_put_contents('tmp/ergebnisse.html', $ergebnisse);
|
||||||
|
// }
|
||||||
|
|
||||||
$html = file_get_contents('tmp/ergebnisse.html');
|
// $html = file_get_contents('tmp/ergebnisse.html');
|
||||||
|
|
||||||
$dom = new DOMDocument;
|
// $dom = new DOMDocument;
|
||||||
$dom->loadHTML($html);
|
// $dom->loadHTML($html);
|
||||||
|
|
||||||
$xpath = new DOMXPath($dom);
|
// $xpath = new DOMXPath($dom);
|
||||||
$query = '//ul[@class="pagination"]/child::*';
|
// $query = '//ul[@class="pagination"]/child::*';
|
||||||
$nodes = $xpath->query($query);
|
// $nodes = $xpath->query($query);
|
||||||
|
|
||||||
$GLOBALS['pdfs'] = 0;
|
// $GLOBALS['pdfs'] = 0;
|
||||||
|
|
||||||
// Loop through the selected nodes
|
// // Loop through the selected nodes
|
||||||
foreach ($nodes as $node) {
|
// foreach ($nodes as $node) {
|
||||||
// Do something with each node, for example, echo its content
|
// // Do something with each node, for example, echo its content
|
||||||
$url = $node->getElementsByTagName('a')[0]->getAttribute('href');
|
// $url = $node->getElementsByTagName('a')[0]->getAttribute('href');
|
||||||
$number = intval($node->nodeValue);
|
// $number = intval($node->nodeValue);
|
||||||
if($number > $last_page){
|
// if($number > $last_page){
|
||||||
$last_page = $number;
|
// $last_page = $number;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
echo "[i] Found $last_page pages\n";
|
// echo "[i] Found $last_page pages\n";
|
||||||
|
|
||||||
//create an array with all pages
|
// //create an array with all pages
|
||||||
$pages = range(1,65);
|
// $pages = range(1,65);
|
||||||
|
|
||||||
// foreach($pages as $page)
|
// foreach($pages as $page)
|
||||||
// {
|
// {
|
||||||
@ -174,6 +172,11 @@ function getResults($run,$event)
|
|||||||
$url = "https://www.dognow.at/ergebnisse/pdf.php?lauf=$run&event=$event";
|
$url = "https://www.dognow.at/ergebnisse/pdf.php?lauf=$run&event=$event";
|
||||||
if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf'))
|
if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf'))
|
||||||
file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url));
|
file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url));
|
||||||
|
if($GLOBALS['db']->query("SELECT * FROM runs WHERE id = '$run' AND event = '$event'")->fetchArray() != false)
|
||||||
|
{
|
||||||
|
echo " [i] Skipping run $run in event $event\n";
|
||||||
|
return;
|
||||||
|
}
|
||||||
convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv');
|
convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv');
|
||||||
analyzeResultCSV('tmp/csv/' . $event . '-' . $run . '.pdf.csv',$run,$event);
|
analyzeResultCSV('tmp/csv/' . $event . '-' . $run . '.pdf.csv',$run,$event);
|
||||||
}
|
}
|
||||||
|
BIN
crawler/data.db
BIN
crawler/data.db
Binary file not shown.
Reference in New Issue
Block a user