more data
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s

This commit is contained in:
Chris 2023-11-26 15:48:28 +01:00
parent 949a9dfe62
commit 9b4d159615
2 changed files with 16 additions and 13 deletions

View File

@ -7,7 +7,7 @@ $GLOBALS['db'] = new SQLite3('data.db');
if(!$GLOBALS['db']) exit("Error loading database"); if(!$GLOBALS['db']) exit("Error loading database");
//389-7625.pdf.csv //389-7625.pdf.csv
// analyzeResultCSV('tmp/csv/389-7625.pdf.csv',7625,389); // analyzeResultCSV('tmp/csv/389-7637.pdf.csv',7637,389);
// exit(); // exit();
$html = file_get_contents('tmp/ergebnisse.html'); $html = file_get_contents('tmp/ergebnisse.html');
@ -34,20 +34,21 @@ foreach ($nodes as $node) {
echo "[i] Found $last_page pages\n"; echo "[i] Found $last_page pages\n";
//create an array with all pages //create an array with all pages
$pages = []; $pages = range(1,65);
for($i = 1; $i <= $last_page; $i++){
$pages[] = 'https://www.dognow.at/ergebnisse/?page=' . $i;
}
//loop through all pages
foreach($pages as $key=> $page){ scanPage(1,false);
if(file_exists('tmp/pages/' . ($key+1) . '.html')){
function scanPage($key,$usecache=true)
{
$page = 'https://www.dognow.at/ergebnisse/?page=' . $key;
if(file_exists('tmp/pages/' . ($key) . '.html' && $usecache===true)){
$html = file_get_contents($page); $html = file_get_contents($page);
} }
else else
{ {
$html = file_get_contents($page); $html = file_get_contents($page);
file_put_contents('tmp/pages/' . ($key+1) . '.html', $html); file_put_contents('tmp/pages/' . ($key) . '.html', $html);
} }
$dom = new DOMDocument; $dom = new DOMDocument;
$dom->loadHTML($html); $dom->loadHTML($html);
@ -72,20 +73,20 @@ foreach($pages as $key=> $page){
if($res->fetchArray() == false) if($res->fetchArray() == false)
$GLOBALS['db']->exec("INSERT INTO events (id, name, organizer, date) VALUES ('$id', '$name', '$organizer', '$db_date')"); $GLOBALS['db']->exec("INSERT INTO events (id, name, organizer, date) VALUES ('$id', '$name', '$organizer', '$db_date')");
crawlRuns($id); crawlRuns($id,$usecache);
echo " [E] $id - $name - $organizer - $date\n"; echo " [E] $id - $name - $organizer - $date\n";
} }
//exit();
} }
var_dump($GLOBALS['pdfs']); var_dump($GLOBALS['pdfs']);
function crawlRuns($eventid) function crawlRuns($eventid,$usecache=true)
{ {
if(file_exists('tmp/events/' . $eventid . '.html')) if(file_exists('tmp/events/' . $eventid . '.html') && $usecache===true)
$data = file_get_contents('tmp/events/' . $eventid . '.html'); $data = file_get_contents('tmp/events/' . $eventid . '.html');
else else
{ {
@ -94,6 +95,8 @@ function crawlRuns($eventid)
file_put_contents('tmp/events/' . $eventid . '.html', $data); file_put_contents('tmp/events/' . $eventid . '.html', $data);
} }
//get first table using DOMDocument //get first table using DOMDocument
$dom = new DOMDocument; $dom = new DOMDocument;
$dom->loadHTML($data); $dom->loadHTML($data);

Binary file not shown.