more data
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s

This commit is contained in:
Chris 2023-11-26 15:48:28 +01:00
parent 949a9dfe62
commit 9b4d159615
2 changed files with 16 additions and 13 deletions

View File

@ -7,7 +7,7 @@ $GLOBALS['db'] = new SQLite3('data.db');
// Abort the script if the database handle stored in $GLOBALS['db'] is falsy.
// NOTE(review): SQLite3's constructor throws an Exception on failure rather than
// returning false, so this guard likely never triggers — confirm and consider try/catch.
if(!$GLOBALS['db']) exit("Error loading database");
//389-7625.pdf.csv
// Disabled calls that run analyzeResultCSV() on a single CSV file and then stop the script.
// NOTE(review): presumably manual debugging aids — confirm before removing.
// analyzeResultCSV('tmp/csv/389-7625.pdf.csv',7625,389);
// analyzeResultCSV('tmp/csv/389-7637.pdf.csv',7637,389);
// exit();
// Load the locally stored results page into $html.
// NOTE(review): file_get_contents() returns false on failure and the result is not checked here.
$html = file_get_contents('tmp/ergebnisse.html');
@ -34,20 +34,21 @@ foreach ($nodes as $node) {
echo "[i] Found $last_page pages\n";
//create an array with all pages
$pages = [];
for($i = 1; $i <= $last_page; $i++){
$pages[] = 'https://www.dognow.at/ergebnisse/?page=' . $i;
}
$pages = range(1,65);
//loop through all pages
foreach($pages as $key=> $page){
if(file_exists('tmp/pages/' . ($key+1) . '.html')){
scanPage(1,false);
function scanPage($key,$usecache=true)
{
$page = 'https://www.dognow.at/ergebnisse/?page=' . $key;
if(file_exists('tmp/pages/' . ($key) . '.html' && $usecache===true)){
$html = file_get_contents($page);
}
else
{
$html = file_get_contents($page);
file_put_contents('tmp/pages/' . ($key+1) . '.html', $html);
file_put_contents('tmp/pages/' . ($key) . '.html', $html);
}
$dom = new DOMDocument;
$dom->loadHTML($html);
@ -72,20 +73,20 @@ foreach($pages as $key=> $page){
if($res->fetchArray() == false)
$GLOBALS['db']->exec("INSERT INTO events (id, name, organizer, date) VALUES ('$id', '$name', '$organizer', '$db_date')");
crawlRuns($id);
crawlRuns($id,$usecache);
echo " [E] $id - $name - $organizer - $date\n";
}
//exit();
}
// Debug output: dump whatever is stored in $GLOBALS['pdfs'].
// NOTE(review): where this global is populated is not visible in this view — verify it is set before this line runs.
var_dump($GLOBALS['pdfs']);
function crawlRuns($eventid)
function crawlRuns($eventid,$usecache=true)
{
if(file_exists('tmp/events/' . $eventid . '.html'))
if(file_exists('tmp/events/' . $eventid . '.html') && $usecache===true)
$data = file_get_contents('tmp/events/' . $eventid . '.html');
else
{
@ -94,6 +95,8 @@ function crawlRuns($eventid)
file_put_contents('tmp/events/' . $eventid . '.html', $data);
}
//get first table using DOMDocument
$dom = new DOMDocument;
$dom->loadHTML($data);

Binary file not shown.