more data
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s
This commit is contained in:
parent
949a9dfe62
commit
9b4d159615
@ -7,7 +7,7 @@ $GLOBALS['db'] = new SQLite3('data.db');
|
||||
if(!$GLOBALS['db']) exit("Error loading database");
|
||||
|
||||
//389-7625.pdf.csv
|
||||
// analyzeResultCSV('tmp/csv/389-7625.pdf.csv',7625,389);
|
||||
// analyzeResultCSV('tmp/csv/389-7637.pdf.csv',7637,389);
|
||||
// exit();
|
||||
|
||||
$html = file_get_contents('tmp/ergebnisse.html');
|
||||
@ -34,20 +34,21 @@ foreach ($nodes as $node) {
|
||||
echo "[i] Found $last_page pages\n";
|
||||
|
||||
//create an array with all pages
|
||||
$pages = [];
|
||||
for($i = 1; $i <= $last_page; $i++){
|
||||
$pages[] = 'https://www.dognow.at/ergebnisse/?page=' . $i;
|
||||
}
|
||||
$pages = range(1,65);
|
||||
|
||||
//loop through all pages
|
||||
foreach($pages as $key=> $page){
|
||||
if(file_exists('tmp/pages/' . ($key+1) . '.html')){
|
||||
|
||||
scanPage(1,false);
|
||||
|
||||
function scanPage($key,$usecache=true)
|
||||
{
|
||||
$page = 'https://www.dognow.at/ergebnisse/?page=' . $key;
|
||||
if(file_exists('tmp/pages/' . ($key) . '.html' && $usecache===true)){
|
||||
$html = file_get_contents($page);
|
||||
}
|
||||
else
|
||||
{
|
||||
$html = file_get_contents($page);
|
||||
file_put_contents('tmp/pages/' . ($key+1) . '.html', $html);
|
||||
file_put_contents('tmp/pages/' . ($key) . '.html', $html);
|
||||
}
|
||||
$dom = new DOMDocument;
|
||||
$dom->loadHTML($html);
|
||||
@ -72,20 +73,20 @@ foreach($pages as $key=> $page){
|
||||
if($res->fetchArray() == false)
|
||||
$GLOBALS['db']->exec("INSERT INTO events (id, name, organizer, date) VALUES ('$id', '$name', '$organizer', '$db_date')");
|
||||
|
||||
crawlRuns($id);
|
||||
crawlRuns($id,$usecache);
|
||||
|
||||
echo " [E] $id - $name - $organizer - $date\n";
|
||||
}
|
||||
|
||||
//exit();
|
||||
}
|
||||
|
||||
|
||||
|
||||
var_dump($GLOBALS['pdfs']);
|
||||
|
||||
function crawlRuns($eventid)
|
||||
function crawlRuns($eventid,$usecache=true)
|
||||
{
|
||||
if(file_exists('tmp/events/' . $eventid . '.html'))
|
||||
if(file_exists('tmp/events/' . $eventid . '.html') && $usecache===true)
|
||||
$data = file_get_contents('tmp/events/' . $eventid . '.html');
|
||||
else
|
||||
{
|
||||
@ -94,6 +95,8 @@ function crawlRuns($eventid)
|
||||
file_put_contents('tmp/events/' . $eventid . '.html', $data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//get first table using DOMDocument
|
||||
$dom = new DOMDocument;
|
||||
$dom->loadHTML($data);
|
||||
|
BIN
crawler/data.db
BIN
crawler/data.db
Binary file not shown.
Reference in New Issue
Block a user