more data
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s
This commit is contained in:
parent
949a9dfe62
commit
9b4d159615
@ -7,7 +7,7 @@ $GLOBALS['db'] = new SQLite3('data.db');
|
|||||||
if(!$GLOBALS['db']) exit("Error loading database");
|
if(!$GLOBALS['db']) exit("Error loading database");
|
||||||
|
|
||||||
//389-7625.pdf.csv
|
//389-7625.pdf.csv
|
||||||
// analyzeResultCSV('tmp/csv/389-7625.pdf.csv',7625,389);
|
// analyzeResultCSV('tmp/csv/389-7637.pdf.csv',7637,389);
|
||||||
// exit();
|
// exit();
|
||||||
|
|
||||||
$html = file_get_contents('tmp/ergebnisse.html');
|
$html = file_get_contents('tmp/ergebnisse.html');
|
||||||
@ -34,20 +34,21 @@ foreach ($nodes as $node) {
|
|||||||
echo "[i] Found $last_page pages\n";
|
echo "[i] Found $last_page pages\n";
|
||||||
|
|
||||||
//create an array with all pages
|
//create an array with all pages
|
||||||
$pages = [];
|
$pages = range(1,65);
|
||||||
for($i = 1; $i <= $last_page; $i++){
|
|
||||||
$pages[] = 'https://www.dognow.at/ergebnisse/?page=' . $i;
|
|
||||||
}
|
|
||||||
|
|
||||||
//loop through all pages
|
|
||||||
foreach($pages as $key=> $page){
|
scanPage(1,false);
|
||||||
if(file_exists('tmp/pages/' . ($key+1) . '.html')){
|
|
||||||
|
function scanPage($key,$usecache=true)
|
||||||
|
{
|
||||||
|
$page = 'https://www.dognow.at/ergebnisse/?page=' . $key;
|
||||||
|
if(file_exists('tmp/pages/' . ($key) . '.html' && $usecache===true)){
|
||||||
$html = file_get_contents($page);
|
$html = file_get_contents($page);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
$html = file_get_contents($page);
|
$html = file_get_contents($page);
|
||||||
file_put_contents('tmp/pages/' . ($key+1) . '.html', $html);
|
file_put_contents('tmp/pages/' . ($key) . '.html', $html);
|
||||||
}
|
}
|
||||||
$dom = new DOMDocument;
|
$dom = new DOMDocument;
|
||||||
$dom->loadHTML($html);
|
$dom->loadHTML($html);
|
||||||
@ -72,20 +73,20 @@ foreach($pages as $key=> $page){
|
|||||||
if($res->fetchArray() == false)
|
if($res->fetchArray() == false)
|
||||||
$GLOBALS['db']->exec("INSERT INTO events (id, name, organizer, date) VALUES ('$id', '$name', '$organizer', '$db_date')");
|
$GLOBALS['db']->exec("INSERT INTO events (id, name, organizer, date) VALUES ('$id', '$name', '$organizer', '$db_date')");
|
||||||
|
|
||||||
crawlRuns($id);
|
crawlRuns($id,$usecache);
|
||||||
|
|
||||||
echo " [E] $id - $name - $organizer - $date\n";
|
echo " [E] $id - $name - $organizer - $date\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
//exit();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
var_dump($GLOBALS['pdfs']);
|
var_dump($GLOBALS['pdfs']);
|
||||||
|
|
||||||
function crawlRuns($eventid)
|
function crawlRuns($eventid,$usecache=true)
|
||||||
{
|
{
|
||||||
if(file_exists('tmp/events/' . $eventid . '.html'))
|
if(file_exists('tmp/events/' . $eventid . '.html') && $usecache===true)
|
||||||
$data = file_get_contents('tmp/events/' . $eventid . '.html');
|
$data = file_get_contents('tmp/events/' . $eventid . '.html');
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -94,6 +95,8 @@ function crawlRuns($eventid)
|
|||||||
file_put_contents('tmp/events/' . $eventid . '.html', $data);
|
file_put_contents('tmp/events/' . $eventid . '.html', $data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//get first table using DOMDocument
|
//get first table using DOMDocument
|
||||||
$dom = new DOMDocument;
|
$dom = new DOMDocument;
|
||||||
$dom->loadHTML($data);
|
$dom->loadHTML($data);
|
||||||
|
BIN
crawler/data.db
BIN
crawler/data.db
Binary file not shown.
Reference in New Issue
Block a user