All checks were successful
		
		
	
	Build and push / Pulling repo on server (push) Successful in 2s
				
		
			
				
	
	
		
			246 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			246 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php 
 | 
						|
 | 
						|
 | 
						|
 | 
						|
// Shared SQLite handle; every crawler function reaches it through $GLOBALS['db'].
// The SQLite3 constructor throws on failure instead of returning a falsy value,
// so the failure has to be caught rather than tested with "!".
try {
    $GLOBALS['db'] = new SQLite3('data.db');
} catch (Exception $e) {
    exit("Error loading database");
}

// Counter that getResults() increments and that is dumped after the crawl.
// It has to exist before the first increment / before it is printed.
$GLOBALS['pdfs'] = 0;
 | 
						|
 | 
						|
// if(!file_exists('tmp/ergebnisse.html'))
 | 
						|
// {
 | 
						|
//     $ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
 | 
						|
//     file_put_contents('tmp/ergebnisse.html', $ergebnisse);
 | 
						|
// }
 | 
						|
 | 
						|
// $html = file_get_contents('tmp/ergebnisse.html');
 | 
						|
 | 
						|
// $dom = new DOMDocument;
 | 
						|
// $dom->loadHTML($html);
 | 
						|
 | 
						|
// $xpath = new DOMXPath($dom);
 | 
						|
// $query = '//ul[@class="pagination"]/child::*';
 | 
						|
// $nodes = $xpath->query($query);
 | 
						|
 | 
						|
// $GLOBALS['pdfs'] = 0;
 | 
						|
 | 
						|
// // Loop through the selected nodes
 | 
						|
// foreach ($nodes as $node) {
 | 
						|
//     // Do something with each node, for example, echo its content
 | 
						|
//     $url = $node->getElementsByTagName('a')[0]->getAttribute('href');
 | 
						|
//     $number = intval($node->nodeValue);
 | 
						|
//     if($number > $last_page){
 | 
						|
//         $last_page = $number;
 | 
						|
//     }
 | 
						|
// }
 | 
						|
 | 
						|
// echo "[i] Found $last_page pages\n";
 | 
						|
 | 
						|
// //create an array with all pages
 | 
						|
// $pages = range(1,65);
 | 
						|
 | 
						|
// foreach($pages as $page)
 | 
						|
// {
 | 
						|
//     echo "[i] Crawling page $page\n";
 | 
						|
//     scanPage($page);
 | 
						|
// }
 | 
						|
 | 
						|
// Entry point of the crawl: process result page 1 only, with $usecache = false
// (the flag is also handed on to crawlRuns() by scanPage()).
scanPage(1,false);
 | 
						|
 | 
						|
/**
 * Crawl one page of the dognow.at result listing and store every event on it.
 *
 * The page HTML is read from tmp/pages/<key>.html when caching is enabled and
 * that file exists; otherwise it is downloaded and written to the cache file.
 * Every "resultboard" div is inserted into the `events` table unless a row
 * with the same id already exists, and crawlRuns() is called for the event.
 *
 * @param int|string $key      Page number appended to the listing URL.
 * @param bool       $usecache Reuse cached HTML; also passed on to crawlRuns().
 * @return void
 */
function scanPage($key,$usecache=true)
{
    $page = 'https://www.dognow.at/ergebnisse/?page=' . $key;
    $cachefile = 'tmp/pages/' . $key . '.html';

    if ($usecache === true && file_exists($cachefile)) {
        // Cache hit: read the stored copy instead of hitting the network again.
        $html = file_get_contents($cachefile);
    } else {
        $html = file_get_contents($page);
        // Never cache a failed/empty download, it would be served forever.
        if ($html !== false && $html !== '') {
            file_put_contents($cachefile, $html);
        }
    }

    if ($html === false || $html === '') {
        echo "[!] Could not load page $key\n";
        return;
    }

    $dom = new DOMDocument;
    $dom->loadHTML($html);

    // search for all divs with class "resultboard"
    $xpath = new DOMXPath($dom);
    $nodes = $xpath->query('//div[@class="resultboard info-board info-board-default2"]');

    $db = $GLOBALS['db'];

    foreach ($nodes as $node) {
        // CUPs
        $div = $node->getElementsByTagName('div')[0];
        if (!$div) continue; // board without the expected inner structure

        $id = $div->getAttribute('data-event');
        $inner = $div->getElementsByTagName('div');
        $name = $inner[1] ? trim($inner[1]->nodeValue) : '';
        $organizer = $inner[2] ? trim($inner[2]->nodeValue) : '';
        $date = $inner[3] ? trim($inner[3]->nodeValue) : '';
        $db_date = date(DATE_RFC3339, strtotime($date));

        // Add the event if it is not stored yet. Values come from scraped HTML,
        // so they are bound as parameters instead of being pasted into the SQL.
        $lookup = $db->prepare('SELECT 1 FROM events WHERE id = :id');
        if ($lookup === false) exit($db->lastErrorMsg());
        $lookup->bindValue(':id', (string) $id, SQLITE3_TEXT);
        $res = $lookup->execute();
        if ($res === false) exit($db->lastErrorMsg());
        $known = $res->fetchArray() !== false;
        $lookup->close();

        if (!$known) {
            $insert = $db->prepare('INSERT INTO events (id, name, organizer, date) VALUES (:id, :name, :organizer, :date)');
            if ($insert === false) exit($db->lastErrorMsg());
            $insert->bindValue(':id', (string) $id, SQLITE3_TEXT);
            $insert->bindValue(':name', $name, SQLITE3_TEXT);
            $insert->bindValue(':organizer', $organizer, SQLITE3_TEXT);
            $insert->bindValue(':date', $db_date, SQLITE3_TEXT);
            $insert->execute();
            $insert->close();
        }

        crawlRuns($id,$usecache);

        echo "  [E] $id - $name - $organizer - $date\n";
    }
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
// Print how many runs were handed to getResults(). The counter only exists once
// getResults() has run at least once, so fall back to 0 instead of reading an
// undefined global.
var_dump($GLOBALS['pdfs'] ?? 0);
 | 
						|
 | 
						|
/**
 * Load the run list of one event and process every run row in it.
 *
 * The event HTML comes from tmp/events/<eventid>.html when caching is enabled
 * and the file exists, otherwise it is downloaded and cached. Rows with three
 * cells (name, lk, PDF link) are only printed; rows with four cells
 * (name, lk, gk, PDF link) are additionally inserted into the `runs` table if
 * missing and handed to getResults().
 *
 * @param int|string $eventid  Event id as used by the remote data.php endpoint.
 * @param bool       $usecache Reuse a cached copy of the event HTML.
 * @return void
 */
function crawlRuns($eventid,$usecache=true)
{
    $cachefile = 'tmp/events/' . $eventid . '.html';

    if ($usecache === true && file_exists($cachefile)) {
        $data = file_get_contents($cachefile);
    } else {
        //sleep(1);
        $data = file_get_contents('https://www.dognow.at/ergebnisse/src/data.php?event='. $eventid .'&lauf=0');
        // Do not cache a failed/empty download.
        if ($data !== false && $data !== '') {
            file_put_contents($cachefile, $data);
        }
    }

    if ($data === false || $data === '') {
        echo "  [!] Could not load runs of event $eventid\n";
        return;
    }

    // "No individual results" marker. The two fragments are separated by a line
    // break in the response; \s* keeps the match independent of the exact
    // line-ending bytes (LF vs. CRLF, blank lines).
    if (preg_match('#<b>Einzelwertung</b><br>\s*Derzeit sind keine Ergebnisse#', $data) === 1) {
        echo "  [i] Keine Einzelwertungen\n";
        return;
    }

    $dom = new DOMDocument;
    $dom->loadHTML($data);

    // When a "Cup-Wertung" section is present the first table is skipped and
    // the second table is used as the run list.
    if (strpos($data, 'Cup-Wertung') !== false) {
        echo "  [i] Found Cup-Wertung, skipping first table\n";
        $table = $dom->getElementsByTagName('table')[1];
    } else {
        $table = $dom->getElementsByTagName('table')[0];
    }

    if (!$table) return;

    foreach ($table->getElementsByTagName('tr') as $row) {
        // Row ids look like "<prefix>_<run id>"; keep only the part after "_".
        $rid = $row->getAttribute('id');
        if ($rid) {
            $parts = explode('_', $rid);
            $rid = $parts[1] ?? '';
        }

        $tds = $row->getElementsByTagName('td');
        $cells = count($tds);

        // Only 3-cell (rally obedience) and 4-cell (agility) rows describe a run;
        // anything else (e.g. header rows) must not reuse values of a previous row.
        if ($cells !== 3 && $cells !== 4) continue;

        // The PDF link lives in the last cell; rows without a link are skipped.
        $link = $tds[$cells - 1]->getElementsByTagName('a')[0];
        if (!$link) continue;

        $runname = trim($tds[0]->nodeValue);
        $lk = trim($tds[1]->nodeValue);
        $pdf = $link->getAttribute('href');
        $gk = '';

        if ($cells === 4) { // agility
            $gk = trim($tds[2]->nodeValue);

            // Add the run if it is not stored yet; scraped values are bound as
            // parameters so quotes in names cannot break the statement.
            $db = $GLOBALS['db'];
            $lookup = $db->prepare('SELECT 1 FROM runs WHERE id = :id');
            if ($lookup === false) exit($db->lastErrorMsg());
            $lookup->bindValue(':id', (string) $rid, SQLITE3_TEXT);
            $res = $lookup->execute();
            if ($res === false) exit($db->lastErrorMsg());
            $known = $res->fetchArray() !== false;
            $lookup->close();

            if (!$known) {
                $insert = $db->prepare('INSERT INTO runs (id, name, event, lk, gk) VALUES (:id, :name, :event, :lk, :gk)');
                if ($insert === false) exit($db->lastErrorMsg());
                $insert->bindValue(':id', (string) $rid, SQLITE3_TEXT);
                $insert->bindValue(':name', $runname, SQLITE3_TEXT);
                $insert->bindValue(':event', (string) $eventid, SQLITE3_TEXT);
                $insert->bindValue(':lk', $lk, SQLITE3_TEXT);
                $insert->bindValue(':gk', $gk, SQLITE3_TEXT);
                $insert->execute();
                $insert->close();
            }

            getResults($rid,$eventid);
        }

        if (!$runname || !$lk || !$pdf) continue;

        echo "    [R-$rid] $runname - $lk - $gk - $pdf\n";
    }

    //exit("Crawling $eventid");
}
 | 
						|
 | 
						|
/**
 * Download the result PDF of one run, convert it to CSV and import the rows.
 *
 * The PDF is stored as tmp/results/<event>-<run>.pdf and only downloaded when
 * that file does not exist yet. Conversion and import are delegated to
 * convertPDFtoCSV() and analyzeResultCSV(). Every call increments the global
 * counter $GLOBALS['pdfs'], even when the call returns early.
 *
 * @param int|string $run   Run id ("lauf" parameter of pdf.php).
 * @param int|string $event Event id.
 * @return void
 */
function getResults($run,$event)
{
    // The counter is not guaranteed to be initialised by the caller.
    $GLOBALS['pdfs'] = ($GLOBALS['pdfs'] ?? 0) + 1;
    //return;
    if (!$run || !$event) return;

    $url = 'https://www.dognow.at/ergebnisse/pdf.php?lauf=' . rawurlencode((string) $run)
         . '&event=' . rawurlencode((string) $event);
    $pdffile = 'tmp/results/' . $event . '-' . $run . '.pdf';
    $csvfile = 'tmp/csv/' . $event . '-' . $run . '.pdf.csv';

    if (!file_exists($pdffile)) {
        $pdf = file_get_contents($url);
        // A failed download must not be stored: the empty file would count as
        // "already downloaded" on every later crawl.
        if ($pdf === false || $pdf === '') {
            echo "    [!] Could not download $url\n";
            return;
        }
        file_put_contents($pdffile, $pdf);
    }

    /*if($GLOBALS['db']->query("SELECT * FROM runs  WHERE id = '$run' AND event = '$event'")->fetchArray() != false)
    {
        echo "    [i] Skipping run $run in event $event\n";
        return;
    }*/
    convertPDFtoCSV($pdffile, $csvfile);
    analyzeResultCSV($csvfile, $run, $event);
}
 | 
						|
 | 
						|
/**
 * Import the rows of one result CSV into the `results` table.
 *
 * The first CSV line is the header; its names are lower-cased and stripped of
 * everything but [A-Za-z0-9] to obtain the column keys (e.g. "m/sek" -> "msek").
 * A row is inserted only if no row with the same stnr/run/event exists yet.
 * The script terminates if the file is missing or a database call fails.
 *
 * @param string     $csvfile Path of the CSV file to import.
 * @param int|string $run     Run id stored with every row.
 * @param int|string $event   Event id stored with every row.
 * @return void
 */
function analyzeResultCSV($csvfile,$run,$event)
{
    if(!file_exists($csvfile)) die(" ERR: File $csvfile not found");

    $lines = file($csvfile);
    // An unreadable or empty file has no header row to work with.
    if ($lines === false || count($lines) === 0) return;
    $csv = array_map('str_getcsv', $lines);

    //prepare header for database
    $header = [];
    foreach (array_shift($csv) as $value) {
        $header[] = preg_replace('/[^A-Za-z0-9]/', '', strtolower((string) $value));
    }

    // Key every data row by the header. Rows whose column count differs from the
    // header (blank lines, broken lines) cannot be mapped and are skipped.
    $rows = [];
    foreach ($csv as $fields) {
        if (count($fields) !== count($header)) continue;
        $rows[] = array_combine($header, $fields);
    }
    if (!$rows) return;

    // CSV columns copied into the table; run and event come from the arguments.
    $columns = ['stnr', 'rang', 'teilnehmer', 'hund', 'verein', 'f', 'vw', 'zf', 'zeit', 'gf', 'msek', 'bew', 'punkte'];

    $db = $GLOBALS['db'];

    //add result to db if not exists
    try {
        // Values are bound as parameters: names containing quotes would
        // otherwise break (or alter) the statement.
        $exists = $db->prepare('SELECT 1 FROM results WHERE stnr = :stnr AND run = :run AND event = :event');
        $insert = $db->prepare('INSERT INTO results (stnr, rang, run, event, teilnehmer, hund, verein, f, vw, zf, zeit, gf, msek, bew, punkte) VALUES (:stnr, :rang, :run, :event, :teilnehmer, :hund, :verein, :f, :vw, :zf, :zeit, :gf, :msek, :bew, :punkte)');
        if ($exists === false || $insert === false) exit($db->lastErrorMsg());

        foreach ($rows as $row) {
            $exists->reset();
            $exists->bindValue(':stnr', (string) ($row['stnr'] ?? ''), SQLITE3_TEXT);
            $exists->bindValue(':run', (string) $run, SQLITE3_TEXT);
            $exists->bindValue(':event', (string) $event, SQLITE3_TEXT);
            $res = $exists->execute();
            if ($res === false) exit($db->lastErrorMsg());
            $known = $res->fetchArray() !== false;
            $exists->reset();
            if ($known) continue;

            $insert->reset();
            foreach ($columns as $column) {
                // A column missing from the CSV is stored as an empty string.
                $insert->bindValue(':' . $column, (string) ($row[$column] ?? ''), SQLITE3_TEXT);
            }
            $insert->bindValue(':run', (string) $run, SQLITE3_TEXT);
            $insert->bindValue(':event', (string) $event, SQLITE3_TEXT);
            // SQLite3 reports failures through a false return value unless
            // exceptions were enabled, so both paths are checked.
            if ($insert->execute() === false) exit($db->lastErrorMsg());
        }
    } catch (Exception $ex) {
        //die( $ex->getMessage() );
        exit($db->lastErrorMsg());
    }
}
 | 
						|
 | 
						|
/**
 * Create the CSV for a result PDF unless it already exists.
 *
 * When $targetname is already present nothing happens; otherwise the output of
 * analyze($pdf) is written to $targetname.
 *
 * @param string $pdf        Path of the PDF to convert.
 * @param string $targetname Path of the CSV file to create.
 * @return void
 */
function convertPDFtoCSV($pdf,$targetname)
{
    if (!file_exists($targetname)) {
        $converted = analyze($pdf);
        file_put_contents($targetname, $converted);
    }
}
 | 
						|
 | 
						|
/**
 * Run tabula on a PDF and return what it prints as CSV.
 *
 * @param string $pdf Path of the PDF file.
 * @return string|false|null Output of the command as returned by shell_exec().
 */
function analyze($pdf) {
    echo "    [i] Analyzing $pdf\n";
    // The path contains ids taken from scraped pages, so it is escaped before
    // being placed on the command line.
    $cmd = 'java -jar tabula-1.0.5-jar-with-dependencies.jar -f CSV ' . escapeshellarg($pdf);
    $output = shell_exec($cmd);
    //var_dump($output);
    return $output;
}
 | 
						|
 | 
						|
?>
 |