diff --git a/crawler/crawler.php b/crawler/crawler.php
new file mode 100644
index 0000000..454a549
--- /dev/null
+++ b/crawler/crawler.php
@@ -0,0 +1,164 @@
+loadHTML($html);
+
+// Select every direct child of the pagination list to find out how many result pages exist
+$xpath = new DOMXPath($dom);
+$query = '//ul[@class="pagination"]/child::*';
+$nodes = $xpath->query($query);
+
+// Global counter; getResults() increments it on every call
+$GLOBALS['pdfs'] = 0;
+
+// $last_page is not assigned in the visible part of this script:
+// start from 0 unless earlier code has already set it
+if (!isset($last_page)) {
+    $last_page = 0;
+}
+
+// The highest numeric label among the pagination entries is the last page.
+// Entries without a number evaluate to 0 and are ignored; the link target is
+// not needed, so entries without an <a> element no longer abort the script.
+foreach ($nodes as $node) {
+    $number = intval($node->nodeValue);
+    if ($number > $last_page) {
+        $last_page = $number;
+    }
+}
+
+echo "[i] Found $last_page pages\n";
+
+// Build the URL of every result page, 1 .. $last_page
+$pages = [];
+for ($i = 1; $i <= $last_page; $i++) {
+    $pages[] = 'https://www.dognow.at/ergebnisse/?page=' . $i;
+}
+
+// Walk every result page: load it (from the local cache when available),
+// then crawl each event listed on it
+foreach ($pages as $key => $page) {
+    $cachefile = 'tmp/pages/' . ($key+1) . '.html';
+
+    if (file_exists($cachefile)) {
+        // Cache hit: read the stored copy instead of downloading the page again
+        $html = file_get_contents($cachefile);
+    } else {
+        $html = file_get_contents($page);
+        // Only cache real responses, otherwise a failed download would be kept for good
+        if ($html !== false) {
+            file_put_contents($cachefile, $html);
+        }
+    }
+
+    // Nothing usable to parse; DOMDocument::loadHTML() rejects empty input
+    if ($html === false || $html === '') {
+        echo "[!] Could not load $page\n";
+        continue;
+    }
+
+    $dom = new DOMDocument;
+    $dom->loadHTML($html);
+
+    // Every event is a div whose class attribute is exactly this string
+    $xpath = new DOMXPath($dom);
+    $query = '//div[@class="resultboard info-board info-board-default2"]';
+    $nodes = $xpath->query($query);
+
+    foreach ($nodes as $node) {
+        // The first nested div carries the event id in its data-event attribute
+        $div = $node->getElementsByTagName('div')->item(0);
+        if (!$div) {
+            continue;
+        }
+
+        // Its nested divs 1..3 hold name, organizer and date (in that order)
+        $fields = $div->getElementsByTagName('div');
+        $id = $div->getAttribute('data-event');
+        $name = $fields->item(1) ? trim($fields->item(1)->nodeValue) : '';
+        $organizer = $fields->item(2) ? trim($fields->item(2)->nodeValue) : '';
+        $date = $fields->item(3) ? trim($fields->item(3)->nodeValue) : '';
+
+        crawlRuns($id);
+
+        echo " [E] $id - $name - $organizer - $date\n";
+    }
+}
+
+
+// Number of getResults() calls made during the crawl
+var_dump($GLOBALS['pdfs']);
+
+/**
+ * Crawl the run overview of one event and print every run found in it.
+ *
+ * The overview HTML is downloaded once and cached as tmp/events/<eventid>.html.
+ * Table rows with three cells are handled as rally obedience runs, rows with
+ * four cells as agility runs; for agility rows getResults() is called as well.
+ * Rows lacking a run name, class ("lk") or PDF link are not printed.
+ *
+ * @param mixed $eventid Event id; the caller passes the data-event attribute value.
+ * @return void
+ */
+function crawlRuns($eventid)
+{
+    $cachefile = 'tmp/events/' . $eventid . '.html';
+
+    if (file_exists($cachefile)) {
+        $data = file_get_contents($cachefile);
+    } else {
+        //sleep(1);
+        $data = file_get_contents('https://www.dognow.at/ergebnisse/src/data.php?event='. $eventid .'&lauf=0');
+        // Only cache real responses, otherwise a failed download would be kept for good
+        if ($data !== false) {
+            file_put_contents($cachefile, $data);
+        }
+    }
+
+    // Nothing usable to parse; DOMDocument::loadHTML() rejects empty input
+    if ($data === false || $data === '') {
+        echo " [!] No data for event $eventid\n";
+        return;
+    }
+
+    // NOTE(review): this marker spans three lines exactly as found in the file;
+    // confirm it still matches what the server actually sends.
+    if(strpos($data,"Einzelwertung
+
+Derzeit sind keine Ergebnisse")!==false)
+    {
+        echo " [i] Keine Einzelwertungen\n";
+        return;
+    }
+
+    $dom = new DOMDocument;
+    $dom->loadHTML($data);
+
+    // When the page mentions "Cup-Wertung" the runs are taken from the second table
+    $tables = $dom->getElementsByTagName('table');
+    if (strpos($data, 'Cup-Wertung') !== false) {
+        echo " [i] Found Cup-Wertung, skipping first table\n";
+        $table = $tables->item(1);
+    } else {
+        $table = $tables->item(0);
+    }
+
+    if (!$table) return;
+
+    foreach ($table->getElementsByTagName('tr') as $row) {
+        // Start every row from a clean slate so that rows with another cell
+        // count cannot re-print the values of the previous row
+        $runname = $lk = $gk = $pdf = '';
+
+        // Row ids look like "<prefix>_<run id>"; keep the part after the underscore
+        $rid = $row->getAttribute('id');
+        if ($rid) {
+            $parts = explode('_', $rid);
+            $rid = isset($parts[1]) ? $parts[1] : '';
+        }
+
+        $tds = $row->getElementsByTagName('td');
+        $cells = $tds->length;
+
+        if ($cells == 3) { // rally obedience
+            $runname = trim($tds->item(0)->nodeValue);
+            $lk = trim($tds->item(1)->nodeValue);
+            $link = $tds->item(2)->getElementsByTagName('a')->item(0);
+            // A cell without a link yields an empty $pdf, which skips the row below
+            $pdf = $link ? $link->getAttribute('href') : '';
+        } else if ($cells == 4) { // agility
+            $runname = trim($tds->item(0)->nodeValue);
+            $lk = trim($tds->item(1)->nodeValue);
+            $gk = trim($tds->item(2)->nodeValue);
+            $link = $tds->item(3)->getElementsByTagName('a')->item(0);
+            $pdf = $link ? $link->getAttribute('href') : '';
+
+            getResults($rid,$eventid);
+        }
+
+        if (!$runname || !$lk || !$pdf) continue;
+
+        echo " [R-$rid] $runname - $lk - $gk - $pdf\n";
+    }
+}
+
+/**
+ * Download the result PDF of one run (unless already stored) and convert it to CSV.
+ *
+ * The PDF is kept as tmp/results/<event>-<run>.pdf and the CSV is written to
+ * tmp/csv/<event>-<run>.pdf.csv. Every call increments $GLOBALS['pdfs'],
+ * including calls that return early because an id is missing.
+ *
+ * @param mixed $run   Run id, sent as the "lauf" query parameter.
+ * @param mixed $event Event id, sent as the "event" query parameter.
+ * @return void
+ */
+function getResults($run,$event)
+{
+    $GLOBALS['pdfs']++;
+    if (!$run || !$event) return;
+
+    $url = "https://www.dognow.at/ergebnisse/pdf.php?lauf=$run&event=$event";
+    $pdffile = 'tmp/results/' . $event . '-' . $run . '.pdf';
+
+    if (!file_exists($pdffile)) {
+        $content = file_get_contents($url);
+        if ($content === false) {
+            // Do not store a failed download: an empty file would count as
+            // "already downloaded" on every later run
+            echo " [!] Download failed: $url\n";
+            return;
+        }
+        file_put_contents($pdffile, $content);
+    }
+
+    convertPDFtoCSV($pdffile, 'tmp/csv/' . $event . '-' . $run . '.pdf.csv');
+}
+
+/**
+ * Write the output of analyze() for a PDF to a CSV file, unless that file already exists.
+ *
+ * @param mixed $pdf        Path of the PDF handed to analyze().
+ * @param mixed $targetname Path of the CSV file to create.
+ * @return void
+ */
+function convertPDFtoCSV($pdf,$targetname)
+{
+    // An existing target is left untouched; the PDF is not analyzed again
+    if (!file_exists($targetname)) {
+        file_put_contents($targetname, analyze($pdf));
+    }
+}
+
+/**
+ * Run the tabula jar on a PDF with "-f CSV" and return what the command printed.
+ *
+ * The jar is referenced by a relative path, so it is resolved against the
+ * current working directory of the script.
+ *
+ * @param mixed $pdf Path of the PDF to pass to the command.
+ * @return mixed Whatever shell_exec() returns for the command.
+ */
+function analyze($pdf) {
+    // The path is built from ids scraped off a remote site, so quote it
+    // before it reaches the shell
+    $cmd = 'java -jar tabula-1.0.5-jar-with-dependencies.jar -f CSV ' . escapeshellarg($pdf);
+    return shell_exec($cmd);
+}
+
+?>
\ No newline at end of file
diff --git a/crawler/parse.php b/crawler/parse.php
new file mode 100644
index 0000000..45f5c1d
--- /dev/null
+++ b/crawler/parse.php
@@ -0,0 +1,19 @@
+