diff --git a/crawler/.gitattributes b/crawler/.gitattributes
deleted file mode 100644
index df398aa..0000000
--- a/crawler/.gitattributes
+++ /dev/null
@@ -1,2 +0,0 @@
-*.pdf filter=lfs diff=lfs merge=lfs -text
-*.csv filter=lfs diff=lfs merge=lfs -text
diff --git a/crawler/base.sql b/crawler/base.sql
new file mode 100644
index 0000000..9e29999
--- /dev/null
+++ b/crawler/base.sql
@@ -0,0 +1,39 @@
+BEGIN TRANSACTION;
+CREATE TABLE IF NOT EXISTS "events" (
+    "ID" INTEGER UNIQUE,
+    "name" TEXT,
+    "date" TEXT,
+    "organizer" TEXT,
+    PRIMARY KEY("ID")
+);
+CREATE TABLE IF NOT EXISTS "runs" (
+    "id" INTEGER UNIQUE,
+    "event" INTEGER,
+    "name" TEXT,
+    "lk" TEXT,
+    "gk" TEXT,
+    FOREIGN KEY("event") REFERENCES "events"("ID"),
+    PRIMARY KEY("id")
+);
+CREATE TABLE IF NOT EXISTS "results" (
+    "id" INTEGER,
+    "event" INTEGER,
+    "run" INTEGER,
+    "rang" TEXT,
+    "stnr" INTEGER,
+    "teilnehmer" TEXT,
+    "hund" TEXT,
+    "verein" TEXT,
+    "f" INTEGER,
+    "vw" INTEGER,
+    "zf" INTEGER,
+    "zeit" REAL,
+    "gf" REAL,
+    "msek" REAL,
+    "bew" TEXT,
+    "punkte" TEXT,
+    PRIMARY KEY("id" AUTOINCREMENT),
+    FOREIGN KEY("run") REFERENCES "runs"("id"),
+    FOREIGN KEY("event") REFERENCES "events"("ID")
+);
+COMMIT;
diff --git a/crawler/crawler.php b/crawler/crawler.php
index 454a549..fba4281 100644
--- a/crawler/crawler.php
+++ b/crawler/crawler.php
@@ -3,6 +3,11 @@
 //$ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
 //file_put_contents('tmp/ergebnisse.html', $ergebnisse);
 
+$GLOBALS['db'] = new SQLite3('data.db');
+if(!$GLOBALS['db']) exit("Error loading database");
+
+var_dump($GLOBALS['db']);
+
 $html = file_get_contents('tmp/ergebnisse.html');
 
 $dom = new DOMDocument;
@@ -52,13 +57,18 @@ foreach($pages as $key=> $page){
 
     // Loop through the selected nodes
     foreach ($nodes as $node) {
-        // Do something with each node, for example, echo its content
-        //echo $node->nodeValue . "\n";
+        // CUPs
         $div = $node->getElementsByTagName('div')[0];
         $id = $div->getAttribute('data-event');
         $name = trim($div->getElementsByTagName('div')[1]->nodeValue);
         $organizer = trim($div->getElementsByTagName('div')[2]->nodeValue);
         $date = trim($div->getElementsByTagName('div')[3]->nodeValue);
+        $db_date = date(DATE_RFC3339, strtotime($date));
+
+        //store the event once: the primary key on events.ID makes OR IGNORE skip ids that are already present; values are bound because a quote in a scraped name would break the SQL text
+        $insertEvent = $GLOBALS['db']->prepare("INSERT OR IGNORE INTO events (id, name, organizer, date) VALUES (:id, :name, :organizer, :date)");
+        foreach(['id' => $id, 'name' => $name, 'organizer' => $organizer, 'date' => $db_date] as $eventField => $eventValue) $insertEvent->bindValue(':' . $eventField, $eventValue);
+        $insertEvent->execute();
 
         crawlRuns($id);
 
@@ -124,6 +134,12 @@ Derzeit sind keine Ergebnisse")!==false)
         $gk = trim($tds[2]->nodeValue);
         $pdf = $tds[3]->getElementsByTagName('a')[0]->getAttribute('href');
 
+        //store the run once: the primary key on runs.id makes OR IGNORE skip ids that are already present; values are bound because a quote in a scraped name would break the SQL text
+        $insertRun = $GLOBALS['db']->prepare("INSERT OR IGNORE INTO runs (id, name, event, lk, gk) VALUES (:id, :name, :event, :lk, :gk)");
+        foreach(['id' => $rid, 'name' => $runname, 'event' => $eventid, 'lk' => $lk, 'gk' => $gk] as $runField => $runValue) $insertRun->bindValue(':' . $runField, $runValue);
+        $insertRun->execute();
+
+
         getResults($rid,$eventid);
     }
 
@@ -146,6 +162,50 @@ function getResults($run,$event)
     if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf'))
         file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url));
     convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv');
+    analyzeResultCSV('tmp/csv/' . $event . '-' . $run . 
'.pdf.csv',$run,$event);
+}
+
+/**
+ * Import a converted result sheet (CSV) into the "results" table. Rows already stored for the
+ * same stnr/run/event are skipped; a failing statement ends the script with the SQLite error.
+ *
+ * @param string $csv   path of the CSV file; its first line holds the column names
+ * @param mixed  $run   id of the run the rows belong to
+ * @param mixed  $event id of the event the run belongs to
+ */
+function analyzeResultCSV($csv,$run,$event)
+{
+    $db = $GLOBALS['db'];
+    $lines = file($csv);
+    if($lines === false || count($lines) < 2) return; //unreadable file or no data rows: nothing to import
+    $rows = array_map('str_getcsv', $lines);
+
+    //reduce the header names to letters and digits so they can serve as row keys / column names
+    $header = array_map(function($name) { return preg_replace('/[^A-Za-z0-9]/', '', strtolower($name)); }, array_shift($rows));
+    $cols = ['stnr', 'teilnehmer', 'hund', 'verein', 'f', 'vw', 'zf', 'zeit', 'gf', 'msek', 'bew'];
+
+    //values are bound, never spliced into the SQL text: a quote inside a name would otherwise break or alter the statement
+    $find = $db->prepare('SELECT 1 FROM results WHERE stnr = :stnr AND run = :run AND event = :event');
+    $add = $db->prepare('INSERT INTO results (run, event, ' . implode(', ', $cols) . ') VALUES (:run, :event, :' . implode(', :', $cols) . ')');
+    if($find === false || $add === false) exit($db->lastErrorMsg());
+    foreach([$find, $add] as $stmt) { $stmt->bindValue(':run', $run); $stmt->bindValue(':event', $event); } //bindings survive reset()
+
+    foreach($rows as $fields)
+    {
+        //a line whose field count differs from the header cannot be mapped by array_combine()
+        if(count($fields) !== count($header)) continue;
+        $row = array_combine($header, $fields);
+
+        $find->reset();
+        $find->bindValue(':stnr', isset($row['stnr']) ? $row['stnr'] : '');
+        $known = $find->execute();
+        if($known === false) exit($db->lastErrorMsg());
+        if($known->fetchArray() !== false) continue; //already imported
+
+        $add->reset();
+        foreach($cols as $col) $add->bindValue(':' . $col, isset($row[$col]) ? $row[$col] : '');
+        if($add->execute() === false) exit($db->lastErrorMsg());
+    }
 }
 
 function convertPDFtoCSV($pdf,$targetname)
diff --git a/crawler/data.db b/crawler/data.db
new file mode 100644
index 0000000..ad68f01
Binary files /dev/null and b/crawler/data.db differ
diff --git a/web/pages/smart/controller.php b/web/pages/smart/controller.php
new file mode 100644
index 0000000..518cbe6
--- /dev/null
+++ b/web/pages/smart/controller.php
@@ -0,0 +1,36 @@
+menu_text = 'Smart';
+        $this->menu_image = 'fas fa-robot';
+        $this->menu_priority = 1;
+    }
+
+    function index()
+    {
+        $this->set('template', "smart.html.php");
+    }
+
+    function search()
+    {
+        $db = new SQLite3(ROOT.DS.'../crawler/data.db');
+        $q = isset($_REQUEST['q']) ? $_REQUEST['q'] : '';
+        //the search term is user input: bind it instead of splicing it into the SQL text
+        $stmt = $db->prepare('SELECT * FROM results WHERE teilnehmer LIKE :q');
+        $stmt->bindValue(':q', $q, SQLITE3_TEXT);
+        $res = $stmt->execute();
+        $results = [];
+        while($row = 
$res->fetchArray()) + { + $results[] = $row; + } + + $this->set('results', $results); + $this->set('template', 'search.html.php'); + + } + +} \ No newline at end of file diff --git a/web/pages/smart/search.html.php b/web/pages/smart/search.html.php new file mode 100644 index 0000000..0f6fc68 --- /dev/null +++ b/web/pages/smart/search.html.php @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
eventrunrangstnrteilnehmerhundvereinfvwzfzeitgfmsekbewpunkte
\ No newline at end of file diff --git a/web/pages/smart/smart.html.php b/web/pages/smart/smart.html.php new file mode 100644 index 0000000..d1edcaa --- /dev/null +++ b/web/pages/smart/smart.html.php @@ -0,0 +1,11 @@ +

Dein Name wie der auf Teilnahmelisten zu finden ist

+ +
+ + \ No newline at end of file