smart test
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s
This commit is contained in:
2
crawler/.gitattributes
vendored
2
crawler/.gitattributes
vendored
@ -1,2 +0,0 @@
|
||||
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
*.csv filter=lfs diff=lfs merge=lfs -text
|
39
crawler/base.sql
Normal file
39
crawler/base.sql
Normal file
@ -0,0 +1,39 @@
|
||||
-- Schema for the crawler's SQLite database.
--
-- events  : one row per crawled event; the crawler stores the event id,
--           name, organizer and an RFC 3339 formatted date string.
-- runs    : one row per run of an event ("event" references events.ID).
-- results : one row per starter of a run, imported from the converted
--           result CSV; the crawler looks rows up by (event, run, stnr)
--           before inserting.
--           NOTE(review): "rang" and "punkte" are not written by the
--           importer code visible in this commit - confirm whether that
--           is intended.
BEGIN TRANSACTION;

-- An INTEGER column that is the PRIMARY KEY is already unique; an extra
-- UNIQUE constraint on it only makes SQLite maintain a redundant index.
CREATE TABLE IF NOT EXISTS "events" (
	"ID"	INTEGER,
	"name"	TEXT,
	"date"	TEXT,
	"organizer"	TEXT,
	PRIMARY KEY("ID")
);

CREATE TABLE IF NOT EXISTS "runs" (
	"id"	INTEGER,
	"event"	INTEGER,
	"name"	TEXT,
	"lk"	TEXT,
	"gk"	TEXT,
	FOREIGN KEY("event") REFERENCES "events"("ID"),
	PRIMARY KEY("id")
);

CREATE TABLE IF NOT EXISTS "results" (
	"id"	INTEGER,
	"event"	INTEGER,
	"run"	INTEGER,
	"rang"	TEXT,
	"stnr"	INTEGER,
	"teilnehmer"	TEXT,
	"hund"	TEXT,
	"verein"	TEXT,
	"f"	INTEGER,
	"vw"	INTEGER,
	"zf"	INTEGER,
	"zeit"	REAL,
	"gf"	REAL,
	"msek"	REAL,
	"bew"	TEXT,
	"punkte"	TEXT,
	PRIMARY KEY("id" AUTOINCREMENT),
	FOREIGN KEY("run") REFERENCES "runs"("id"),
	FOREIGN KEY("event") REFERENCES "events"("ID")
);

-- Backs the "does this result already exist?" lookup the importer runs for
-- every CSV row. Deliberately non-unique so it can be created on databases
-- that already contain data.
CREATE INDEX IF NOT EXISTS "results_event_run_stnr"
	ON "results" ("event", "run", "stnr");

COMMIT;
|
@ -3,6 +3,11 @@
|
||||
//$ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
|
||||
//file_put_contents('tmp/ergebnisse.html', $ergebnisse);
|
||||
|
||||
// Open (or create) the crawler database and share the handle through
// $GLOBALS['db'] so the crawl/import functions can reach it.
// SQLite3's constructor throws on failure instead of returning a falsy
// value, so the error path has to be a catch block.
try {
$GLOBALS['db'] = new SQLite3('data.db');
} catch (Exception $ex) {
exit("Error loading database");
}
|
||||
|
||||
$html = file_get_contents('tmp/ergebnisse.html');
|
||||
|
||||
$dom = new DOMDocument;
|
||||
@ -52,13 +57,18 @@ foreach($pages as $key=> $page){
|
||||
|
||||
// Loop through the selected nodes
|
||||
foreach ($nodes as $node) {
|
||||
// Do something with each node, for example, echo its content
|
||||
//echo $node->nodeValue . "\n";
|
||||
// CUPs
|
||||
$div = $node->getElementsByTagName('div')[0];
|
||||
$id = $div->getAttribute('data-event');
|
||||
$name = trim($div->getElementsByTagName('div')[1]->nodeValue);
|
||||
$organizer = trim($div->getElementsByTagName('div')[2]->nodeValue);
|
||||
$date = trim($div->getElementsByTagName('div')[3]->nodeValue);
|
||||
$db_date = date(DATE_RFC3339, strtotime($date));
|
||||
|
||||
//if not exists, add to db
|
||||
$res = $GLOBALS['db']->query("SELECT * FROM events WHERE id = '$id'");
|
||||
if($res->fetchArray() == false)
|
||||
$GLOBALS['db']->exec("INSERT INTO events (id, name, organizer, date) VALUES ('$id', '$name', '$organizer', '$db_date')");
|
||||
|
||||
crawlRuns($id);
|
||||
|
||||
@ -124,6 +134,12 @@ Derzeit sind keine Ergebnisse")!==false)
|
||||
$gk = trim($tds[2]->nodeValue);
|
||||
$pdf = $tds[3]->getElementsByTagName('a')[0]->getAttribute('href');
|
||||
|
||||
//add run to db if not exists
|
||||
$res = $GLOBALS['db']->query("SELECT * FROM runs WHERE id = '$rid'");
|
||||
if($res->fetchArray() == false)
|
||||
$GLOBALS['db']->exec("INSERT INTO runs (id, name, event, lk, gk) VALUES ('$rid', '$runname', '$eventid', '$lk', '$gk')");
|
||||
|
||||
|
||||
getResults($rid,$eventid);
|
||||
}
|
||||
|
||||
@ -146,6 +162,50 @@ function getResults($run,$event)
|
||||
if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf'))
|
||||
file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url));
|
||||
convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv');
|
||||
analyzeResultCSV('tmp/csv/' . $event . '-' . $run . '.pdf.csv',$run,$event);
|
||||
}
|
||||
|
||||
/**
 * Import one converted result list (CSV) into the "results" table.
 *
 * The first CSV line is treated as the header: every cell is lower-cased
 * and stripped of all characters except A-Z, a-z and 0-9, and the outcome
 * is used as the column key for the following lines. A line becomes a new
 * "results" row only if no row with the same stnr, run and event exists yet.
 *
 * Lines whose number of fields differs from the header (e.g. blank lines)
 * are skipped. An unreadable or empty file imports nothing.
 *
 * @param string $csv   Path of the CSV file to import.
 * @param mixed  $run   Run id stored in results.run.
 * @param mixed  $event Event id stored in results.event.
 * @return void The script is terminated with the SQLite error message when
 *              a database operation fails.
 */
function analyzeResultCSV($csv,$run,$event)
{
    if(!is_readable($csv))
        return;

    $lines = file($csv);
    if($lines === false || count($lines) === 0)
        return;

    $rows = array_map('str_getcsv', $lines);

    //prepare header for database
    $header = array();
    foreach(array_shift($rows) as $value){
        $header[] = preg_replace('/[^A-Za-z0-9]/', '', strtolower((string)$value));
    }
    $width = count($header);

    // CSV columns copied verbatim into identically named table columns.
    $fields = array('stnr', 'teilnehmer', 'hund', 'verein', 'f', 'vw', 'zf', 'zeit', 'gf', 'msek', 'bew');

    $db = $GLOBALS['db'];

    try
    {
        // Values come from scraped documents: bind them instead of pasting
        // them into the SQL text, so quotes in names cannot break the query.
        $select = $db->prepare("SELECT 1 FROM results WHERE stnr = :stnr AND run = :run AND event = :event");
        $insert = $db->prepare("INSERT INTO results (stnr, run, event, teilnehmer, hund, verein, f, vw, zf, zeit, gf, msek, bew) VALUES (:stnr, :run, :event, :teilnehmer, :hund, :verein, :f, :vw, :zf, :zeit, :gf, :msek, :bew)");
        if($select === false || $insert === false)
            exit($db->lastErrorMsg());

        // run/event are the same for every row; bindings survive reset().
        // Everything is bound as text, matching the quoted literals used
        // before; SQLite's column affinity converts numeric strings.
        foreach(array($select, $insert) as $stmt)
        {
            $stmt->bindValue(':run', (string)$run, SQLITE3_TEXT);
            $stmt->bindValue(':event', (string)$event, SQLITE3_TEXT);
        }

        foreach($rows as $row)
        {
            // array_combine() needs as many values as keys.
            if(count($row) !== $width)
                continue;
            $record = array_combine($header, $row);

            $stnr = isset($record['stnr']) ? (string)$record['stnr'] : '';

            //add result to db if not exists
            $select->bindValue(':stnr', $stnr, SQLITE3_TEXT);
            $found = $select->execute();
            if($found === false)
                exit($db->lastErrorMsg());
            $exists = $found->fetchArray() !== false;
            $select->reset();
            if($exists)
                continue;

            foreach($fields as $field)
            {
                $value = isset($record[$field]) ? (string)$record[$field] : '';
                $insert->bindValue(':' . $field, $value, SQLITE3_TEXT);
            }
            if($insert->execute() === false)
                exit($db->lastErrorMsg());
            $insert->reset();
        }
    }
    catch(Exception $ex) {
        // Reached when exceptions are enabled on the SQLite3 handle.
        exit($db->lastErrorMsg());
    }
}
|
||||
|
||||
function convertPDFtoCSV($pdf,$targetname)
|
||||
|
BIN
crawler/data.db
Normal file
BIN
crawler/data.db
Normal file
Binary file not shown.
Reference in New Issue
Block a user