crawler update
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s

This commit is contained in:
Chris 2023-12-11 19:29:14 +00:00
parent 4fc04c2793
commit b00f652283
5 changed files with 13 additions and 3 deletions

View File

@ -1,7 +1,10 @@
<?php
// Download the results overview page only when no cached copy exists in tmp/.
if (!file_exists('tmp/ergebnisse.html')) {
    $ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
    // file_get_contents() returns false on failure. Writing that (or an empty
    // body) would leave an empty cache file behind, and the existence check
    // above would then prevent every later run from retrying the download.
    if ($ergebnisse === false || $ergebnisse === '') {
        exit("Error downloading https://www.dognow.at/ergebnisse/");
    }
    if (file_put_contents('tmp/ergebnisse.html', $ergebnisse) === false) {
        exit("Error writing tmp/ergebnisse.html");
    }
}

// Shared database handle, stored in $GLOBALS['db'].
$GLOBALS['db'] = new SQLite3('data.db');
if(!$GLOBALS['db']) exit("Error loading database");
@ -177,6 +180,7 @@ function getResults($run,$event)
function analyzeResultCSV($csvfile,$run,$event) function analyzeResultCSV($csvfile,$run,$event)
{ {
if(!file_exists($csvfile)) die(" ERR: File $csvfile not found");
$csv = array_map('str_getcsv', file($csvfile)); $csv = array_map('str_getcsv', file($csvfile));
//prepare header for database //prepare header for database
@ -229,8 +233,10 @@ function convertPDFtoCSV($pdf,$targetname)
} }
/**
 * Runs the bundled Tabula jar on a PDF with CSV output format and returns
 * whatever the command printed to standard output.
 *
 * @param string $pdf Path of the PDF file passed to Tabula as a single argument.
 * @return string|false|null Result of shell_exec(): the command's output, or
 *                           false/null when the command could not be run or
 *                           produced no output.
 */
function analyze($pdf) {
    echo " [i] Analyzing $pdf\n";
    // Quote the path so that spaces or shell metacharacters in a file name
    // cannot split the argument or inject additional commands.
    $cmd = "java -jar tabula-1.0.5-jar-with-dependencies.jar -f CSV " . escapeshellarg($pdf);
    return shell_exec($cmd);
}

Binary file not shown.

View File

@ -1,2 +1,2 @@
* *.html
!.gitignore !.gitignore

2
crawler/tmp/csv/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
crawler/tmp/results/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore