crawler update
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s
All checks were successful
Build and push / Pulling repo on server (push) Successful in 3s
This commit is contained in:
parent
4fc04c2793
commit
b00f652283
@ -1,7 +1,10 @@
|
||||
<?php
|
||||
|
||||
//$ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');
|
||||
//file_put_contents('tmp/ergebnisse.html', $ergebnisse);
|
||||
// Cache the results page locally so repeated runs reuse tmp/ergebnisse.html
// instead of downloading it again.
if (!file_exists('tmp/ergebnisse.html')) {
    $ergebnisse = file_get_contents('https://www.dognow.at/ergebnisse/');

    // file_get_contents() returns false on failure. Writing that out would
    // leave an empty cache file behind, and the file_exists() check above
    // would then prevent any later run from fetching the page again.
    if ($ergebnisse === false || $ergebnisse === '') {
        exit("Error fetching https://www.dognow.at/ergebnisse/");
    }

    file_put_contents('tmp/ergebnisse.html', $ergebnisse);
}
|
||||
|
||||
// Open data.db and publish the handle through $GLOBALS['db'];
// stop the script when no usable handle is available.
$GLOBALS['db'] = new SQLite3('data.db');
if (!$GLOBALS['db']) {
    exit("Error loading database");
}
|
||||
@ -177,6 +180,7 @@ function getResults($run,$event)
|
||||
|
||||
function analyzeResultCSV($csvfile,$run,$event)
|
||||
{
|
||||
if(!file_exists($csvfile)) die(" ERR: File $csvfile not found");
|
||||
$csv = array_map('str_getcsv', file($csvfile));
|
||||
|
||||
//prepare header for database
|
||||
@ -229,8 +233,10 @@ function convertPDFtoCSV($pdf,$targetname)
|
||||
}
|
||||
|
||||
/**
 * Run the bundled Tabula jar on a PDF with CSV output and return what it prints.
 *
 * @param string $pdf Path of the PDF file passed to Tabula.
 * @return string|false|null The raw result of shell_exec() for the Tabula command.
 */
function analyze($pdf) {
    echo " [i] Analyzing $pdf\n";

    // Quote the path so spaces or shell metacharacters in a file name can
    // neither split the argument nor inject additional commands.
    $cmd = "java -jar tabula-1.0.5-jar-with-dependencies.jar -f CSV " . escapeshellarg($pdf);

    return shell_exec($cmd);
}
|
||||
|
||||
|
BIN
crawler/data.db
BIN
crawler/data.db
Binary file not shown.
2
crawler/tmp/.gitignore
vendored
2
crawler/tmp/.gitignore
vendored
@ -1,2 +1,2 @@
|
||||
*
|
||||
*.html
|
||||
!.gitignore
|
2
crawler/tmp/csv/.gitignore
vendored
Normal file
2
crawler/tmp/csv/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
*
|
||||
!.gitignore
|
2
crawler/tmp/results/.gitignore
vendored
Normal file
2
crawler/tmp/results/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
*
|
||||
!.gitignore
|
Reference in New Issue
Block a user