progress with the crawler
All checks were successful
Build and push / Pulling repo on server (push) Successful in 2s
All checks were successful
Build and push / Pulling repo on server (push) Successful in 2s
This commit is contained in:
19
crawler/parse.php
Normal file
19
crawler/parse.php
Normal file
@ -0,0 +1,19 @@
|
||||
<?php
|
||||
|
||||
require_once('../web/inc/helpers.php');
|
||||
|
||||
// Convert every PDF in tmp/results to CSV (written to tmp/csv), skipping
// PDFs whose CSV already exists so repeated runs only handle new files.
$resultsDir = 'tmp/results';
$csvDir = 'tmp/csv';

// scandir() returns false when the directory is missing or unreadable;
// fall back to an empty list instead of handing false to foreach.
$files = scandir($resultsDir);
if ($files === false) {
    error_log("parse.php: cannot read directory {$resultsDir}");
    $files = [];
}

foreach ($files as $file) {
    $target = $csvDir . '/' . $file . '.csv';

    if ($file === '.' || $file === '..' || !endsWith($file, '.pdf') || file_exists($target)) {
        continue;
    }

    $csv = analyze($resultsDir . '/' . $file);

    // shell_exec() (used by analyze) yields null both on failure and when the
    // command prints nothing, so the two cases cannot be told apart here; an
    // empty CSV is still written, which marks the PDF as processed.
    if (file_put_contents($target, (string) $csv) === false) {
        error_log("parse.php: could not write {$target}");
    }
}
|
||||
|
||||
|
||||
/**
 * Run Tabula on a PDF and return the CSV it prints.
 *
 * The jar path is relative, so it is resolved against the current working
 * directory of the process.
 *
 * @param string $pdf Path to the PDF file to convert.
 *
 * @return string|false|null Whatever shell_exec() returns: the command output,
 *                           or null/false when it failed or printed nothing.
 */
function analyze($pdf) {
    // Quote the path so spaces or shell metacharacters in a file name cannot
    // split the argument or inject additional commands.
    $cmd = 'java -jar tabula-1.0.5-jar-with-dependencies.jar -f CSV ' . escapeshellarg($pdf);

    return shell_exec($cmd);
}
|
Reference in New Issue
Block a user