progress with the crawler
All checks were successful
Build and push / Pulling repo on server (push) Successful in 2s

This commit is contained in:
2023-11-25 20:12:33 +01:00
parent 9571c10db3
commit 3d9a948beb
3 changed files with 185 additions and 0 deletions

19
crawler/parse.php Normal file
View File

@ -0,0 +1,19 @@
<?php
require_once('../web/inc/helpers.php');
// Walk the results folder and convert each PDF that has no CSV yet.
// The CSV is stored under tmp/csv using the PDF's full file name plus
// a ".csv" suffix (e.g. "report.pdf" -> "report.pdf.csv").
$entries = scandir('tmp/results');
foreach ($entries as $entry) {
    // Skip the directory self/parent pseudo-entries.
    if ($entry == '.' || $entry == '..') {
        continue;
    }

    // Only PDF files are candidates for table extraction.
    if (!endsWith($entry, '.pdf')) {
        continue;
    }

    // An existing CSV means this PDF was already processed on an earlier run.
    $csvPath = 'tmp/csv/' . $entry . '.csv';
    if (file_exists($csvPath)) {
        continue;
    }

    file_put_contents($csvPath, analyze('tmp/results/' . $entry));
}
/**
 * Run the Tabula command-line jar on a PDF and return what it prints.
 *
 * The path is passed through escapeshellarg() so that file names containing
 * spaces, quotes or shell metacharacters reach Tabula as a single argument
 * instead of breaking the command line or injecting extra commands.
 *
 * @param string $pdf Path to the PDF file to analyze.
 * @return string|false|null Output of the command as returned by shell_exec()
 *                           (CSV is requested via "-f CSV"); null or false
 *                           when shell_exec() reports a failure or there is
 *                           no output.
 */
function analyze($pdf) {
    // The jar is referenced by a relative name, so it is resolved against
    // the current working directory of the PHP process.
    $cmd = 'java -jar tabula-1.0.5-jar-with-dependencies.jar -f CSV ' . escapeshellarg($pdf);
    return shell_exec($cmd);
}