This repository has been archived on 2023-12-29. You can view files and clone it, but cannot push or open issues or pull requests.
dogstats/crawler/parse.php

19 lines
532 B
PHP
Raw Normal View History

2023-11-25 20:12:33 +01:00
<?php
require_once('../web/inc/helpers.php');
// Convert every PDF in tmp/results to a CSV file in tmp/csv.
// A PDF is skipped when tmp/csv/<name>.csv already exists, so the script can be re-run safely.
$files = scandir('tmp/results');
if ($files === false) {
    // scandir() returns false when the directory cannot be read; treat that as "nothing to do"
    // instead of handing a non-array to foreach.
    error_log('parse.php: cannot read directory tmp/results');
    $files = [];
}
foreach ($files as $file) {
    $target = 'tmp/csv/' . $file . '.csv';
    if ($file === '.' || $file === '..' || !endsWith($file, '.pdf') || file_exists($target)) {
        continue;
    }
    $csv = analyze('tmp/results/' . $file);
    if (!is_string($csv) || $csv === '') {
        // shell_exec() yields null/false when the command fails or prints nothing.
        // Writing an empty CSV here would make file_exists() skip this PDF on every later run
        // and hide the failure, so leave it unconverted and report it instead.
        error_log('parse.php: no CSV output for tmp/results/' . $file);
        continue;
    }
    file_put_contents($target, $csv);
}
/**
 * Runs the Tabula jar on a PDF with "-f CSV" and returns what the command printed.
 *
 * The jar is referenced by a relative path, so the script is expected to be started
 * from the directory that contains tabula-1.0.5-jar-with-dependencies.jar.
 *
 * @param string $pdf Path to the PDF file handed to Tabula.
 * @return string|false|null The command's standard output as returned by shell_exec();
 *                           null or false when the command failed or printed nothing.
 */
function analyze($pdf) {
    // Quote the path so that spaces or shell metacharacters in a file name reach Tabula
    // as one literal argument instead of being interpreted by the shell.
    $cmd = 'java -jar tabula-1.0.5-jar-with-dependencies.jar -f CSV ' . escapeshellarg($pdf);
    return shell_exec($cmd);
}