From 807aa70719f63931b7399891b88d5bf0ef6c45d4 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 19:51:46 +0100 Subject: [PATCH 01/10] skip existing runs --- crawler/crawler.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crawler/crawler.php b/crawler/crawler.php index 3a0a11a..e0aaa03 100644 --- a/crawler/crawler.php +++ b/crawler/crawler.php @@ -174,6 +174,11 @@ function getResults($run,$event) $url = "https://www.dognow.at/ergebnisse/pdf.php?lauf=$run&event=$event"; if(!file_exists('tmp/results/' . $event . '-' . $run . '.pdf')) file_put_contents('tmp/results/' . $event . '-' . $run . '.pdf',file_get_contents($url)); + if($GLOBALS['db']->query("SELECT * FROM runs WHERE id = '$run' AND event = '$event'")->fetchArray() != false) + { + echo " [i] Skipping run $run in event $event\n"; + return; + } convertPDFtoCSV('tmp/results/' . $event . '-' . $run . '.pdf','tmp/csv/' . $event . '-' . $run . '.pdf.csv'); analyzeResultCSV('tmp/csv/' . $event . '-' . $run . '.pdf.csv',$run,$event); } From b6b82cae417dfde2a7c0b6d27fe60b1015575ba9 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 19:53:52 +0100 Subject: [PATCH 02/10] streamlined --- crawler/crawler.php | 52 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/crawler/crawler.php b/crawler/crawler.php index e0aaa03..4f9e7fe 100644 --- a/crawler/crawler.php +++ b/crawler/crawler.php @@ -1,43 +1,41 @@ loadHTML($html); +// $dom = new DOMDocument; +// $dom->loadHTML($html); -$xpath = new DOMXPath($dom); -$query = '//ul[@class="pagination"]/child::*'; -$nodes = $xpath->query($query); +// $xpath = new DOMXPath($dom); +// $query = '//ul[@class="pagination"]/child::*'; +// $nodes = $xpath->query($query); -$GLOBALS['pdfs'] = 0; +// $GLOBALS['pdfs'] = 0; -// Loop through the selected nodes -foreach ($nodes as $node) { - // Do something with each node, for example, echo its content - $url = $node->getElementsByTagName('a')[0]->getAttribute('href'); - $number = intval($node->nodeValue); - if($number > $last_page){ - $last_page = $number; - } -} +// // Loop through the selected nodes +// foreach ($nodes as $node) { +// // Do something with each node, for example, echo its content +// $url = $node->getElementsByTagName('a')[0]->getAttribute('href'); +// $number = intval($node->nodeValue); +// if($number > $last_page){ +// $last_page = $number; +// } +// } -echo "[i] Found $last_page pages\n"; +// echo "[i] Found $last_page pages\n"; -//create an array with all pages -$pages = range(1,65); +// //create an array with all pages +// $pages = range(1,65); // foreach($pages as $page) // { From 4eb299339ef87359e9cfde3bec9da9041f15c145 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 20:02:16 +0100 Subject: [PATCH 03/10] test --- .gitea/workflows/test.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .gitea/workflows/test.yml diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml new file mode 100644 index 0000000..62a4d3a --- /dev/null +++ b/.gitea/workflows/test.yml @@ -0,0 +1,16 @@ +on: [push] + +jobs: + update_database: + runs-on: ubuntu-latest + steps: + - name: All tools we need + run: apt update && apt install -y default-jre git php php-dom php-curl php-sqlite3 + - name: Checkout + uses: actions/checkout@v4 + - name: run test + run: | + ls -la /etc/php* + #sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php82/php.ini + + \ No newline at end of file From b1dbebf17fc10fe1bb41617bb612e6a8f25839fa Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 20:02:41 +0100 Subject: [PATCH 04/10] test2 --- .gitea/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml index 62a4d3a..277d906 100644 --- a/.gitea/workflows/test.yml +++ b/.gitea/workflows/test.yml @@ -1,5 +1,6 @@ on: [push] + jobs: update_database: runs-on: ubuntu-latest From 1e1617eb538f20ccc0a04d4f622fd99e3e14a987 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 20:04:18 +0100 Subject: [PATCH 05/10] shorter --- .gitea/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml index 277d906..fad30d0 100644 --- a/.gitea/workflows/test.yml +++ b/.gitea/workflows/test.yml @@ -6,12 +6,12 @@ jobs: runs-on: ubuntu-latest steps: - name: All tools we need - run: apt update && apt install -y default-jre git php php-dom php-curl php-sqlite3 + run: apt update && apt install -y - name: Checkout uses: actions/checkout@v4 - name: run test run: | - ls -la /etc/php* + find /etc/php7.4 #sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php82/php.ini \ No newline at end of file From 63cbfb36811d67a1f410ef27704d0a0043daf529 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 20:05:20 +0100 Subject: [PATCH 06/10] just php --- .gitea/workflows/test.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml index fad30d0..993e6fb 100644 --- a/.gitea/workflows/test.yml +++ b/.gitea/workflows/test.yml @@ -6,9 +6,7 @@ jobs: runs-on: ubuntu-latest steps: - name: All tools we need - run: apt update && apt install -y - - name: Checkout - uses: actions/checkout@v4 + run: apt update && apt install -y php - name: run test run: | find /etc/php7.4 From a0f70c6619e8b7722b777da7666fb235e3fa40bb Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 20:06:01 +0100 Subject: [PATCH 07/10] so? --- .gitea/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml index 993e6fb..990c195 100644 --- a/.gitea/workflows/test.yml +++ b/.gitea/workflows/test.yml @@ -9,7 +9,7 @@ jobs: run: apt update && apt install -y php - name: run test run: | - find /etc/php7.4 + find /etc/php/7.4 #sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php82/php.ini \ No newline at end of file From 9757e589988e58e68f987d2a3f56073c8f7fafb2 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 20:07:20 +0100 Subject: [PATCH 08/10] real deal --- .gitea/workflows/test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml index 990c195..d99685a 100644 --- a/.gitea/workflows/test.yml +++ b/.gitea/workflows/test.yml @@ -10,6 +10,8 @@ jobs: - name: run test run: | find /etc/php/7.4 - #sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php82/php.ini + grep "error_reporting" /etc/php/7.4/cli/php.ini + sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php/7.4/cli/php.ini + grep "error_reporting" /etc/php/7.4/cli/php.ini \ No newline at end of file From d299edd10d210bd115277623a01f1474da3d21ad Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 23 Dec 2023 20:08:39 +0100 Subject: [PATCH 09/10] less errors please --- .gitea/workflows/autoupdate.yml | 1 + .gitea/workflows/test.yml | 17 ----------------- 2 files changed, 1 insertion(+), 17 deletions(-) delete mode 100644 .gitea/workflows/test.yml diff --git a/.gitea/workflows/autoupdate.yml b/.gitea/workflows/autoupdate.yml index bea568a..535ee32 100644 --- a/.gitea/workflows/autoupdate.yml +++ b/.gitea/workflows/autoupdate.yml @@ -14,6 +14,7 @@ jobs: uses: actions/checkout@v4 - name: run crawler run: | + sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php/7.4/cli/php.ini cd crawler php crawler.php git config --global user.email "gitea@haschek.at" diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml deleted file mode 100644 index d99685a..0000000 --- a/.gitea/workflows/test.yml +++ /dev/null @@ -1,17 +0,0 @@ -on: [push] - - -jobs: - update_database: - runs-on: ubuntu-latest - steps: - - name: All tools we need - run: apt update && apt install -y php - - name: run test - run: | - find /etc/php/7.4 - grep "error_reporting" /etc/php/7.4/cli/php.ini - sed -i "/^error_reporting/c\error_reporting = E_ALL & ~E_DEPRECATED & ~E_STRICT & ~E_WARNING & ~E_NOTICE" /etc/php/7.4/cli/php.ini - grep "error_reporting" /etc/php/7.4/cli/php.ini - - \ No newline at end of file From 1dabcfbe0b2c39d34c7464b310e9cf86f3751b94 Mon Sep 17 00:00:00 2001 From: Gitea Date: Sun, 24 Dec 2023 03:07:50 +0000 Subject: [PATCH 10/10] update database --- crawler/data.db | Bin 17850368 -> 17854464 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/crawler/data.db b/crawler/data.db index c45634b8826351fcfd7990c38e19ae87177ed944..90880abe8f2bbaad4d1ab2842b774b6568a93a9b 100644 GIT binary patch delta 4332 zcmZwJ30xCb9tZH3$vMf)MCB@2E7Y?pNbp*>c7ohulKGe$SC*SeM?@j(m9+~+tYySb$ ztdaw!C5`pR32A7|4D5*z5|Npgklo&7GhPu`TOftT3WD)UVVuo7L$7P#tcWPM+2yHG zqba!A(>L27rjzN~tO8?6Q$Z)DYbLPs#%l_)E??8@2qCp@{QRgOt!$ybizbs~XcCF8 zjZV?t)t=Pu)GpIHwPUn>wJo#;p;S07>=AN>OksvFMz9N=gr6fbBR7V2Y7>ogAenEa!hFYlJu%FE>W z^4Ib#d72zAPmn|9mU1KcP1z(pk;sYE&{9h8pBSLD9(8M&Lafz_bgRce$Q%6a98 zQl#uxwktWx2IX62u`)-Qp`^Lg4r=oX$f6S|qFjf8IGX%nIBE#cKR;2~-qEPjs%FLx8Vo2Q+G z?&Rqqp$~a_KjWYkA5gG?%Bmg-SjpoG zLRavVLud|9*@R~Ew4Bi8JnbTM7f(9~-NDm+LhtibMQ9aIm4sIEbeGV(Jl!Mo9#0-Z zJv=!o2(93;oX~Qf?hty1r`v?y=IIupw|FWew2Y^lgx=(-l+aS1ZV-Bdr|X1X=jj@u z*Lb>0=vAIB6M7lRf$hJ72QM!XdWoltgkI$70-+apDj~Flr}KoK=jj}w=Xfe6w3w%} zgr4Q;454RuI!)+lo>B=-{44w~)G8yr+1kTP})xMmpW8mT8Xcd9Ab+Z+}e)*;vy7HsS6-WL`0hI?beb>Gge zG(8#Gvv*W-%Jj%VANLWg)3B8~9#!PoFN_y*kY zE&Lt+0sn-h@Et6J<&X_4AO}{$Dp(C`AQ#rcI#>_i!v@$0n_x3+fjr2E0@w=MU^^57 zg&nXHcEN7g1AAc~?1uwz5dH=Kh9BTZI0T2`KX3$of}`*=`~ttiG58Js3&#s%ql#8Z zx>I5=;*N=5eAs;~w&;-DOB^hgh^NF@FPy!d>5?qEWa22k>b+`egPzE>Q7Tkt%xC0g7 zfxB=IDxnJQ!vlB-kKi#>Lk-lz6L`8{Sd!)|U*w*;eSZds3JZ^O48Z(WF#+k-J#;!)$*vv4`Fv-xz zFv8Hz;G@5;f1*F8-=SZi&k+^F9+Qt@x#^+llIf^vyD8f=-!#<}V+uEgS&OV?)?L;d z>zCHa){)j=YhSCc@>KZ^f9mX1Rw`-ALS>}VTWMveG!*M6;j{kkU*SZX_i&xV*XfyI zAVOn*t5`fBZDMLt^0)zM;}Vk->{!Wo?MkGV)6vkOr(_gm` z*~sa6hb^#8=g(k!j<$`ewWDoqaMVAUjcBDhISOry`fV5++)~xD!Oc}Io7+s)MmbOi z)vs-A@Y|}E4GvVbY;aRm8|ClxbZ0HwXucDCWBX%c*`Rp-6o**#VxQQgxVlL%TlUdo z>$hQS#AsE^-gk_uWphWVS~fTO`ToCnIU5^1vi`}>He`gVWrK&STK3VqvbO$5fA*ci z1?!7Nb}7f;8&{C+StICtofWCBS=BzSnbnZM zu{y+?eVkVp!|X1pCcxFB#?Ljxt?rgJ z1+}AI&#wGQ>eJbkpQ^6e@MKl}?3R_HuG#E~^=gMbB8&~6P_IYLhJU82pWU)PSJ!NK zlDcNI$E)io+-%07_D^A({QFDEj+L=&rTzlDaV5TPfqm!4um!fmFMsQ36I8AJ|9_II AF8}}l delta 1669 zcmZY6Yg80v9DwneVXy2g7cmGZ5hc5Dk%dab5~Z?`l_0v1wzi<6n{tt)%qR#dDo|iI zSu1NuF|)G7jkZifEXA^m1(sdZHPZ?T!COW`J#$W<;=}JbGw=J~ecyAY($`_A+}UAh zh}1d-Az0TqHkF5j(six#n(Jr-FN~^S`eBK-& zH^~)pw!BEplv89`9&b8pI&9iv>XeVjd*scgCR4dzbV&(fw=ZqN08Vid1jnR)Ja0$YD;O4;#(XfW-1L!g2af? zc_V(8Y*jY0Wl>Jjur%9pj)gakimpoPxs9MUh%mxL~_ z)^6A&dNuP5exkQE-;k%5wElcHowiA*(bhy28(@S#Opswl00I$&V2nZtM&lxcA`BNJ z91)1bCAbu05ao?39(&vu6Ctz;n%#k012Y1S25bmOGk2LQ%n9;7d4(Kisx=9IcFh!b zLRNHjW0ua{nicJl#9{6@DAw3>qA?ER5rfMx0hi+nT#2g?i#Wt10TYo33zCqGt8opk z#UxC|bx6S!T#p+t6{(np=}5zkxCu9725!NvNXJZMz=~PO#BInzHf~1_?!cY63wI+I z_h2^W;9l4;7x%%AJmg~@=3@a0un>#j!2MW^B`8D@icx~4D8&Oehp-$EV+9^T zIUdDhs6ZtiM-{5^1fIlGsKL{C2G61vby$g2s7C`D;e-p%VKtiI#u_}27w{t1;w8Kc z1#!az*nn5D5wBqrUdJ1F6PxiC-o_TRVk#7Flz^m@ByF=C9i8-f!>j)ccGF zjdtU5Z%bbHTD`ln#BYpynxk7=V{>%l0KUPuIEe4?J$}HCIE2GE0w0dzCmh4iIF1we z1;64q^q?2N;}7)VB>FLcQ#g$?IE!=m6X)?42Jts8U!C(c