diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:36:36 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:42:26 +0200 |
commit | a89a14ef5da44684a16b204e7a70460cc8c4922a (patch) | |
tree | b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/arm/v6t2 | |
parent | 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff) |
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/arm/v6t2')
-rw-r--r-- | vendor/gmp-6.3.0/mpn/arm/v6t2/divrem_1.asm | 212 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_11.asm | 65 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_22.asm | 113 |
3 files changed, 390 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/arm/v6t2/divrem_1.asm b/vendor/gmp-6.3.0/mpn/arm/v6t2/divrem_1.asm new file mode 100644 index 0000000..be24615 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/arm/v6t2/divrem_1.asm @@ -0,0 +1,212 @@ +dnl ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1. + +dnl Contributed to the GNU project by Torbjörn Granlund. + +dnl Copyright 2012 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C norm unorm frac +C StrongARM - - - +C XScale - - - +C Cortex-A7 ? ? ? +C Cortex-A8 ? ? ? +C Cortex-A9 13 14 13 +C Cortex-A15 11.4 11.8 11.1 + +C TODO +C * Optimise inner-loops better, they could likely run a cycle or two faster. +C * Decrease register usage, streamline non-loop code. 
+
+define(`qp_arg', `r0')         C 1st register argument: base of the quotient area
+define(`fn', `r1')             C 2nd register argument: parked in the d stack slot, read back at L(frac)
+define(`up_arg', `r2')         C 3rd register argument: base of the source limbs
+define(`n_arg', `r3')          C 4th register argument: the code reads r3 directly
+define(`d_arg', `0')           C byte offset of d among the stack arguments
+define(`dinv_arg',`4')         C byte offset of dinv among the stack arguments
+define(`cnt_arg', `8')         C byte offset of cnt among the stack arguments
+
+define(`n', `r9')              C r3 - 1 on entry, counted down by the loops
+define(`qp', `r5')             C quotient store pointer, post-decremented by 4
+define(`up', `r6')             C source load pointer, pre-decremented by 4
+define(`cnt', `r7')            C left shift applied to d and to the source limbs
+define(`tnc', `r10')           C 32 - cnt
+define(`dinv', `r0')           C multiplier used by every umlal quotient estimate
+define(`d', `r4')              C divisor
+
+ASM_START()
+PROLOGUE(mpn_preinv_divrem_1)  C entry that reads d, dinv and cnt from the stack
+        stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}     C 9 words pushed: stack args now at sp+9*4
+        ldr     d, [sp, #9*4+d_arg]             C d = 1st stack argument
+        ldr     cnt, [sp, #9*4+cnt_arg]         C cnt = 3rd stack argument
+        str     r1, [sp, #9*4+d_arg]    C reuse d stack slot for fn
+        sub     n, r3, #1                       C n = r3 - 1
+        add     r3, r1, n                       C r3 = fn + n
+        cmp     d, #0                           C flags kept until the blt below
+        add     qp, qp_arg, r3, lsl #2  C put qp at Q[] end
+        add     up, up_arg, n, lsl #2   C put up at U[] end
+        ldr     dinv, [sp, #9*4+dinv_arg]       C dinv = 2nd stack argument; r0 is free now that qp is set
+        blt     L(nent)                         C top bit of d set: join the no-shift path
+        b       L(uent)                         C otherwise join the shifting path
+EPILOGUE()
+
+PROLOGUE(mpn_divrem_1)         C entry that obtains dinv by calling mpn_invert_limb
+        stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}     C 9 words pushed: stack args now at sp+9*4
+        sub     n, r3, #1                       C n = r3 - 1
+        ldr     d, [sp, #9*4+d_arg]     C d
+        str     r1, [sp, #9*4+d_arg]    C reuse d stack slot for fn
+        add     r3, r1, n                       C r3 = fn + n
+        cmp     d, #0                           C flags kept until the blt below
+        add     qp, qp_arg, r3, lsl #2  C put qp at Q[] end
+        add     up, up_arg, n, lsl #2   C put up at U[] end
+        blt     L(normalised)                   C top bit of d set: no shifting needed
+
+L(unnorm):
+        clz     cnt, d                          C cnt = number of leading zero bits in d
+        mov     r0, d, lsl cnt          C pass d << cnt
+        bl      mpn_invert_limb                 C value left in r0 is used as dinv below
+L(uent):
+        mov     d, d, lsl cnt           C d <<= cnt
+        cmp     n, #0                           C these flags also feed the beq L(uend) below
+        mov     r1, #0                  C r
+        blt     L(frac)                         C n < 0: nothing to read from U[]
+
+        ldr     r11, [up, #0]                   C top source limb
+
+        rsb     tnc, cnt, #32                   C tnc = 32 - cnt
+        mov     r1, r11, lsr tnc                C r1 = top limb >> tnc
+        mov     r11, r11, lsl cnt               C r11 = top limb << cnt
+        beq     L(uend)                         C still the flags from cmp n, #0: n == 0
+
+        ldr     r3, [up, #-4]!                  C step up down one limb and load it
+        orr     r2, r11, r3, lsr tnc            C r2 = r11 | (r3 >> tnc)
+        b       L(mid)                          C enter the loop at the estimate step
+
+L(utop):
+        mls     r1, d, r8, r11                  C r1 = r11 - d * r8
+        mov     r11, r3, lsl cnt                C r11 = r3 << cnt
+        ldr     r3, [up, #-4]!                  C step up down one limb and load it
+        cmp     r1, r2
+        addhi   r1, r1, d                       C r1 > r2 unsigned: r1 += d
+        subhi   r8, r8, #1                      C and r8 -= 1
+        orr     r2, r11, r3, lsr tnc            C r2 = r11 | (r3 >> tnc) for the next round
+        cmp     r1, d
+        bcs     L(ufx)                          C r1 >= d unsigned: out-of-line fix-up
+L(uok): str     r8, [qp], #-4                   C store r8, then qp -= 4
+L(mid): add     r8, r1, #1                      C high accumulator word = r1 + 1
+        mov     r11, r2                         C keep a copy of r2; umlal overwrites it
+        umlal   r2, r8, r1, dinv                C r8:r2 += r1 * dinv
+        subs    n, n, #1
+        bne     L(utop)                         C repeat while n != 0
+
+        mls     r1, d, r8, r11                  C r1 = r11 - d * r8
+        mov     r11, r3, lsl cnt                C r11 = r3 << cnt
+        cmp     r1, r2
+        addhi   r1, r1, d                       C r1 > r2 unsigned: r1 += d
+        subhi   r8, r8, #1                      C and r8 -= 1
+        cmp     r1, d
+        rsbcs   r1, d, r1                       C r1 >= d unsigned: r1 -= d
+        addcs   r8, r8, #1                      C and r8 += 1
+        str     r8, [qp], #-4                   C store r8, then qp -= 4
+
+L(uend):add     r8, r1, #1                      C high accumulator word = r1 + 1
+        mov     r2, r11                         C low accumulator word = r11
+        umlal   r2, r8, r1, dinv                C r8:r2 += r1 * dinv
+        mls     r1, d, r8, r11                  C r1 = r11 - d * r8
+        cmp     r1, r2
+        addhi   r1, r1, d                       C r1 > r2 unsigned: r1 += d
+        subhi   r8, r8, #1                      C and r8 -= 1
+        cmp     r1, d
+        rsbcs   r1, d, r1                       C r1 >= d unsigned: r1 -= d
+        addcs   r8, r8, #1                      C and r8 += 1
+        str     r8, [qp], #-4                   C store r8, then qp -= 4
+L(frac):
+        ldr     r2, [sp, #9*4+d_arg]    C fn
+        cmp     r2, #0
+        beq     L(fend)                         C fn == 0: skip the loop
+
+L(ftop):mov     r6, #0                          C low accumulator word = 0; r6 is also up
+        add     r3, r1, #1                      C high accumulator word = r1 + 1
+        umlal   r6, r3, r1, dinv                C r3:r6 += r1 * dinv
+        mov     r8, #0
+        mls     r1, d, r3, r8                   C r1 = 0 - d * r3
+        cmp     r1, r6
+        addhi   r1, r1, d                       C r1 > r6 unsigned: r1 += d
+        subhi   r3, r3, #1                      C and r3 -= 1
+        subs    r2, r2, #1
+        str     r3, [qp], #-4                   C store r3, then qp -= 4; flags from subs are untouched
+        bne     L(ftop)                         C repeat while r2 != 0
+
+L(fend):mov     r11, r1, lsr cnt                C r11 = r1 >> cnt
+L(rtn): mov     r0, r11                         C r0 = r11
+        ldmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}     C restore saved registers; loading pc returns
+
+L(normalised):
+        mov     r0, d                           C pass d unshifted
+        bl      mpn_invert_limb                 C value left in r0 is used as dinv below
+L(nent):
+        cmp     n, #0
+        mov     r11, #0                 C r
+        blt     L(nend)                         C n < 0: nothing to read from U[]
+
+        ldr     r11, [up, #0]                   C top source limb
+        cmp     r11, d
+        movlo   r2, #0                  C hi q limb
+        movhs   r2, #1                  C hi q limb
+        subhs   r11, r11, d                     C r11 >= d unsigned: r11 -= d
+
+        str     r2, [qp], #-4                   C store r2, then qp -= 4
+        cmp     n, #0                           C recompare; cmp r11, d replaced the earlier flags
+        beq     L(nend)                         C n == 0: no further source limbs
+
+L(ntop):ldr     r1, [up, #-4]!                  C step up down one limb and load it
+        add     r12, r11, #1                    C high accumulator word = r11 + 1
+        umlal   r1, r12, r11, dinv              C r12:r1 += r11 * dinv
+        ldr     r3, [up, #0]                    C reload the same limb; umlal overwrote r1
+        mls     r11, d, r12, r3                 C r11 = r3 - d * r12
+        cmp     r11, r1
+        addhi   r11, r11, d                     C r11 > r1 unsigned: r11 += d
+        subhi   r12, r12, #1                    C and r12 -= 1
+        cmp     d, r11
+        bls     L(nfx)                          C d <= r11 unsigned: out-of-line fix-up
+L(nok): str     r12, [qp], #-4                  C store r12, then qp -= 4
+        subs    n, n, #1
+        bne     L(ntop)                         C repeat while n != 0
+
+L(nend):mov     r1, r11                 C r
+        mov     cnt, #0                 C shift cnt
+        b       L(frac)                         C share the fraction loop and the exit
+
+L(nfx): add     r12, r12, #1                    C r12 += 1
+        rsb     r11, d, r11                     C r11 -= d
+        b       L(nok)
+L(ufx): rsb     r1, d, r1                       C r1 -= d
+        add     r8, r8, #1                      C r8 += 1
+        b       L(uok)
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_11.asm b/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_11.asm
new file mode 100644
index 0000000..8a38351
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_11.asm
@@ -0,0 +1,65 @@
+dnl ARM v6t2 mpn_gcd_11.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011, 2012, 2019 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C StrongARM -
+C XScale -
+C Cortex-A5 5.2
+C Cortex-A7 5.04
+C Cortex-A8 3.59
+C Cortex-A9 9.5
+C Cortex-A15 3.2
+C Cortex-A17 5.25
+C Cortex-A53 3.57
+
+define(`u0', `r0')             C first operand; also the register returned at L(end)
+define(`v0', `r1')             C second operand
+
+ASM_START()
+        TEXT
+        ALIGN(64)
+PROLOGUE(mpn_gcd_11)           C leaf routine: uses only r0, r1, r3 and r12, no stack
+        subs    r3, u0, v0              C 0    r3 = u - v; carry clear means u < v
+        beq     L(end)                  C      u == v: return u0 unchanged
+
+        ALIGN(16)
+L(top): rbit    r12, r3                 C 1,5  bit-reverse u - v; flags from subs stay live
+        clz     r12, r12                C 2    r12 = number of trailing zero bits of r3
+        rsbcc   r3, r3, #0              C v = abs(u-v), even 1
+        movcs   u0, v0                  C u = min(u,v) 1
+        lsr     v0, r3, r12             C 3    v0 = r3 >> r12
+        subs    r3, u0, v0              C 4    next difference and carry
+        bne     L(top)                  C      loop until u0 == v0
+
+L(end): bx      lr                      C return with the result in r0 (u0)
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_22.asm b/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_22.asm
new file mode 100644
index 0000000..3b23808
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_22.asm
@@ -0,0 +1,113 @@
+dnl ARM v6t2 mpn_gcd_22.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+ +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + + +C cycles/bit (approx) +C StrongARM - +C XScale - +C Cortex-A5 10.1 +C Cortex-A7 9.1 +C Cortex-A8 6.3 +C Cortex-A9 ? +C Cortex-A12 7.7 +C Cortex-A15 5.7 +C Cortex-A17 ? 
+C Cortex-A53 7.0
+
+
+define(`gp', `r0')             C address receiving the result: low limb at +0, high limb at +4
+
+define(`u1', `r1')             C high limb of u
+define(`u0', `r2')             C low limb of u
+define(`v1', `r3')             C high limb of v
+define(`v0', `r4')             C low limb of v, loaded from the stack in the prologue
+
+define(`t0', `r5')             C low limb of the difference
+define(`t1', `r6')             C high limb of the difference
+define(`cnt', `r7')            C shift count taken from the low difference limb
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)           C two-limb reduction loop, finished by a call to mpn_gcd_11
+        push    { r4-r7 }               C 16 bytes pushed, so the stack argument sits at sp+16
+
+        ldr     v0, [sp,#16]            C      v0 = first stack argument
+
+L(top): subs    t0, u0, v0              C 0 7
+        beq     L(lowz)                 C      low limbs equal: handled out of line
+        sbcs    t1, u1, v1              C 1 8
+
+        rbit    cnt, t0                 C 1
+
+        negcc   t0, t0                  C      borrow, so u < v: negate the low limb
+        mvncc   t1, t1                  C      t0 is nonzero here, so ~t1 completes the two-limb negation
+L(bck): movcc   v0, u0                  C      carry clear: v takes the old u, low limb
+        movcc   v1, u1                  C      and high limb
+
+        clz     cnt, cnt                C 2    cnt = number of trailing zero bits of the low limb
+        rsb     r12, cnt, #32           C 3    r12 = 32 - cnt
+
+        lsr     u0, t0, cnt             C 3
+        lsl     r12, t1, r12            C 4    bits of t1 that move down into the low limb
+        lsr     u1, t1, cnt             C 3
+        orr     u0, u0, r12             C 5    u1:u0 = t1:t0 >> cnt
+
+        orrs    r12, u1, v1             C      r12 = u1 | v1, also sets Z
+        bne     L(top)                  C      loop while either high limb is nonzero
+
+
+        str     r12, [gp,#4]            C high result limb <= 0
+
+        mov     r6, gp                  C      save gp; r0 is about to carry the first argument
+        mov     r0, u0                  C pass 1st argument
+        mov     r1, v0                  C pass 2nd argument
+        mov     r7, r14                 C preserve link register
+        bl      mpn_gcd_11              C      r6 and r7 are callee-saved under the AAPCS
+        str     r0, [r6,#0]             C      low result limb = value returned in r0
+        mov     r14, r7                 C      restore the link register
+        pop     { r4-r7 }
+        bx      r14
+
+L(lowz):C We come here when v0 - u0 = 0
+        C 1. If v1 - u1 = 0, then gcd is u = v.
+        C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+        subs    t0, u1, v1              C      t0 = u1 - v1; carry clear means u1 < v1
+        beq     L(end)                  C      both limbs equal
+        mov     t1, #0                  C      difference re-enters L(bck) as t1:t0 = 0:t0
+        rbit    cnt, t0                 C 1
+        negcc   t0, t0                  C      u1 < v1: t0 = v1 - u1
+        b       L(bck)
+
+L(end): str     v0, [gp,#0]             C      u == v: store v0 as the low result limb
+        str     v1, [gp,#4]             C      and v1 as the high result limb
+        pop     { r4-r7 }
+        bx      r14
+EPILOGUE() |