From a89a14ef5da44684a16b204e7a70460cc8c4922a Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Fri, 21 Jun 2024 23:36:36 +0200
Subject: Basic constant folding implementation

---
 vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm  |  70 ++++++++++++++
 vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm  | 117 ++++++++++++++++++++++++
 vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm | 129 ++++++++++++++++++++++++++
 vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm | 156 ++++++++++++++++++++++++++++++++
 4 files changed, 472 insertions(+)
 create mode 100644 vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm
 create mode 100644 vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm
 create mode 100644 vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm
 create mode 100644 vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm

(limited to 'vendor/gmp-6.3.0/mpn/arm/v5')

diff --git a/vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm b/vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm
new file mode 100644
index 0000000..3c2b48f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm
@@ -0,0 +1,70 @@
+dnl  ARM v5 mpn_gcd_11.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjörn
+dnl  Granlund.
+
+dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A5	 6.45	obsolete
+C Cortex-A7	 6.41	obsolete
+C Cortex-A8	 5.0	obsolete
+C Cortex-A9	 5.9	obsolete
+C Cortex-A15	 4.40	obsolete
+C Cortex-A17	 5.68	obsolete
+C Cortex-A53	 4.37	obsolete
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+define(`u0',    `r0')
+define(`v0',    `r1')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_11)		C gcd of the single limbs u0 (r0) and v0 (r1); result is left in r0
+	subs	r3, u0, v0		C 0  r3 = u0 - v0; carry is clear iff u0 < v0
+	beq	L(end)			C u0 == v0: r0 already holds the result
+
+	ALIGN(16)
+L(top):	sub	r2, v0, u0		C 0,5  r2 = -(r3)
+	and	r12, r2, r3		C 1  x & -x keeps only the lowest set bit of the difference
+	clz	r12, r12		C 2
+	rsb	r12, r12, #31		C 3  r12 = index of that bit = number of trailing zeros
+	rsbcc	r3, r3, #0		C v = abs(u-v), even	1
+	movcs	u0, v0			C u = min(u,v)		1
+	lsr	v0, r3, r12		C 4  drop the trailing zeros (NOTE(review): presumably both inputs must be odd - confirm)
+	subs	r3, u0, v0		C 5
+	bne	L(top)			C repeat until u0 == v0
+
+L(end):	bx	lr			C return with r0 = u0
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm b/vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm
new file mode 100644
index 0000000..0643b7c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm
@@ -0,0 +1,117 @@
+dnl  ARM v5 mpn_gcd_22.
+
+dnl  Copyright 2019, 2022 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 -
+C ARM11		13
+C Cortex-A5	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 ?
+C Cortex-A12	 ?
+C Cortex-A15	 ?
+C Cortex-A17	 ?
+C Cortex-A53	 ?
+
+
+define(`gp',    `r0')
+
+define(`u1',    `r1')
+define(`u0',    `r2')
+define(`v1',    `r3')
+define(`v0',    `r4')
+
+define(`t0',    `r5')
+define(`t1',    `r6')
+define(`cnt',   `r7')
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)		C two-limb gcd: gp (r0) = result pointer, u = {u1,u0} = {r1,r2}, v1 = r3, v0 is read from the stack
+	push	{ r4-r7 }		C save the registers used as v0, t0, t1 and cnt
+
+	ldr	v0, [sp,#16]		C v0 = word just above the 16 bytes pushed, i.e. at the entry sp
+
+L(top):	subs	t0, u0, v0		C 0 7
+	beq	L(lowz)			C low limbs are equal: handled separately
+	sbcs	t1, u1, v1		C 1 8
+
+	sub	cnt, v0, u0		C cnt = -t0
+	and	cnt, cnt, t0		C keep only the lowest set bit of the low difference limb
+
+	negcc	t0, t0			C u < v: negate {t1,t0}; t0 is nonzero here, so
+	mvncc	t1, t1			C the high limb of the negation is simply ~t1
+L(bck):	movcc	v0, u0			C u < v: v = u, so v ends up as min(u,v)
+	movcc	v1, u1
+
+	clz	r12, cnt		C 2
+	rsb	cnt, r12, #31		C 3
+	add	r12, r12, #1		C r12 = 32 - cnt, the left shift that moves t1 bits down into u0
+
+	lsr	u0, t0, cnt		C 3
+	lsl	r12, t1, r12		C 4
+	lsr	u1, t1, cnt		C 3
+	orr	u0, u0, r12		C 5
+
+	orrs	r12, u1, v1		C r12 becomes 0 once both high limbs are zero
+	bne	L(top)
+
+
+	str	r12, [gp,#4]		C high result limb <= 0
+
+	mov	r6, gp			C keep the result pointer; r0 carries the first call argument
+	mov	r0, u0			C pass 1st argument
+	mov	r1, v0			C pass 2nd argument
+	mov	r7, r14			C preserve link register
+	bl	mpn_gcd_11		C NOTE(review): relies on the callee leaving r6 and r7 intact - confirm
+	str	r0, [r6,#0]		C low result limb = value returned in r0
+	mov	r14, r7			C restore link register
+	pop	{ r4-r7 }
+	bx	r14
+
+L(lowz):C We come here when v0 - u0 = 0
+	C 1. If v1 - u1 = 0, then gcd is u = v.
+	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+	subs	t0, u1, v1		C t0 = u1 - v1; carry is clear iff u1 < v1
+	beq	L(end)
+	mov	t1, #0
+	sub	cnt, v1, u1
+	and	cnt, cnt, t0		C lowest set bit of the high-limb difference
+	negcc	t0, t0			C t0 = |u1 - v1|
+	b	L(bck)			C rejoin; the flags from subs above still drive the movcc at L(bck)
+
+L(end):	str	v0, [gp,#0]		C u == v: store {v1,v0} as the result
+	str	v1, [gp,#4]
+	pop	{ r4-r7 }
+	bx	r14
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm b/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm
new file mode 100644
index 0000000..3cf0cd7
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm
@@ -0,0 +1,129 @@
+dnl  ARM mpn_mod_1_1p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 7
+C Cortex-A15	 6
+
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1_1p)		C r0 = ap, r1 = n, r2 = d, r3 = cps (table written by mpn_mod_1_1p_cps); result in r0
+	push	{r4-r10}
+	add	r0, r0, r1, asl #2	C r0 = ap + 4*n, one past the top limb
+	ldr	r5, [r0, #-4]!		C r5 = ap[n-1] (pre-decrement with writeback)
+	ldr	r12, [r0, #-4]!		C r12 = ap[n-2]; loaded unconditionally, so n >= 2 is assumed
+	subs	r1, r1, #2
+	ble	L(4)			C n <= 2: no further limbs to fold in
+	ldr	r8, [r3, #12]		C r8 = cps[3]
+	mov	r4, r12
+	mov	r10, r5			C NOTE(review): presumably copied so the umull input differs from its outputs - confirm
+	umull	r7, r5, r10, r8		C {r5,r7} = r10 * cps[3]
+	sub	r1, r1, #1
+	b	L(mid)
+
+L(top):	adds	r12, r6, r7
+	adcs	r10, r4, r5
+	sub	r1, r1, #1		C plain sub: the carry from adcs survives for movcs
+	mov	r6, #0
+	movcs	r6, r8			C r6 = carry ? cps[3] : 0
+	umull	r7, r5, r10, r8
+	adds	r4, r12, r6
+	subcs	r4, r4, r2		C on carry out, subtract d
+L(mid):	ldr	r6, [r0, #-4]!		C fetch the next lower limb
+	teq	r1, #0
+	bne	L(top)
+
+	adds	r12, r6, r7
+	adcs	r5, r4, r5
+	subcs	r5, r5, r2
+L(4):	ldr	r1, [r3, #4]		C r1 = cps[1], the shift count stored by mpn_mod_1_1p_cps
+	cmp	r1, #0
+	beq	L(7)			C zero shift: skip the cps[2] multiply and the shifts
+	ldr	r4, [r3, #8]		C r4 = cps[2]
+	umull	r0, r6, r5, r4
+	adds	r12, r0, r12
+	addcs	r6, r6, #1
+	rsb	r0, r1, #32
+	mov	r0, r12, lsr r0
+	orr	r5, r0, r6, asl r1	C {r5,r12} = {r6,r12} << r1
+	mov	r12, r12, asl r1
+	b	L(8)
+L(7):	cmp	r5, r2
+	subcs	r5, r5, r2		C if r5 >= d then r5 -= d
+L(8):	ldr	r0, [r3, #0]		C r0 = cps[0], the mpn_invert_limb result stored by mpn_mod_1_1p_cps
+	umull	r4, r3, r5, r0
+	add	r5, r5, #1
+	adds	r0, r4, r12
+	adc	r5, r3, r5		C r5 = quotient estimate (multiplied by d and subtracted below)
+	mul	r5, r2, r5
+	sub	r12, r12, r5		C r12 = candidate remainder
+	cmp	r12, r0
+	addhi	r12, r12, r2		C first correction: add d back
+	cmp	r2, r12
+	subls	r12, r12, r2		C second correction: if r12 >= d then r12 -= d
+	mov	r0, r12, lsr r1		C shift right by cps[1] and return in r0
+	pop	{r4-r10}
+	bx	r14
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)	C r0 = cps (output table), r1 = b
+	stmfd	sp!, {r4, r5, r6, r14}
+	mov	r5, r0			C keep cps; r0 carries the call argument
+	clz	r4, r1			C r4 = cnt = number of leading zeros of b
+	mov	r0, r1, asl r4		C r0 = b << cnt
+	rsb	r6, r0, #0		C r6 = -(b << cnt)
+	bl	mpn_invert_limb
+	str	r0, [r5, #0]		C cps[0] = value returned by mpn_invert_limb
+	str	r4, [r5, #4]		C cps[1] = cnt
+	cmp	r4, #0
+	beq	L(2)			C cnt == 0: cps[2] is left unwritten
+	rsb	r1, r4, #32
+	mov	r3, #1
+	mov	r3, r3, asl r4
+	orr	r3, r3, r0, lsr r1	C r3 = (1 << cnt) | (cps[0] >> (32 - cnt))
+	mul	r3, r6, r3
+	mov	r4, r3, lsr r4
+	str	r4, [r5, #8]		C cps[2]
+L(2):	mul	r0, r6, r0		C low 32 bits of -(b << cnt) * cps[0]
+	str	r0, [r5, #12]		C cps[3]
+	ldmfd	sp!, {r4, r5, r6, pc}	C restore registers and return by loading pc
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm b/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm
new file mode 100644
index 0000000..aa26ecb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm
@@ -0,0 +1,156 @@
+dnl  ARM mpn_mod_1s_2p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.25
+C Cortex-A15	 3
+
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1s_2p)		C r0 = ap, r1 = n, r2 = d, r3 = cps (table written by mpn_mod_1s_2p_cps); result in r0
+	push	{r4-r10}
+	tst	n, #1			C odd or even limb count; consumed by the beq below
+	add	r7, r3, #8		C r7 = address of cps[2]
+	ldmia	r7, {r7, r8, r12}	C load B1, B2, B3
+	add	ap, ap, n, lsl #2	C put ap at operand end
+	beq	L(evn)
+
+L(odd):	subs	n, n, #1
+	beq	L(1)			C n was 1: single limb case
+	ldmdb	ap!, {r4,r6,r9}		C top three limbs, r9 the most significant
+	mov	r10, #0
+	umlal	r4, r10, r6, r7		C {r10,r4} += r6 * B1
+	umlal	r4, r10, r9, r8		C {r10,r4} += r9 * B2
+	b	L(com)
+
+L(evn):	ldmdb	ap!, {r4,r10}		C top two limbs, r10 the most significant
+L(com):	subs	n, n, #2
+	ble	L(end)			C no limbs left below the ones already loaded
+	ldmdb	ap!, {r5,r6}
+	b	L(mid)
+
+L(top):	mov	r9, #0
+	umlal	r5, r9, r6, r7		C B1
+	umlal	r5, r9, r4, r8		C B2
+	ldmdb	ap!, {r4,r6}		C next two lower limbs
+	umlal	r5, r9, r10, r12	C B3
+	ble	L(xit)			C flags are still those of the subs at L(mid)
+	mov	r10, #0
+	umlal	r4, r10, r6, r7		C B1
+	umlal	r4, r10, r5, r8		C B2
+	ldmdb	ap!, {r5,r6}		C next two lower limbs
+	umlal	r4, r10, r9, r12	C B3
+L(mid):	subs	n, n, #4
+	bge	L(top)
+
+	mov	r9, #0
+	umlal	r5, r9, r6, r7		C B1
+	umlal	r5, r9, r4, r8		C B2
+	umlal	r5, r9, r10, r12	C B3
+	mov	r4, r5
+
+L(end):	movge	r9, r10			C executed iff coming via xit
+	ldr	r6, [r3, #4]		C cps[1] = cnt
+	mov	r5, #0
+	umlal	r4, r5, r9, r7		C {r5,r4} = r4 + r9 * B1
+	mov	r7, r5, lsl r6
+L(x):	rsb	r1, r6, #32
+	orr	r8, r7, r4, lsr r1	C {r8,r9} = two-limb value shifted left by cnt
+	mov	r9, r4, lsl r6
+	ldr	r5, [r3, #0]		C r5 = cps[0], the mpn_invert_limb result stored by mpn_mod_1s_2p_cps
+	add	r0, r8, #1
+	umull	r12, r1, r8, r5
+	adds	r4, r12, r9
+	adc	r1, r1, r0		C r1 = quotient estimate (multiplied by d and subtracted below)
+	mul	r5, r2, r1
+	sub	r9, r9, r5		C r9 = candidate remainder
+	cmp	r9, r4
+	addhi	r9, r9, r2		C first correction: add d back
+	cmp	r2, r9
+	subls	r9, r9, r2		C second correction: if r9 >= d then r9 -= d
+	mov	r0, r9, lsr r6		C shift right by cnt and return in r0
+	pop	{r4-r10}
+	bx	r14
+
+L(xit):	mov	r10, #0
+	umlal	r4, r10, r6, r7		C B1
+	umlal	r4, r10, r5, r8		C B2
+	umlal	r4, r10, r9, r12	C B3
+	b	L(end)
+
+L(1):	ldr	r6, [r3, #4]		C cps[1] = cnt
+	ldr	r4, [ap, #-4]		C ap[0]
+	mov	r7, #0			C no high limb in the single limb case
+	b	L(x)
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_2p_cps)	C r0 = cps (output table), r1 = b
+	push	{r4-r8, r14}
+	clz	r4, r1			C r4 = cnt = number of leading zeros of b
+	mov	r5, r1, lsl r4		C b <<= cnt
+	mov	r6, r0			C r6 = cps
+	mov	r0, r5
+	bl	mpn_invert_limb		C the value returned in r0 is stored as cps[0] below
+	rsb	r3, r4, #32
+	mov	r3, r0, lsr r3
+	mov	r2, #1
+	orr	r3, r3, r2, lsl r4	C r3 = (1 << cnt) | (r0 >> (32 - cnt))
+	rsb	r1, r5, #0		C r1 = -b (b already shifted)
+	mul	r2, r1, r3		C r2 >> cnt is stored as cps[2], loaded as B1 by mpn_mod_1s_2p
+	umull	r3, r12, r2, r0
+	add	r12, r2, r12
+	mvn	r12, r12
+	mul	r1, r5, r12
+	cmp	r1, r3
+	addhi	r1, r1, r5		C r1 >> cnt is stored as cps[3], loaded as B2
+	umull	r12, r7, r1, r0
+	add	r7, r1, r7
+	mvn	r7, r7
+	mul	r3, r5, r7
+	cmp	r3, r12
+	addhi	r3, r3, r5		C r3 >> cnt is stored as cps[4], loaded as B3
+	mov	r5, r2, lsr r4
+	mov	r7, r1, lsr r4
+	mov	r8, r3, lsr r4
+	stmia	r6, {r0,r4,r5,r7,r8}	C fill cps
+	pop	{r4-r8, pc}
+EPILOGUE()
-- 
cgit v1.2.3