From a89a14ef5da44684a16b204e7a70460cc8c4922a Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Fri, 21 Jun 2024 23:36:36 +0200 Subject: Basic constant folding implementation --- vendor/gmp-6.3.0/mpn/powerpc64/p6/lshift.asm | 132 +++++++++++++++++++++++++ vendor/gmp-6.3.0/mpn/powerpc64/p6/lshiftc.asm | 136 ++++++++++++++++++++++++++ vendor/gmp-6.3.0/mpn/powerpc64/p6/rshift.asm | 131 +++++++++++++++++++++++++ 3 files changed, 399 insertions(+) create mode 100644 vendor/gmp-6.3.0/mpn/powerpc64/p6/lshift.asm create mode 100644 vendor/gmp-6.3.0/mpn/powerpc64/p6/lshiftc.asm create mode 100644 vendor/gmp-6.3.0/mpn/powerpc64/p6/rshift.asm (limited to 'vendor/gmp-6.3.0/mpn/powerpc64/p6') diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/p6/lshift.asm b/vendor/gmp-6.3.0/mpn/powerpc64/p6/lshift.asm new file mode 100644 index 0000000..1a200fb --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/powerpc64/p6/lshift.asm @@ -0,0 +1,132 @@ +dnl PowerPC-64 mpn_lshift -- rp[] = up[] << cnt + +dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C POWER3/PPC630 ? +C POWER4/PPC970 ? +C POWER5 2.25 +C POWER6 4 + +C TODO +C * Micro-optimise header code +C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236 +C bytes, 4-way code would become about 50% larger. + +C INPUT PARAMETERS +define(`rp_param', `r3') +define(`up', `r4') +define(`n', `r5') +define(`cnt', `r6') + +define(`tnc',`r0') +define(`retval',`r3') +define(`rp', `r7') + +ASM_START() +PROLOGUE(mpn_lshift,toc) + +ifdef(`HAVE_ABI_mode32',` + rldicl n, n, 0,32 C FIXME: avoid this zero extend +') + mflr r12 + sldi r8, n, 3 + sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block + LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1) + add up, up, r8 C make up point at end of up[] + add r11, r11, r10 C address of L(oN) for N = cnt + srdi r10, n, 1 + add rp, rp_param, r8 C make rp point at end of rp[] + subfic tnc, cnt, 64 + rlwinm. r8, n, 0,31,31 C extract bit 0 + mtctr r10 + beq L(evn) + +L(odd): ld r9, -8(up) + cmpdi cr0, n, 1 C n = 1? + beq L(1) + ld r8, -16(up) + addi r11, r11, -84 C L(o1) - L(e1) - 64 + mtlr r11 + srd r3, r9, tnc C retval + addi up, up, 8 + addi rp, rp, -8 + blr C branch to L(oN) + +L(evn): ld r8, -8(up) + ld r9, -16(up) + addi r11, r11, -64 + mtlr r11 + srd r3, r8, tnc C retval + blr C branch to L(eN) + +L(1): srd r3, r9, tnc C retval + sld r8, r9, cnt + std r8, -8(rp) + mtlr r12 +ifdef(`HAVE_ABI_mode32', +` mr r4, r3 + srdi r3, r3, 32 +') + blr + + +define(SHIFT,` +L(lo$1):ld r8, -24(up) + std r11, -8(rp) + addi rp, rp, -16 +L(o$1): srdi r10, r8, eval(64-$1) + rldimi r10, r9, $1, 0 + ld r9, -32(up) + addi up, up, -16 + std r10, 0(rp) +L(e$1): srdi r11, r9, eval(64-$1) + rldimi r11, r8, $1, 0 + bdnz L(lo$1) + std r11, -8(rp) + sldi r10, r9, $1 + b L(com) + nop + nop +') + + ALIGN(64) +forloop(`i',1,63,`SHIFT(i)') + +L(com): std r10, -16(rp) + mtlr r12 +ifdef(`HAVE_ABI_mode32', +` mr r4, r3 + srdi r3, r3, 32 +') + blr +EPILOGUE() +ASM_END() diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/p6/lshiftc.asm b/vendor/gmp-6.3.0/mpn/powerpc64/p6/lshiftc.asm new file mode 100644 index 0000000..e4b3caa --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/powerpc64/p6/lshiftc.asm @@ -0,0 +1,136 @@ +dnl PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt + +dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C POWER3/PPC630 ? +C POWER4/PPC970 ? +C POWER5 2.25 +C POWER6 4 + +C TODO +C * Micro-optimise header code +C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236 +C bytes, 4-way code would become about 50% larger. + +C INPUT PARAMETERS +define(`rp_param', `r3') +define(`up', `r4') +define(`n', `r5') +define(`cnt', `r6') + +define(`tnc',`r0') +define(`retval',`r3') +define(`rp', `r7') + +ASM_START() +PROLOGUE(mpn_lshiftc,toc) + +ifdef(`HAVE_ABI_mode32',` + rldicl n, n, 0,32 C FIXME: avoid this zero extend +') + mflr r12 + sldi r8, n, 3 + sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block + LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1) + add up, up, r8 C make up point at end of up[] + add r11, r11, r10 C address of L(oN) for N = cnt + srdi r10, n, 1 + add rp, rp_param, r8 C make rp point at end of rp[] + subfic tnc, cnt, 64 + rlwinm. r8, n, 0,31,31 C extract bit 0 + mtctr r10 + beq L(evn) + +L(odd): ld r9, -8(up) + cmpdi cr0, n, 1 C n = 1? + beq L(1) + ld r8, -16(up) + addi r11, r11, -88 C L(o1) - L(e1) - 64 + mtlr r11 + srd r3, r9, tnc C retval + addi up, up, 8 + addi rp, rp, -8 + blr C branch to L(oN) + +L(evn): ld r8, -8(up) + ld r9, -16(up) + addi r11, r11, -64 + mtlr r11 + srd r3, r8, tnc C retval + blr C branch to L(eN) + +L(1): srd r3, r9, tnc C retval + sld r8, r9, cnt + nor r8, r8, r8 + std r8, -8(rp) + mtlr r12 +ifdef(`HAVE_ABI_mode32', +` mr r4, r3 + srdi r3, r3, 32 +') + blr + + +define(SHIFT,` +L(lo$1):ld r8, -24(up) + nor r11, r11, r11 + std r11, -8(rp) + addi rp, rp, -16 +L(o$1): srdi r10, r8, eval(64-$1) + rldimi r10, r9, $1, 0 + ld r9, -32(up) + addi up, up, -16 + nor r10, r10, r10 + std r10, 0(rp) +L(e$1): srdi r11, r9, eval(64-$1) + rldimi r11, r8, $1, 0 + bdnz L(lo$1) + sldi r10, r9, $1 + b L(com) + nop +') + + ALIGN(64) +forloop(`i',1,63,`SHIFT(i)') + +L(com): nor r11, r11, r11 + nor r10, r10, r10 + std r11, -8(rp) + std r10, -16(rp) + mtlr r12 +ifdef(`HAVE_ABI_mode32', +` mr r4, r3 + srdi r3, r3, 32 +') + blr +EPILOGUE() +ASM_END() diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/p6/rshift.asm b/vendor/gmp-6.3.0/mpn/powerpc64/p6/rshift.asm new file mode 100644 index 0000000..9e848c1 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/powerpc64/p6/rshift.asm @@ -0,0 +1,131 @@ +dnl PowerPC-64 mpn_rshift -- rp[] = up[] << cnt + +dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C POWER3/PPC630 ? +C POWER4/PPC970 ? +C POWER5 2 +C POWER6 3.5 (mysteriously 3.0 for cnt=1) + +C TODO +C * Micro-optimise header code +C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248 +C bytes, 4-way code would become about 50% larger. + +C INPUT PARAMETERS +define(`rp_param', `r3') +define(`up', `r4') +define(`n', `r5') +define(`cnt', `r6') + +define(`tnc',`r0') +define(`retval',`r3') +define(`rp', `r7') + +ASM_START() +PROLOGUE(mpn_rshift,toc) + +ifdef(`HAVE_ABI_mode32',` + rldicl n, n, 0,32 C FIXME: avoid this zero extend +') + mflr r12 + LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1) + sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block + add r11, r11, r10 C address of L(oN) for N = cnt + srdi r10, n, 1 + mr rp, rp_param + subfic tnc, cnt, 64 + rlwinm. r8, n, 0,31,31 C extract bit 0 + mtctr r10 + beq L(evn) + +L(odd): ld r9, 0(up) + cmpdi cr0, n, 1 C n = 1? + beq L(1) + ld r8, 8(up) + addi r11, r11, -84 C L(o1) - L(e1) - 64 + mtlr r11 + sld r3, r9, tnc C retval + addi up, up, 8 + addi rp, rp, 8 + blr C branch to L(oN) + +L(evn): ld r8, 0(up) + ld r9, 8(up) + addi r11, r11, -64 + mtlr r11 + sld r3, r8, tnc C retval + addi up, up, 16 + blr C branch to L(eN) + +L(1): sld r3, r9, tnc C retval + srd r8, r9, cnt + std r8, 0(rp) + mtlr r12 +ifdef(`HAVE_ABI_mode32', +` mr r4, r3 + srdi r3, r3, 32 +') + blr + + +define(SHIFT,` +L(lo$1):ld r8, 0(up) + std r11, 0(rp) + addi rp, rp, 16 +L(o$1): srdi r10, r9, $1 + rldimi r10, r8, eval(64-$1), 0 + ld r9, 8(up) + addi up, up, 16 + std r10, -8(rp) +L(e$1): srdi r11, r8, $1 + rldimi r11, r9, eval(64-$1), 0 + bdnz L(lo$1) + std r11, 0(rp) + srdi r10, r9, $1 + b L(com) + nop + nop +') + + ALIGN(64) +forloop(`i',1,63,`SHIFT(i)') + +L(com): std r10, 8(rp) + mtlr r12 +ifdef(`HAVE_ABI_mode32', +` mr r4, r3 + srdi r3, r3, 32 +') + blr +EPILOGUE() +ASM_END() -- cgit v1.2.3