From a89a14ef5da44684a16b204e7a70460cc8c4922a Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Fri, 21 Jun 2024 23:36:36 +0200 Subject: Basic constant folding implementation --- .../gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm | 126 +++++++++++ .../mpn/sparc64/ultrasparct3/addmul_1.asm | 182 ++++++++++++++++ .../mpn/sparc64/ultrasparct3/aormul_2.asm | 228 ++++++++++++++++++++ .../mpn/sparc64/ultrasparct3/aormul_4.asm | 219 +++++++++++++++++++ .../mpn/sparc64/ultrasparct3/aorslsh_n.asm | 147 +++++++++++++ .../mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm | 147 +++++++++++++ .../mpn/sparc64/ultrasparct3/bdiv_q_1.asm | 137 ++++++++++++ .../mpn/sparc64/ultrasparct3/cnd_aors_n.asm | 145 +++++++++++++ .../gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm | 129 ++++++++++++ .../gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm | 78 +++++++ .../mpn/sparc64/ultrasparct3/invert_limb.asm | 92 ++++++++ .../gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm | 77 +++++++ .../gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m4 | 88 ++++++++ .../gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm | 233 +++++++++++++++++++++ .../mpn/sparc64/ultrasparct3/mod_34lsub1.asm | 117 +++++++++++ .../gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm | 82 ++++++++ .../gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm | 174 +++++++++++++++ .../mpn/sparc64/ultrasparct3/popcount.asm | 70 +++++++ .../mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm | 93 ++++++++ .../gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm | 144 +++++++++++++ .../mpn/sparc64/ultrasparct3/submul_1.asm | 170 +++++++++++++++ 21 files changed, 2878 insertions(+) create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/addmul_1.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_2.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_4.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aorslsh_n.asm create mode 100644 
vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_q_1.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/invert_limb.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m4 create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_34lsub1.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/popcount.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm create mode 100644 vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/submul_1.asm (limited to 'vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3') diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm new file mode 100644 index 0000000..0170746 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm @@ -0,0 +1,126 @@ +dnl SPARC v9 mpn_add_n for T3/T4. + +dnl Contributed to the GNU project by David Miller. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. 
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 8 +C UltraSPARC T4: 3 + +C INPUT PARAMETERS +define(`rp', `%i0') +define(`up', `%i1') +define(`vp', `%i2') +define(`n', `%i3') +define(`cy', `%i4') + +define(`u0_off', `%l2') +define(`u1_off', `%l3') +define(`loop_n', `%l6') +define(`tmp', `%l7') + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_add_nc) + save %sp, -176, %sp + b,a L(ent) +EPILOGUE() +PROLOGUE(mpn_add_n) + save %sp, -176, %sp + + mov 0, cy +L(ent): + subcc n, 1, n + be L(final_one) + cmp %g0, cy + + ldx [up + 0], %o4 + sllx n, 3, tmp + + ldx [vp + 0], %o5 + add up, tmp, u0_off + + ldx [up + 8], %g5 + neg tmp, loop_n + + ldx [vp + 8], %g1 + add u0_off, 8, u1_off + + sub loop_n, -(2 * 8), loop_n + + brgez,pn loop_n, L(loop_tail) + add vp, (2 * 8), vp + + b,a L(top) + ALIGN(16) +L(top): + addxccc(%o4, %o5, tmp) + ldx [vp + 0], %o5 + + add rp, (2 * 8), rp + ldx [loop_n + u0_off], %o4 + + add vp, (2 * 8), vp + stx tmp, [rp - 16] + + addxccc(%g1, %g5, tmp) + ldx [vp - 8], %g1 + + ldx [loop_n + u1_off], %g5 + sub loop_n, -(2 * 8), loop_n + + brlz loop_n, L(top) + stx tmp, [rp - 8] + +L(loop_tail): + addxccc(%o4, %o5, %g3) + add loop_n, u0_off, up + + addxccc(%g1, %g5, %g5) + stx %g3, [rp + 0] + + brgz,pt loop_n, L(done) + stx %g5, [rp + 8] 
+ + add rp, (2 * 8), rp +L(final_one): + ldx [up+0], %o4 + ldx [vp+0], %o5 + addxccc(%o4, %o5, %g3) + stx %g3, [rp+0] + +L(done): + addxc(%g0, %g0, %i0) + ret + restore +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/addmul_1.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/addmul_1.asm new file mode 100644 index 0000000..939811e --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/addmul_1.asm @@ -0,0 +1,182 @@ +dnl SPARC v9 mpn_addmul_1 for T3/T4/T5. + +dnl Contributed to the GNU project by David Miller and Torbjörn Granlund. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 26 +C UltraSPARC T4: 4.5 + +C INPUT PARAMETERS +define(`rp', `%i0') +define(`up', `%i1') +define(`n', `%i2') +define(`v0', `%i3') + +define(`u0', `%l0') +define(`u1', `%l1') +define(`u2', `%l2') +define(`u3', `%l3') +define(`r0', `%l4') +define(`r1', `%l5') +define(`r2', `%l6') +define(`r3', `%l7') + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_addmul_1) + save %sp, -176, %sp + ldx [up+0], %g1 + + and n, 3, %g3 + brz %g3, L(b0) + addcc %g0, %g0, %g5 C clear carry limb, flag + cmp %g3, 2 + bcs %xcc, L(b01) + nop + be %xcc, L(b10) + ldx [up+8], %g5 + +L(b11): ldx [up+16], u3 + mulx %g1, v0, %o2 + umulxhi(%g1, v0, %o3) + ldx [rp+0], r1 + mulx %g5, v0, %o4 + ldx [rp+8], r2 + umulxhi(%g5, v0, %o5) + ldx [rp+16], r3 + mulx u3, v0, %g4 + umulxhi(u3, v0, %g5) + addcc %o3, %o4, %o4 + addxccc(%o5, %g4, %g4) + addxc( %g0, %g5, %g5) + addcc r1, %o2, r1 + stx r1, [rp+0] + addxccc(r2, %o4, r2) + stx r2, [rp+8] + addxccc(r3, %g4, r3) + stx r3, [rp+16] + add n, -3, n + add up, 24, up + brz n, L(xit) + add rp, 24, rp + b L(com) + nop + +L(b10): mulx %g1, v0, %o4 + ldx [rp+0], r2 + umulxhi(%g1, v0, %o5) + ldx [rp+8], r3 + mulx %g5, v0, %g4 + umulxhi(%g5, v0, %g5) + addcc %o5, %g4, %g4 + addxc( %g0, %g5, %g5) + addcc r2, %o4, r2 + stx r2, [rp+0] + addxccc(r3, %g4, r3) + stx r3, [rp+8] + add n, -2, n + add up, 16, up + brz n, L(xit) + add rp, 16, rp + b L(com) + nop + +L(b01): ldx [rp+0], r3 + mulx %g1, v0, %g4 + umulxhi(%g1, v0, %g5) + addcc r3, %g4, r3 + stx r3, [rp+0] + add n, -1, n + add up, 8, up + brz n, L(xit) + add rp, 8, rp + +L(com): ldx [up+0], %g1 +L(b0): ldx [up+8], u1 + ldx [up+16], u2 + ldx [up+24], u3 + mulx %g1, v0, %o0 + umulxhi(%g1, v0, %o1) + b L(lo0) + nop + + ALIGN(16) +L(top): ldx [up+0], u0 + addxc( %g0, %g5, %g5) C propagate carry into carry limb + ldx [up+8], u1 + addcc r0, %o0, r0 + ldx [up+16], u2 + addxccc(r1, %o2, r1) + ldx [up+24], u3 + addxccc(r2, %o4, r2) + 
stx r0, [rp-32] + addxccc(r3, %g4, r3) + stx r1, [rp-24] + mulx u0, v0, %o0 + stx r2, [rp-16] + umulxhi(u0, v0, %o1) + stx r3, [rp-8] +L(lo0): mulx u1, v0, %o2 + ldx [rp+0], r0 + umulxhi(u1, v0, %o3) + ldx [rp+8], r1 + mulx u2, v0, %o4 + ldx [rp+16], r2 + umulxhi(u2, v0, %o5) + ldx [rp+24], r3 + mulx u3, v0, %g4 + addxccc(%g5, %o0, %o0) + umulxhi(u3, v0, %g5) + add up, 32, up + addxccc(%o1, %o2, %o2) + add rp, 32, rp + addxccc(%o3, %o4, %o4) + add n, -4, n + addxccc(%o5, %g4, %g4) + brgz n, L(top) + nop + + addxc( %g0, %g5, %g5) + addcc r0, %o0, r0 + stx r0, [rp-32] + addxccc(r1, %o2, r1) + stx r1, [rp-24] + addxccc(r2, %o4, r2) + stx r2, [rp-16] + addxccc(r3, %g4, r3) + stx r3, [rp-8] +L(xit): addxc( %g0, %g5, %i0) + ret + restore +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_2.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_2.asm new file mode 100644 index 0000000..ccc6a44 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_2.asm @@ -0,0 +1,228 @@ +dnl SPARC v9 mpn_mul_2 and mpn_addmul_2 for T3/T4/T5. + +dnl Contributed to the GNU project by Torbjörn Granlund. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + + +C cycles/limb cycles/limb +C mul_2 addmul_2 +C UltraSPARC T3: 22.5 23.5 +C UltraSPARC T4: 3.25 3.75 + + +C The code is reasonably scheduled but also relies on OoO. There was hope that +C this could run at around 3.0 and 3.5 c/l respectively, on T4. Two cycles per +C iteration needs to be removed. +C +C We could almost use 2-way unrolling, but currently the wN registers live too +C long. By changing add x,w1,w1 to add x,w1,w0, i.e. migrate the values down- +C wards, 2-way unrolling should become possible. With n-indexed addressing it +C should run no slower. +C +C The rp loads to g1/g3 are very much over-scheduled. Presumably, they could +C be postponed a full way, and then just one register could be used. 
+ +C INPUT PARAMETERS +define(`rp', `%i0') +define(`up', `%i1') +define(`n', `%i2') +define(`vp', `%i3') + +define(`v0', `%o0') +define(`v1', `%o1') + +define(`w0', `%o2') +define(`w1', `%o3') +define(`w2', `%o4') +define(`w3', `%o5') + +ifdef(`OPERATION_mul_2',` + define(`AM2', `') + define(`ADDX', `addcc`'$1') + define(`func', `mpn_mul_2') +') +ifdef(`OPERATION_addmul_2',` + define(`AM2', `$1') + define(`ADDX', `addxccc($1,$2,$3)') + define(`func', `mpn_addmul_2') +') + + +MULFUNC_PROLOGUE(mpn_mul_2 mpn_addmul_2) + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(func) + save %sp, -176, %sp + + ldx [vp+0], v0 C load v0 + and n, 3, %g5 + ldx [vp+8], v1 C load v1 + add n, -6, n + ldx [up+0], %g4 + brz %g5, L(b0) + cmp %g5, 2 + bcs L(b1) + nop + be L(b2) + nop + +L(b3): +AM2(` ldx [rp+0], %g1') + mulx %g4, v0, w2 + umulxhi(%g4, v0, w3) + ldx [up+8], %i5 + mulx %g4, v1, %l3 + umulxhi(%g4, v1, %l7) +AM2(` ldx [rp+8], %g3') + add up, -8, up + add rp, -8, rp + b L(lo3) + mov 0, w0 + +L(b2): +AM2(` ldx [rp+0], %g3') + mulx %g4, v0, w3 + umulxhi(%g4, v0, w0) + ldx [up+8], %i4 + mulx %g4, v1, %l1 + umulxhi(%g4, v1, %l5) +AM2(` ldx [rp+8], %g1') + add rp, 16, rp + brlz n, L(end) + mov 0, w1 + ba L(top) + add up, 16, up + +L(b1): +AM2(` ldx [rp+0], %g1') + mulx %g4, v0, w0 + umulxhi(%g4, v0, w1) + ldx [up+8], %i5 + mulx %g4, v1, %l3 + umulxhi(%g4, v1, %l7) +AM2(` ldx [rp+8], %g3') + add up, 8, up + add rp, 8, rp + b L(lo1) + mov 0, w2 + +L(b0): +AM2(` ldx [rp+0], %g3') + mulx %g4, v0, w1 + umulxhi(%g4, v0, w2) + ldx [up+8], %i4 + mulx %g4, v1, %l1 + umulxhi(%g4, v1, %l5) +AM2(` ldx [rp+8], %g1') + b L(lo0) + mov 0, w3 + + ALIGN(16) C cycle +L(top): mulx %i4, v0, %l2 C 0->5 + umulxhi(%i4, v0, %l6) C 0->5 + ldx [up+0], %i5 C 1->6 +AM2(` addcc w3, %g3, w3') C 1 + stx w3, [rp-16] C 2 + ADDX(` %l1, w0, w0') C 2 + addxccc(%l5, w1, w1) C 3 + mulx %i4, v1, %l3 C 3->9 + umulxhi(%i4, v1, %l7) C 4->9 +AM2(` ldx [rp+0], %g3') C 4 + addcc %l2, w0, w0 C 5 + 
addxccc(%l6, w1, w1) C 5 + addxc( %g0, %g0, w2) C 6 +L(lo1): mulx %i5, v0, %l0 C 6 + umulxhi(%i5, v0, %l4) C 7 + ldx [up+8], %i4 C 7 +AM2(` addcc w0, %g1, w0') C 8 + stx w0, [rp-8] C 8 + ADDX(` %l3, w1, w1') C 9 + addxccc(%l7, w2, w2) C 9 + mulx %i5, v1, %l1 C 10 + umulxhi(%i5, v1, %l5) C 10 +AM2(` ldx [rp+8], %g1') C 11 + addcc %l0, w1, w1 C 11 + addxccc(%l4, w2, w2) C 12 + addxc( %g0, %g0, w3) C 12 +L(lo0): mulx %i4, v0, %l2 C 13 + umulxhi(%i4, v0, %l6) C 13 + ldx [up+16], %i5 C 14 +AM2(` addcc w1, %g3, w1') C 14 + stx w1, [rp+0] C 15 + ADDX(` %l1, w2, w2') C 15 + addxccc(%l5, w3, w3) C 16 + mulx %i4, v1, %l3 C 16 + umulxhi(%i4, v1, %l7) C 17 +AM2(` ldx [rp+16], %g3') C 17 + addcc %l2, w2, w2 C 18 + addxccc(%l6, w3, w3) C 18 + addxc( %g0, %g0, w0) C 19 +L(lo3): mulx %i5, v0, %l0 C 19 + umulxhi(%i5, v0, %l4) C 20 + ldx [up+24], %i4 C 20 +AM2(` addcc w2, %g1, w2') C 21 + stx w2, [rp+8] C 21 + ADDX(` %l3, w3, w3') C 22 + addxccc(%l7, w0, w0) C 22 + mulx %i5, v1, %l1 C 23 + umulxhi(%i5, v1, %l5) C 23 +AM2(` ldx [rp+24], %g1') C 24 + addcc %l0, w3, w3 C 24 + addxccc(%l4, w0, w0) C 25 + addxc( %g0, %g0, w1) C 25 + add up, 32, up + add rp, 32, rp + brgz n, L(top) + add n, -4, n + +L(end): mulx %i4, v0, %l2 + umulxhi(%i4, v0, %l6) +AM2(` addcc w3, %g3, w3') + stx w3, [rp-16] + ADDX(` %l1, w0, w0') + addxccc(%l5, w1, w1) + mulx %i4, v1, %l3 + umulxhi(%i4, v1, %l7) + addcc %l2, w0, w0 + addxccc(%l6, w1, w1) + addxc( %g0, %g0, w2) +AM2(` addcc w0, %g1, w0') + stx w0, [rp-8] + ADDX(` %l3, w1, w1') + stx w1, [rp+0] + addxc(%l7, w2, %i0) + + ret + restore +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_4.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_4.asm new file mode 100644 index 0000000..845f6d6 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_4.asm @@ -0,0 +1,219 @@ +dnl SPARC v9 mpn_mul_4 and mpn_addmul_4 for T3/T4/T5. + +dnl Contributed to the GNU project by Torbjörn Granlund. 
+ +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + + +C cycles/limb cycles/limb +C mul_4 addmul_4 +C UltraSPARC T3: 21.5 22.0 +C UltraSPARC T4: 2.625 2.75 + + +C The code is well-scheduled and relies on OoO very little. There is hope that +C this will run at around 2.5 and 2.75 c/l respectively, on T4. 
+ +define(`rp', `%i0') +define(`up', `%i1') +define(`n', `%i2') +define(`vp', `%i3') + +define(`v0', `%g1') +define(`v1', `%o7') +define(`v2', `%g2') +define(`v3', `%i3') + +define(`w0', `%o0') +define(`w1', `%o1') +define(`w2', `%o2') +define(`w3', `%o3') +define(`w4', `%o4') + +define(`r0', `%o5') + +define(`u0', `%i4') +define(`u1', `%i5') + +define(`rp0', `rp') +define(`rp1', `%g3') +define(`rp2', `%g4') +define(`up0', `up') +define(`up1', `%g5') + +ifdef(`OPERATION_mul_4',` + define(`AM4', `') + define(`ADDX', `addcc`'$1') + define(`func', `mpn_mul_4') +') +ifdef(`OPERATION_addmul_4',` + define(`AM4', `$1') + define(`ADDX', `addxccc($1,$2,$3)') + define(`func', `mpn_addmul_4') +') + + +MULFUNC_PROLOGUE(mpn_mul_4 mpn_addmul_4) + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(func) + save %sp, -176, %sp + + ldx [up + 0], u1 C load up[0] early + andcc n, 1, %g0 C is n odd? + ldx [vp + 0], v0 + sllx n, 3, n + ldx [vp + 8], v1 + add n, -28, n + ldx [vp + 16], v2 + add rp, -16, rp + ldx [vp + 24], v3 + add up, n, up0 + add rp, n, rp0 + add up0, 8, up1 + add rp0, 8, rp1 + add rp0, 16, rp2 + mulx u1, v0, %l0 + mov 0, w0 + mulx u1, v1, %l1 + mov 0, w1 + mulx u1, v2, %l2 + mov 0, w2 + mulx u1, v3, %l3 + mov 0, w3 + + be L(evn) + neg n, n + +L(odd): mov u1, u0 + ldx [up1 + n], u1 +AM4(` ldx [rp2 + n], r0') + umulxhi(u0, v0, %l4) + umulxhi(u0, v1, %l5) + umulxhi(u0, v2, %l6) + umulxhi(u0, v3, %l7) + b L(mid) + add n, 8, n + +L(evn): ldx [up1 + n], u0 +AM4(` ldx [rp2 + n], r0') + umulxhi(u1, v0, %l4) + umulxhi(u1, v1, %l5) + umulxhi(u1, v2, %l6) + umulxhi(u1, v3, %l7) + add n, 16, n + + ALIGN(16) +L(top): addcc %l0, w0, w0 + mulx u0, v0, %l0 C w 0 + addxccc(%l1, w1, w1) + mulx u0, v1, %l1 C w 1 + addxccc(%l2, w2, w2) + mulx u0, v2, %l2 C w 2 + addxccc(%l3, w3, w3) + mulx u0, v3, %l3 C w 3 + ldx [up0 + n], u1 + addxc( %g0, %g0, w4) +AM4(` addcc r0, w0, w0') + stx w0, [rp0 + n] + ADDX(` %l4, w1, w0') + umulxhi(u0, v0, %l4) C w 1 +AM4(` ldx [rp1 + 
n], r0') + addxccc(%l5, w2, w1) + umulxhi(u0, v1, %l5) C w 2 + addxccc(%l6, w3, w2) + umulxhi(u0, v2, %l6) C w 3 + addxc( %l7, w4, w3) + umulxhi(u0, v3, %l7) C w 4 +L(mid): addcc %l0, w0, w0 + mulx u1, v0, %l0 C w 1 + addxccc(%l1, w1, w1) + mulx u1, v1, %l1 C w 2 + addxccc(%l2, w2, w2) + mulx u1, v2, %l2 C w 3 + addxccc(%l3, w3, w3) + mulx u1, v3, %l3 C w 4 + ldx [up1 + n], u0 + addxc( %g0, %g0, w4) +AM4(` addcc r0, w0, w0') + stx w0, [rp1 + n] + ADDX(` %l4, w1, w0') + umulxhi(u1, v0, %l4) C w 2 +AM4(` ldx [rp2 + n], r0') + addxccc(%l5, w2, w1) + umulxhi(u1, v1, %l5) C w 3 + addxccc(%l6, w3, w2) + umulxhi(u1, v2, %l6) C w 4 + addxc( %l7, w4, w3) + umulxhi(u1, v3, %l7) C w 5 + brlz n, L(top) + add n, 16, n + +L(end): addcc %l0, w0, w0 + mulx u0, v0, %l0 + addxccc(%l1, w1, w1) + mulx u0, v1, %l1 + addxccc(%l2, w2, w2) + mulx u0, v2, %l2 + addxccc(%l3, w3, w3) + mulx u0, v3, %l3 + addxc( %g0, %g0, w4) +AM4(` addcc r0, w0, w0') + stx w0, [rp0 + n] + ADDX(` %l4, w1, w0') + umulxhi(u0, v0, %l4) +AM4(` ldx [rp1 + n], r0') + addxccc(%l5, w2, w1) + umulxhi(u0, v1, %l5) + addxccc(%l6, w3, w2) + umulxhi(u0, v2, %l6) + addxc( %l7, w4, w3) + umulxhi(u0, v3, %l7) + addcc %l0, w0, w0 + addxccc(%l1, w1, w1) + addxccc(%l2, w2, w2) + addxccc(%l3, w3, w3) + addxc( %g0, %g0, w4) +AM4(` addcc r0, w0, w0') + stx w0, [rp1 + n] + ADDX(` %l4, w1, w0') + addxccc(%l5, w2, w1) + addxccc(%l6, w3, w2) + stx w0, [rp2 + n] + add n, 16, n + stx w1, [rp1 + n] + stx w2, [rp2 + n] + addxc( %l7, w4, %i0) + ret + restore +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aorslsh_n.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aorslsh_n.asm new file mode 100644 index 0000000..1014b1b --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/aorslsh_n.asm @@ -0,0 +1,147 @@ +dnl SPARC v9 mpn_addlsh_n and mpn_sublsh_n for T3/T4/T5. + +dnl Contributed to the GNU project by Torbjörn Granlund. + +dnl Copyright 2013 Free Software Foundation, Inc. 
+ +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 11 +C UltraSPARC T4: 4 + +C For sublsh_n we combine the two shifted limbs using xnor, using the identity +C (a xor not b) = (not (a xor b)) which equals (not (a or b)) when (a and b) = +C 0 as it is in our usage. This gives us the ones complement for free. +C Unfortunately, the same trick will not work for rsblsh_n, which will instead +C require a separate negation. +C +C FIXME: Add rsblsh_n to this file. 
+ +define(`rp', `%i0') +define(`up', `%i1') +define(`vp', `%i2') +define(`n', `%i3') +define(`cnt',`%i4') + +define(`tnc',`%o5') + +ifdef(`OPERATION_addlsh_n',` + define(`INITCY', `subcc %g0, 0, %g0') + define(`MERGE', `or') + define(`func', `mpn_addlsh_n') +') +ifdef(`OPERATION_sublsh_n',` + define(`INITCY', `subcc %g0, 1, %g0') + define(`MERGE', `xnor') + define(`func', `mpn_sublsh_n') +') + +define(`rp0', `rp') +define(`rp1', `%o2') +define(`up0', `up') +define(`up1', `%o3') +define(`vp0', `vp') +define(`vp1', `%o4') + +MULFUNC_PROLOGUE(mpn_addlsh_n mpn_sublsh_n) +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(func) + save %sp, -176, %sp + mov 64, tnc + sub tnc, cnt, tnc + + andcc n, 1, %g0 + sllx n, 3, n + add n, -16, n + add up, n, up0 + add vp, n, vp0 + add rp, n, rp0 + add up0, 8, up1 + add vp0, 8, vp1 + add rp0, -8, rp1 + add rp0, -16, rp0 + neg n, n + be L(evn) + INITCY + +L(odd): ldx [vp0 + n], %l1 + mov 0, %l2 + ldx [up0 + n], %l5 + sllx %l1, cnt, %g3 + brgez n, L(wd1) + add n, 8, n + ldx [vp0 + n], %l0 + b L(lo1) + sllx %l1, cnt, %g3 + +L(evn): ldx [vp0 + n], %l0 + mov 0, %l3 + ldx [up0 + n], %l4 + ldx [vp1 + n], %l1 + b L(lo0) + sllx %l0, cnt, %g1 + +L(top): addxccc(%l6, %l4, %o0) + ldx [vp0 + n], %l0 + sllx %l1, cnt, %g3 + stx %o0, [rp0 + n] +L(lo1): srlx %l1, tnc, %l3 + MERGE %l2, %g3, %l7 + ldx [up0 + n], %l4 + addxccc(%l7, %l5, %o1) + ldx [vp1 + n], %l1 + sllx %l0, cnt, %g1 + stx %o1, [rp1 + n] +L(lo0): srlx %l0, tnc, %l2 + MERGE %l3, %g1, %l6 + ldx [up1 + n], %l5 + brlz,pt n, L(top) + add n, 16, n + + addxccc(%l6, %l4, %o0) + sllx %l1, cnt, %g3 + stx %o0, [rp0 + n] +L(wd1): srlx %l1, tnc, %l3 + MERGE %l2, %g3, %l7 + addxccc(%l7, %l5, %o1) + stx %o1, [rp1 + n] + +ifdef(`OPERATION_addlsh_n', +` addxc( %l3, %g0, %i0)') +ifdef(`OPERATION_sublsh_n', +` addxc( %g0, %g0, %g1) + add %g1, -1, %g1 + sub %l3, %g1, %i0') + + ret + restore +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm 
b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm new file mode 100644 index 0000000..550860d --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm @@ -0,0 +1,147 @@ +dnl SPARC T3/T4/T5 mpn_bdiv_dbm1c. + +dnl Contributed to the GNU project by Torbjörn Granlund. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 25 +C UltraSPARC T4/T5: 4 + +C INPUT PARAMETERS +define(`qp', `%i0') +define(`ap', `%i1') +define(`n', `%i2') +define(`bd', `%i3') +define(`h', `%i4') + +define(`plo0',`%g4') define(`plo1',`%g5') +define(`phi0',`%l0') define(`phi1',`%l1') +define(`a0', `%g1') define(`a1', `%g3') + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_bdiv_dbm1c) + save %sp, -176, %sp + + and n, 3, %g5 + ldx [ap + 0], %g2 + add n, -5, n + brz %g5, L(b0) + cmp %g5, 2 + bcs %xcc, L(b1) + nop + be %xcc, L(b2) + nop + +L(b3): ldx [ap + 8], a0 + mulx bd, %g2, plo1 + umulxhi(bd, %g2, phi1) + ldx [ap + 16], a1 + add qp, -24, qp + b L(lo3) + add ap, -8, ap + +L(b2): ldx [ap + 8], a1 + mulx bd, %g2, plo0 + umulxhi(bd, %g2, phi0) + brlz,pt n, L(wd2) + nop +L(gt2): ldx [ap + 16], a0 + add ap, 16, ap + b L(lo2) + add n, -1, n + +L(b1): mulx bd, %g2, plo1 + umulxhi(bd, %g2, phi1) + brlz,pn n, L(wd1) + add qp, -8, qp +L(gt1): ldx [ap + 8], a0 + ldx [ap + 16], a1 + b L(lo1) + add ap, 8, ap + +L(b0): ldx [ap + 8], a1 + mulx bd, %g2, plo0 + umulxhi(bd, %g2, phi0) + ldx [ap + 16], a0 + b L(lo0) + add qp, -16, qp + +L(top): ldx [ap + 0], a0 + sub h, phi1, h +L(lo2): mulx bd, a1, plo1 + umulxhi(bd, a1, phi1) + subcc h, plo0, h + addxc( phi0, %g0, phi0) + stx h, [qp + 0] + ldx [ap + 8], a1 + sub h, phi0, h +L(lo1): mulx bd, a0, plo0 + umulxhi(bd, a0, phi0) + subcc h, plo1, h + addxc( phi1, %g0, phi1) + stx h, [qp + 8] + ldx [ap + 16], a0 + sub h, phi1, h +L(lo0): mulx bd, a1, plo1 + umulxhi(bd, a1, phi1) + subcc h, plo0, h + addxc( phi0, %g0, phi0) + stx h, [qp + 16] + ldx [ap + 24], a1 + sub h, phi0, h +L(lo3): mulx bd, a0, plo0 + umulxhi(bd, a0, phi0) + subcc h, plo1, h + addxc( phi1, %g0, phi1) + stx h, [qp + 24] + add ap, 32, ap + add qp, 32, qp + brgz,pt n, L(top) + add n, -4, n + +L(end): sub h, phi1, h +L(wd2): mulx bd, a1, plo1 + umulxhi(bd, a1, phi1) + subcc h, plo0, h + addxc( phi0, %g0, phi0) + stx h, [qp + 
0] + sub h, phi0, h +L(wd1): subcc h, plo1, h + addxc( phi1, %g0, phi1) + stx h, [qp + 8] + sub h, phi1, %i0 + + ret + restore +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_q_1.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_q_1.asm new file mode 100644 index 0000000..9847047 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_q_1.asm @@ -0,0 +1,137 @@ +dnl SPARC T3/T4/T5 mpn_bdiv_q_1. + +dnl Contributed to the GNU project by Torbjörn Granlund. + +dnl Copyright 2013, 2017 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 31 +C UltraSPARC T4/T5: 20-26 hits 20 early, then sharply drops + +C INPUT PARAMETERS +define(`qp', `%i0') +define(`ap', `%i1') +define(`n', `%i2') +define(`d', `%i3') +define(`dinv',`%i4') +define(`cnt', `%i5') + +define(`tnc', `%o2') + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_bdiv_q_1) + save %sp, -176, %sp + ldx [ap], %o5 + add d, -1, %g1 + andn %g1, d, %g1 + popc %g1, cnt + + srlx d, cnt, d + srlx d, 1, %g1 + and %g1, 127, %g1 + LEA64(binvert_limb_table, g2, g4) + ldub [%g2+%g1], %g1 + add %g1, %g1, %g2 + mulx %g1, %g1, %g1 + mulx %g1, d, %g1 + sub %g2, %g1, %g2 + add %g2, %g2, %g1 + mulx %g2, %g2, %g2 + mulx %g2, d, %g2 + sub %g1, %g2, %g1 + add %g1, %g1, %o7 + mulx %g1, %g1, %g1 + mulx %g1, d, %g1 + add n, -2, n + brz,pt cnt, L(norm) + sub %o7, %g1, dinv + + brlz,pt n, L(edu) + srlx %o5, cnt, %o5 + b L(eee) + mov 0, %g4 +EPILOGUE() + +PROLOGUE(mpn_pi1_bdiv_q_1) + save %sp, -176, %sp + ldx [ap], %o5 + + brz,pt cnt, L(norm) + add n, -2, n + +L(unorm): + brlz,pt n, L(edu) + srlx %o5, cnt, %o5 + mov 0, %g4 +L(eee): sub %g0, cnt, tnc + +L(tpu): ldx [ap+8], %g3 + add ap, 8, ap + sllx %g3, tnc, %g5 + or %g5, %o5, %g5 + srlx %g3, cnt, %o5 + subcc %g5, %g4, %g4 + mulx %g4, dinv, %g1 + stx %g1, [qp] + add qp, 8, qp + umulxhi(d, %g1, %g1) + addxc( %g1, %g0, %g4) + brgz,pt n, L(tpu) + add n, -1, n + + sub %o5, %g4, %o5 +L(edu): mulx %o5, dinv, %g1 + return %i7+8 + stx %g1, [%o0] + +L(norm): + mulx dinv, %o5, %g1 + brlz,pt n, L(edn) + stx %g1, [qp] + add qp, 8, qp + addcc %g0, 0, %g4 + +L(tpn): umulxhi(d, %g1, %g1) + ldx [ap+8], %g5 + add ap, 8, ap + addxc( %g1, %g0, %g1) + subcc %g5, %g1, %g1 + mulx %g1, dinv, %g1 + stx %g1, [qp] + add qp, 8, qp + brgz,pt n, L(tpn) + add n, -1, n + +L(edn): return %i7+8 + nop +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm new file mode 100644 index 
0000000..49ccaec --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm @@ -0,0 +1,145 @@ +dnl SPARC v9 mpn_cnd_add_n and mpn_cnd_sub_n for T3/T4/T5. + +dnl Contributed to the GNU project by David Miller and Torbjörn Granlund. + +dnl Copyright 2013, 2017 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 8.5 +C UltraSPARC T4: 3 + +C We use a double-pointer trick to allow indexed addressing. Its setup +C cost might be a problem in these functions, since we don't expect huge n +C arguments. +C +C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can +C instead do ~(a & ~mask) = (~a | mask), allowing us to use the orn insn. 
+
+C INPUT PARAMETERS
+define(`cnd', `%i0')
+define(`rp', `%i1')
+define(`up', `%i2')
+define(`vp', `%i3')
+define(`n', `%i4')
+
+define(`mask', `cnd')
+define(`up0', `%l0') define(`up1', `%l1')
+define(`vp0', `%l2') define(`vp1', `%l3')
+define(`rp0', `%g4') define(`rp1', `%g5')
+define(`u0', `%l4') define(`u1', `%l5')
+define(`v0', `%l6') define(`v1', `%l7')
+define(`x0', `%g1') define(`x1', `%g3')
+define(`w0', `%g1') define(`w1', `%g3')
+
+ifdef(`OPERATION_cnd_add_n',`
+ define(`LOGOP', `and	$1, $2, $3')
+ define(`MAKEMASK',`cmp	%g0, $1
+	addxc(	%g0, %g0, $2)
+	neg	$2, $2')
+ define(`INITCY', `addcc	%g0, 0, %g0')
+ define(`RETVAL', `addxc(	%g0, %g0, %i0)')
+ define(`func', `mpn_cnd_add_n')
+')
+ifdef(`OPERATION_cnd_sub_n',`
+ define(`LOGOP', `orn	$2, $1, $3')
+ define(`MAKEMASK',`cmp	$1, 1
+	addxc(	%g0, %g0, $2)
+	neg	$2, $2')
+ define(`INITCY', `subcc	%g0, 1, %g0')
+ define(`RETVAL', `addxc(	%g0, %g0, %i0)
+	xor	%i0, 1, %i0')
+ define(`func', `mpn_cnd_sub_n')
+')
+
+C Summary of the helper macros defined above:
+C   MAKEMASK leaves 0 or all ones in mask.  For cnd_add_n it is all ones when
+C            cnd is nonzero; for cnd_sub_n it is all ones when cnd is zero,
+C            i.e. the complemented mask that the orn form of LOGOP needs.
+C   LOGOP    turns a vp limb into the addend: v & mask (add) or mask | ~v (sub).
+C   INITCY   presets the carry flag: clear for add, set for sub.
+C   RETVAL   puts the final carry (add) or the inverted carry (sub) in %i0.
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(func)
+	save	%sp, -176, %sp
+
+	MAKEMASK(cnd,mask)
+
+	andcc	n, 1, %g0		C parity of n picks L(odd) or L(evn)
+	sllx	n, 3, n			C limb count to bytes
+	add	n, -16, n
+	add	vp, n, vp0		C vp0, up0, rp0 = operand start + 8*n - 16
+	add	up, n, up0
+	add	rp, n, rp0
+	neg	n, n			C negate: the index now counts upwards
+	be	L(evn)			C even limb count
+	 INITCY				C delay slot: preset the carry flag
+
+L(odd):	ldx	[vp0 + n], v1		C handle the single leading limb
+	ldx	[up0 + n], u1
+	LOGOP(	v1, mask, x1)
+	addxccc(u1, x1, w1)
+	stx	w1, [rp0 + n]
+	add	n, 8, n
+	brgz	n, L(rtn)		C that was the only limb
+	 nop
+
+L(evn):	add	vp0, 8, vp1		C second pointer of each pair, one limb further
+	add	up0, 8, up1
+	add	rp0, -24, rp1
+	ldx	[vp0 + n], v0
+	ldx	[vp1 + n], v1
+	ldx	[up0 + n], u0
+	ldx	[up1 + n], u1
+	add	n, 16, n
+	brgz	n, L(end)		C only the two limbs just loaded remain
+	 add	rp0, -16, rp0
+
+L(top):	LOGOP(	v0, mask, x0)		C two limbs per iteration, next pair loaded early
+	ldx	[vp0 + n], v0
+	LOGOP(	v1, mask, x1)
+	ldx	[vp1 + n], v1
+	addxccc(u0, x0, w0)
+	ldx	[up0 + n], u0
+	addxccc(u1, x1, w1)
+	ldx	[up1 + n], u1
+	stx	w0, [rp0 + n]
+	add	n, 16, n
+	brlez	n, L(top)
+	 stx	w1, [rp1 + n]
+
+L(end):	LOGOP(	v0, mask, x0)		C final pair, nothing left to load
+	LOGOP(	v1, mask, x1)
+	addxccc(u0, x0, w0)
+	addxccc(u1, x1, w1)
+	stx	w0, [rp0 + n]
+	stx	w1, [rp1 + 32]
+
+L(rtn):	RETVAL
+	ret
+	 restore
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm
new file mode 100644
index 0000000..d7dbdf9
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm
@@ -0,0 +1,129 @@
+dnl SPARC T3/T4/T5 mpn_divexact_1.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 31
+C UltraSPARC T4/T5: 20-26 hits 20 early, then sharply drops
+
+C INPUT PARAMETERS
+define(`qp', `%i0')
+define(`ap', `%i1')
+define(`n', `%i2')
+define(`d', `%i3')
+
+define(`dinv',`%o4')
+
+C One limb is handled with a plain udivx.  For longer operands the trailing
+C zero bits of d are stripped, dinv is built from binvert_limb_table plus
+C three refinement steps, and one of two loops produces the quotient limbs:
+C L(top_unnorm) when d had trailing zeros, L(top_norm) when it had none.
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_divexact_1)
+	save	%sp, -176, %sp
+	cmp	n, 1
+	bne,pt	%xcc, L(gt1)
+	 ldx	[ap], %o5		C delay slot: first source limb, needed on both paths
+	udivx	%o5, d, %g1		C n == 1: direct division
+	stx	%g1, [qp]
+	return	%i7+8
+	 nop
+
+L(gt1):	add	d, -1, %g1
+	andn	%g1, d, %g1		C (d-1) & ~d = mask of the trailing zero bits of d
+	popc	%g1, %i4		C i4 = count_trailing_zeros(d)
+
+	srlx	d, %i4, d		C shift the trailing zeros out of d
+	srlx	d, 1, %g1
+	and	%g1, 127, %g1		C table index = (d >> 1) & 127
+
+	LEA64(binvert_limb_table, g2, g4)
+	ldub	[%g2+%g1], %g1		C start value x from the table
+	add	%g1, %g1, %g2
+	mulx	%g1, %g1, %g1
+	mulx	%g1, d, %g1
+	sub	%g2, %g1, %g2		C x = 2*x - x*x*d
+	add	%g2, %g2, %g1
+	mulx	%g2, %g2, %g2
+	mulx	%g2, d, %g2
+	sub	%g1, %g2, %g1		C same step again
+	add	%g1, %g1, %o7		C %o7 is used as a temporary here
+	mulx	%g1, %g1, %g1
+	mulx	%g1, d, %g1
+	add	n, -2, n
+	brz,pt	%i4, L(norm)		C no trailing zeros: no shifting needed
+	 sub	%o7, %g1, dinv		C delay slot: third step, result is dinv
+
+L(unnorm):
+	mov	0, %g4			C running carry limb starts at 0
+	sub	%g0, %i4, %o2		C negated count, used as the left shift count
+	srlx	%o5, %i4, %o5
+L(top_unnorm):
+	ldx	[ap+8], %g3		C next source limb
+	add	ap, 8, ap
+	sllx	%g3, %o2, %g5
+	or	%g5, %o5, %g5		C source limb pair shifted right by the count
+	srlx	%g3, %i4, %o5		C upper part kept for the next round
+	subcc	%g5, %g4, %g4		C subtract carry limb, borrow goes to the carry flag
+	mulx	%g4, dinv, %g1		C quotient limb
+	stx	%g1, [qp]
+	add	qp, 8, qp
+	umulxhi(d, %g1, %g1)		C high limb of d * quotient limb
+	addxc(	%g1, %g0, %g4)		C new carry limb = high limb + borrow
+	brgz,pt	n, L(top_unnorm)
+	 add	n, -1, n
+
+	sub	%o5, %g4, %g4
+	mulx	%g4, dinv, %g1		C last quotient limb
+	stx	%g1, [qp]
+	return	%i7+8
+	 nop
+
+L(norm):
+	mulx	dinv, %o5, %g1		C first quotient limb
+	stx	%g1, [qp]
+	add	qp, 8, qp
+	addcc	%g0, 0, %g4		C clears the carry flag (and %g4)
+L(top_norm):
+	umulxhi(d, %g1, %g1)		C high limb of d * previous quotient limb
+	ldx	[ap+8], %g5
+	add	ap, 8, ap
+	addxc(	%g1, %g0, %g1)		C add the borrow of the previous subcc
+	subcc	%g5, %g1, %g1
+	mulx	%g1, dinv, %g1		C quotient limb
+	stx	%g1, [qp]
+	add	qp, 8, qp
+	brgz,pt	n, L(top_norm)
+	 add	n, -1, n
+
+	return	%i7+8
+	 nop
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm
new file mode 100644
index 0000000..20ed8bf
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm
@@ -0,0 +1,78 @@
+dnl SPARC v9 mpn_hamdist
for T3/T4. + +dnl Contributed to the GNU project by David Miller. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 18
+C UltraSPARC T4: 3.5
+
+C INPUT PARAMETERS
+define(`up', `%o0')
+define(`vp', `%o1')
+define(`n', `%o2')
+define(`pcnt', `%o5')
+
+C Leaf routine (no register window is allocated).  Sums popc(up[i] xor vp[i])
+C over the n limbs, two limbs per loop iteration, with a one-limb tail for
+C odd n.  The total is returned in %o0.
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_hamdist)
+	subcc	n, 1, n
+	be	L(final_one)		C n was 1: only the tail
+	 clr	pcnt			C delay slot: running total = 0
+L(top):
+	ldx	[up + 0], %g1
+	ldx	[vp + 0], %g2
+	ldx	[up + 8], %o4
+	ldx	[vp + 8], %g3
+	sub	n, 2, n
+	xor	%g1, %g2, %g1		C differing bits of the first limb pair
+	add	up, 16, up
+	popc	%g1, %g2
+	add	vp, 16, vp
+	xor	%o4, %g3, %o4		C differing bits of the second limb pair
+	add	pcnt, %g2, pcnt
+	popc	%o4, %g3
+	brgz	n, L(top)
+	 add	pcnt, %g3, pcnt
+	brlz,pt	n, L(done)		C n went negative: limb count was even
+	 nop
+L(final_one):
+	ldx	[up + 0], %g1		C one limb left over
+	ldx	[vp + 0], %g2
+	xor	%g1,%g2, %g1
+	popc	%g1, %g2
+	add	pcnt, %g2, pcnt
+L(done):
+	retl
+	 mov	pcnt, %o0		C delay slot: return value
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/invert_limb.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/invert_limb.asm
new file mode 100644
index 0000000..4da49cf
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/invert_limb.asm
@@ -0,0 +1,92 @@
+dnl SPARC T3/T4/T5 mpn_invert_limb.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: ?
+C UltraSPARC T4/T5: ?
+
+C INPUT PARAMETERS
+define(`d', `%o0')
+
+C Leaf routine (no register window is allocated); the result is returned in
+C %o0.  A 16-bit start value is read from approx_tab and then refined by the
+C straight-line multiply/subtract sequence below.
+C NOTE(review): the table index uses bits 55..62 of d only, so d presumably
+C has bit 63 set on entry - confirm against the callers.
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_invert_limb)
+	srlx	d, 54, %g1
+	LEA64(approx_tab, g2, g3)
+	and	%g1, 0x1fe, %g1		C byte offset = 2 * (bits 55..62 of d)
+	srlx	d, 24, %g4
+	lduh	[%g2+%g1], %g3		C 16-bit table entry
+	add	%g4, 1, %g4		C (d >> 24) + 1
+	sllx	%g3, 11, %g2
+	add	%g2, -1, %g2
+	mulx	%g3, %g3, %g3
+	mulx	%g3, %g4, %g3
+	srlx	%g3, 40, %g3
+	sub	%g2, %g3, %g2
+	sllx	%g2, 60, %g1
+	mulx	%g2, %g2, %g3
+	mulx	%g3, %g4, %g4
+	sub	%g1, %g4, %g1
+	srlx	%g1, 47, %g1
+	sllx	%g2, 13, %g2
+	add	%g1, %g2, %g1
+	and	d, 1, %g2		C low bit of d
+	srlx	%g1, 1, %g4
+	sub	%g0, %g2, %g3		C 0 or all ones, from the low bit of d
+	and	%g4, %g3, %g3
+	srlx	d, 1, %g4
+	add	%g4, %g2, %g2		C (d >> 1) + (d & 1)
+	mulx	%g1, %g2, %g2
+	sub	%g3, %g2, %g2
+	umulxhi(%g1, %g2, %g2)
+	srlx	%g2, 1, %g2
+	sllx	%g1, 31, %g1
+	add	%g2, %g1, %g1
+	mulx	%g1, d, %g3		C low limb of candidate * d
+	umulxhi(d, %g1, %g4)		C high limb of candidate * d
+	addcc	%g3, d, %g0		C only the carry of low limb + d is kept
+	addxc(	%g4, d, %o0)		C high limb + d + carry
+	jmp	%o7+8
+	 sub	%g1, %o0, %o0		C delay slot: return value
+EPILOGUE()
+
+	RODATA
+	ALIGN(2)
+	TYPE(	approx_tab, object)
+	SIZE(	approx_tab, 512)
+approx_tab:
+forloop(i,256,512-1,dnl
+`	.half	eval(0x7fd00/i)
+')dnl
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm
new file mode 100644
index 0000000..c79032d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm
@@ -0,0 +1,77 @@
+dnl SPARC v9-2011 simulation support.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(__gmpn_umulh)
+	save	%sp, -176, %sp			C operands sit in stack slots of the caller; +176 undoes this save
+	ldx	[%sp+2047+176+256], %o0		C first operand
+	ldx	[%sp+2047+176+256+8], %o1	C second operand
+	rd	%ccr, %o4			C keep the condition codes of the caller
+	srl	%o0, 0, %l4			C low 32 bits of the first operand
+	srl	%o1, 0, %l1			C low 32 bits of the second operand
+	srlx	%o1, 32, %o1			C high 32 bits of the second operand
+	mulx	%o1, %l4, %l2			C cross product hi2 * lo1
+	srlx	%o0, 32, %o0			C high 32 bits of the first operand
+	mulx	%o0, %l1, %l3			C cross product hi1 * lo2
+	mulx	%l1, %l4, %l1			C lo1 * lo2
+	srlx	%l1, 32, %l1			C only its upper half reaches the result
+	add	%l2, %l1, %l2
+	addcc	%l2, %l3, %l2			C sum of the middle terms, may carry out
+	mulx	%o1, %o0, %o1			C hi1 * hi2
+	mov	0, %l1
+	movcs	%xcc, 1, %l1			C carry of the middle sum ...
+	sllx	%l1, 32, %l1			C ... has weight 2^32 in the high limb
+	add	%o1, %l1, %o1
+	srlx	%l2, 32, %o0
+	add	%o1, %o0, %o0			C high 64 bits of the product
+	stx	%o0, [%sp+2047+176+256]		C result replaces the first operand
+	wr	%o4, 0, %ccr			C condition codes are left as found
+	ret
+	 restore
+EPILOGUE()
+
+PROLOGUE(__gmpn_lzcnt)
+	save	%sp, -176, %sp
+	ldx	[%sp+2047+176+256], %o0		C operand, from a stack slot of the caller
+	brz,a	%o0, 2f				C operand is zero
+	 mov	64, %o1				C annulled delay slot: count = 64 only when the branch is taken
+	brlz	%o0, 2f				C top bit already set
+	 mov	0, %o1				C delay slot, always executed: count = 0
+1:	sllx	%o0, 1, %o0
+	brgz	%o0, 1b				C loop until the top bit is set
+	 add	%o1, 1, %o1			C delay slot: one more leading zero
+C Fix: label 2 used to sit on the ret, so both early exits skipped the store
+C and the caller read its own operand back instead of 64 or 0.
+2:	stx	%o1, [%sp+2047+176+256]		C result replaces the operand, on every path
+	ret
+	 restore
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m4 b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m4
new file mode 100644
index 0000000..e5d6d8e
--- /dev/null
+++
b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m4 @@ -0,0 +1,88 @@ +dnl SPARC v9-2011 simulation support. + +dnl Contributed to the GNU project by Torbjörn Granlund. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+
+
+dnl Usage addxccc(r1,r2,r3, t1)
+dnl 64-bit add with carry-in and carry-out
+dnl FIXME: Register g2 must not be destination
+dnl Expansion: %g2 is parked in a stack scratch slot, the incoming xcc carry
+dnl is turned into %g2 = -1 or 0, addcc %g2, 1 re-creates the carry, addccc
+dnl does the add, and %g2 is reloaded.  The stack pointer is moved down by
+dnl 512 bytes around the sequence and restored at the end.
+
+define(`addxccc',`dnl
+	add	%sp, -512, %sp
+	stx	%g2, [%sp+2047+256+16]
+	mov	0, %g2
+	movcs	%xcc, -1, %g2
+	addcc	%g2, 1, %g0
+	addccc	$1, $2, $3
+	ldx	[%sp+2047+256+16], %g2
+	sub	%sp, -512, %sp
+')
+
+
+dnl Usage addxc(r1,r2,r3, t1,t2)
+dnl 64-bit add with carry-in
+dnl Expansion: a plain add sits in the delay slot of a branch on carry clear;
+dnl one more is added only when the carry was set.  No instruction of the
+dnl expansion writes the condition codes.
+
+define(`addxc',`dnl
+	bcc	%xcc, 1f
+	 add	$1, $2, $3
+	add	$3, 1, $3
+1:
+')
+
+
+dnl Usage umulxhi(r1,r2,r3)
+dnl 64-bit multiply returning upper 64 bits
+dnl Calls __gmpn_umulh using a non-standard calling convention
+dnl Expansion: both operands and %o7 are stored in stack scratch slots, and
+dnl the result is read back from the slot of the first operand.
+
+define(`umulxhi',`dnl
+	add	%sp, -512, %sp
+	stx	$1, [%sp+2047+256]
+	stx	$2, [%sp+2047+256+8]
+	stx	%o7, [%sp+2047+256+16]
+	call	__gmpn_umulh
+	 nop
+	ldx	[%sp+2047+256+16], %o7
+	ldx	[%sp+2047+256], $3
+	sub	%sp, -512, %sp
+')
+dnl Usage lzcnt(r1,r2)
+dnl Plain count leading zeros
+dnl Calls __gmpn_lzcnt using a non-standard calling convention
+dnl Expansion: the operand is stored in a stack scratch slot (in the delay
+dnl slot of the call) and the result is read back from that same slot.
+
+define(`lzcnt',`dnl
+	add	%sp, -512, %sp
+	stx	%o7, [%sp+2047+256+16]
+	call	__gmpn_lzcnt
+	 stx	$1, [%sp+2047+256]
+	ldx	[%sp+2047+256+16], %o7
+	ldx	[%sp+2047+256], $2
+	sub	%sp, -512, %sp
+')
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm
new file mode 100644
index 0000000..08facbd
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm
@@ -0,0 +1,233 @@
+dnl SPARC T3/T4/T5 mpn_mod_1s_4p.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 30
+C UltraSPARC T4/T5: 4
+
+C INPUT PARAMETERS
+define(`ap', `%o0')
+define(`n', `%o1')
+define(`d', `%o2')
+define(`cps', `%o3')
+
+
+C mpn_mod_1s_4p reads the seven-limb table written by mpn_mod_1s_4p_cps at the
+C end of this file: cps[0] is the value returned by mpn_invert_limb, cps[1]
+C the shift count, cps[2..6] the five constants used as multipliers below.
+C After the save the arguments are in %i0 (ap), %i1 (n), %i2 (d), %i3 (cps).
+C The running value is the limb pair %g2 (high) : %g1 (low).
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_mod_1s_4p)
+	save	%sp, -176, %sp
+	ldx	[%i3+16], %o4		C cps[2]
+	ldx	[%i3+24], %o3		C cps[3]
+	ldx	[%i3+32], %o2		C cps[4]
+	ldx	[%i3+40], %o1		C cps[5]
+	ldx	[%i3+48], %o0		C cps[6]
+
+	and	%i1, 3, %g3		C n mod 4 selects the entry sequence
+	sllx	%i1, 3, %g1
+	add	%i0, %g1, %i0		C %i0 = ap + 8*n; limbs are consumed from the top
+	brz	%g3, L(b00)
+	 cmp	%g3, 2
+	bcs	%xcc, L(b01)
+	 nop
+	be	%xcc, L(b10)
+	 nop
+
+L(b11):	ldx	[%i0-16], %g2		C n mod 4 == 3: fold the top three limbs
+	mulx	%g2, %o4, %g5
+	umulxhi(%g2, %o4, %g3)
+	ldx	[%i0-24], %g4
+	addcc	%g5, %g4, %g5
+	addxc(	%g3, %g0, %g4)
+	ldx	[%i0-8], %g2
+	mulx	%g2, %o3, %g1
+	umulxhi(%g2, %o3, %g3)
+	addcc	%g1, %g5, %g1
+	addxc(	%g3, %g4, %g2)
+	ba,pt	%xcc, .L8
+	 add	%i0, -32, %i0
+
+L(b00):	ldx	[%i0-24], %g3		C n mod 4 == 0: fold the top four limbs
+	mulx	%g3, %o4, %g2
+	umulxhi(%g3, %o4, %g5)
+	ldx	[%i0-32], %g4
+	addcc	%g2, %g4, %g2
+	addxc(	%g5, %g0, %g3)
+	ldx	[%i0-16], %g4
+	mulx	%g4, %o3, %g5
+	umulxhi(%g4, %o3, %i5)
+	addcc	%g2, %g5, %g5
+	addxc(	%g3, %i5, %g4)
+	ldx	[%i0-8], %g2
+	mulx	%g2, %o2, %g1
+	umulxhi(%g2, %o2, %g3)
+	addcc	%g1, %g5, %g1
+	addxc(	%g3, %g4, %g2)
+	ba,pt	%xcc, .L8
+	 add	%i0, -40, %i0
+
+L(b01):	ldx	[%i0-8], %g1		C n mod 4 == 1: the top limb alone
+	mov	0, %g2
+	ba,pt	%xcc, .L8
+	 add	%i0, -16, %i0
+
+L(b10):	ldx	[%i0-8], %g2		C n mod 4 == 2: the top two limbs as they are
+	ldx	[%i0-16], %g1
+	add	%i0, -24, %i0
+
+.L8:	add	%i1, -5, %g3
+	brlz,pn	%g3, L(end)		C n < 5: no full group of four left
+	 nop
+
+C Each pass loads four more limbs and combines them with %g1 and %g2 times the
+C table constants into a new %g2:%g1 pair.  %i1 is reused as a temporary.
+L(top):	ldx	[%i0-16], %i4
+	mulx	%i4, %o4, %o5
+	umulxhi(%i4, %o4, %i1)
+	ldx	[%i0-24], %i5
+	addcc	%o5, %i5, %o5
+	addxc(	%i1, %g0, %i4)
+	ldx	[%i0-8], %i5
+	mulx	%i5, %o3, %o7
+	umulxhi(%i5, %o3, %i1)
+	addcc	%o5, %o7, %o7
+	addxc(	%i4, %i1, %i5)
+	ldx	[%i0+0], %g4
+	mulx	%g4, %o2, %i1
+	umulxhi(%g4, %o2, %i4)
+	addcc	%o7, %i1, %i1
+	addxc(	%i5, %i4, %g4)
+	mulx	%g1, %o1, %i5
+	umulxhi(%g1, %o1, %i4)
+	addcc	%i1, %i5, %i5
+	addxc(	%g4, %i4, %g5)
+	mulx	%g2, %o0, %g1
+	umulxhi(%g2, %o0, %g4)
+	addcc	%g1, %i5, %g1
+	addxc(	%g4, %g5, %g2)
+	add	%g3, -4, %g3
+	brgez,pt %g3, L(top)
+	 add	%i0, -32, %i0
+
+L(end):	mulx	%g2, %o4, %g5		C fold the high limb once more with cps[2]
+	umulxhi(%g2, %o4, %g3)
+	addcc	%g1, %g5, %g5
+	addxc(	%g3, %g0, %g2)
+	ldx	[%i3+8], %i0		C cps[1], the shift count
+	ldx	[%i3], %g4		C cps[0]
+	sub	%g0, %i0, %i5
+	srlx	%g5, %i5, %i5
+	sllx	%g2, %i0, %g2
+	or	%i5, %g2, %g1		C high limb of the pair shifted left by the count
+	mulx	%g1, %g4, %l7
+	umulxhi(%g1, %g4, %g3)
+	sllx	%g5, %i0, %g2		C low limb of the shifted pair
+	add	%g1, 1, %g1
+	addcc	%l7, %g2, %g5
+	addxc(	%g3, %g1, %g1)
+	mulx	%g1, %i2, %g1		C times d
+	sub	%g2, %g1, %g2
+	cmp	%g2, %g5
+	add	%i2, %g2, %g1
+	movlu	%xcc, %g2, %g1
+	subcc	%g1, %i2, %g2
+	movgeu	%xcc, %g2, %g1
+	return	%i7+8
+	 srlx	%g1, %o0, %o0		C delay slot, after the window restore: %o0 is %i0 of this frame (the shift count)
+EPILOGUE()
+
+C mpn_mod_1s_4p_cps fills the table.  After the save %i0 is the table pointer
+C and %i1 the divisor.
+PROLOGUE(mpn_mod_1s_4p_cps)
+	save	%sp, -176, %sp
+	lzcnt(	%i1, %i5)		C %i5 = leading zero count of the divisor
+	sllx	%i1, %i5, %i1		C divisor shifted left by that count
+	call	mpn_invert_limb, 0
+	 mov	%i1, %o0		C delay slot: argument of the call
+	stx	%o0, [%i0]		C cps[0] = value returned by mpn_invert_limb
+	sra	%i5, 0, %g1
+	stx	%g1, [%i0+8]		C cps[1] = shift count
+	sub	%g0, %i5, %g2
+	srlx	%o0, %g2, %g2
+	mov	1, %g1
+	sllx	%g1, %i5, %g1
+	or	%g2, %g1, %g2
+	sub	%g0, %i1, %g1
+	mulx	%g2, %g1, %g2
+	srlx	%g2, %i5, %g1
+	stx	%g1, [%i0+16]		C cps[2]
+
+	umulxhi(%o0, %g2, %g3)		C the same block is repeated for cps[3..6]
+	add	%g2, %g3, %g3
+	xnor	%g0, %g3, %g3
+	mulx	%g3, %i1, %g3
+	mulx	%g2, %o0, %g2
+	cmp	%g2, %g3
+	add	%i1, %g3, %g1
+	movgeu	%xcc, %g3, %g1
+	srlx	%g1, %i5, %g2
+	stx	%g2, [%i0+24]		C cps[3]
+
+	umulxhi(%o0, %g1, %g3)
+	add	%g1, %g3, %g3
+	xnor	%g0, %g3, %g3
+	mulx	%g3, %i1, %g3
+	mulx	%g1, %o0, %g1
+	cmp	%g1, %g3
+	add	%i1, %g3, %g2
+	movgeu	%xcc, %g3, %g2
+	srlx	%g2, %i5, %g1
+	stx	%g1, [%i0+32]		C cps[4]
+
+	umulxhi(%o0, %g2, %g3)
+	add	%g2, %g3, %g3
+	xnor	%g0, %g3, %g3
+	mulx	%g3, %i1, %g3
+	mulx	%g2, %o0, %g2
+	cmp	%g2, %g3
+	add	%i1, %g3, %g1
+	movgeu	%xcc, %g3, %g1
+	srlx	%g1, %i5, %g2
+	stx	%g2, [%i0+40]		C cps[5]
+
+	umulxhi(%o0, %g1, %g2)
+	add	%g1, %g2, %g2
+	xnor	%g0, %g2, %g2
+	mulx	%g2, %i1, %g2
+	mulx	%g1, %o0, %o0
+	cmp	%o0, %g2
+	add	%i1, %g2, %g3
+	movgeu	%xcc, %g2, %g3
+	srlx	%g3, %i5, %i5
+	stx	%i5, [%i0+48]		C cps[6]
+
+	return	%i7+8
+	 nop
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_34lsub1.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_34lsub1.asm
new file mode 100644
index 0000000..8744280
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_34lsub1.asm
@@ -0,0 +1,117 @@
+dnl SPARC v9 mpn_mod_34lsub1 for T3/T4/T5.
+
+dnl Copyright 2005, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: -
+C UltraSPARC T3: 5
+C UltraSPARC T4: 1.57
+
+C This is based on the powerpc64/mode64 code.
+
+C INPUT PARAMETERS
+define(`up', `%i0')
+define(`n', `%i1')
+
+C Limbs are summed into three accumulators, %g1, %g3 and %g4, by limb index
+C modulo 3.  The adds form one carry chain: the carry out of the %g4 add is
+C the carry in of the next %g1 add, and %g5 collects the carry that is left
+C at the end.  From L(2) on the four values are split at bit 48, 32 and 16
+C and added up, and the sum is returned in %i0.
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_mod_34lsub1)
+	save	%sp, -176, %sp
+
+	mov	0, %g1
+	mov	0, %g3
+	mov	0, %g4
+	addcc	%g0, 0, %g5		C %g5 = 0 and carry flag cleared
+
+	add	n, -3, n
+	brlz	n, L(lt3)		C fewer than three limbs in total
+	 nop
+
+	add	n, -3, n
+	ldx	[up+0], %l5		C first group of three, loaded ahead of use
+	ldx	[up+8], %l6
+	ldx	[up+16], %l7
+	brlz	n, L(end)		C no second full group
+	 add	up, 24, up
+
+	ALIGN(16)
+L(top):	addxccc(%g1, %l5, %g1)		C add one group while loading the next
+	ldx	[up+0], %l5
+	addxccc(%g3, %l6, %g3)
+	ldx	[up+8], %l6
+	addxccc(%g4, %l7, %g4)
+	ldx	[up+16], %l7
+	add	n, -3, n
+	brgez	n, L(top)
+	 add	up, 24, up
+
+L(end):	addxccc(	%g1, %l5, %g1)		C add the group that is already loaded
+	addxccc(%g3, %l6, %g3)
+	addxccc(%g4, %l7, %g4)
+	addxc(	%g5, %g0, %g5)		C fold the pending carry into %g5
+
+L(lt3):	cmp	n, -2
+	blt	L(2)			C no limbs left
+	 nop
+
+	ldx	[up+0], %l5
+	mov	0, %l6
+	beq	L(1)			C exactly one limb left
+	 addcc	%g1, %l5, %g1		C delay slot: executed on both paths
+
+	ldx	[up+8], %l6		C two limbs left
+L(1):	addxccc(%g3, %l6, %g3)
+	addxccc(%g4, %g0, %g4)
+	addxc(	%g5, %g0, %g5)
+
+L(2):	sllx	%g1, 16, %l0
+	srlx	%l0, 16, %l0		C %l0 = %g1 mod 2^48
+	srlx	%g1, 48, %l3		C %l3 = %g1 div 2^48
+	srl	%g3, 0, %g1
+	sllx	%g1, 16, %l4		C %l4 = (%g3 mod 2^32) << 16
+	srlx	%g3, 32, %l5		C %l5 = %g3 div 2^32
+	sethi	%hi(0xffff0000), %g1
+	andn	%g4, %g1, %g1
+	sllx	%g1, 32, %l6		C %l6 = (%g4 mod 2^16) << 32
+	srlx	%g4, 16, %l7		C %l7 = %g4 div 2^16
+
+	add	%l0, %l3, %l0
+	add	%l4, %l5, %l4
+	add	%l6, %l7, %l6
+
+	add	%l0, %l4, %l0
+	add	%l6, %g5, %l6
+
+	add	%l0, %l6, %i0
+	ret
+	 restore
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm
new file mode 100644
index 0000000..494e1d3
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm
@@ -0,0 +1,82 @@
+dnl SPARC T3/T4/T5 mpn_modexact_1c_odd.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+ +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 30
+C UltraSPARC T4/T5: 26
+
+C INPUT PARAMETERS
+define(`ap', `%o0')
+define(`n', `%o1')
+define(`d', `%o2')
+define(`cy', `%o3')
+
+define(`dinv',`%o5')
+define(`a0', `%g1')
+
+C Leaf routine (no register window is allocated).  dinv is built from
+C binvert_limb_table plus three refinement steps; the loop then runs once per
+C limb and the final carry limb cy is returned in %o0.
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_modexact_1c_odd)
+	srlx	d, 1, %g1
+	and	%g1, 127, %g1		C table index = (d >> 1) & 127
+
+	LEA64(binvert_limb_table, g2, g4)
+	ldub	[%g2+%g1], %g1		C start value x from the table
+	add	%g1, %g1, %g2
+	mulx	%g1, %g1, %g1
+	mulx	%g1, d, %g1
+	sub	%g2, %g1, %g2		C x = 2*x - x*x*d
+	add	%g2, %g2, %g1
+	mulx	%g2, %g2, %g2
+	mulx	%g2, d, %g2
+	sub	%g1, %g2, %g1		C same step again
+	add	%g1, %g1, %o5
+	mulx	%g1, %g1, %g1
+	mulx	%g1, d, %g1
+	sub	%o5, %g1, dinv		C third step, result is dinv
+	add	n, -1, n
+
+L(top):	ldx	[ap], a0		C next source limb
+	add	ap, 8, ap
+	subcc	a0, cy, %g3		C subtract carry limb, borrow goes to the carry flag
+	mulx	%g3, dinv, %g5
+	umulxhi(d, %g5, %g5)		C high limb of d * (difference * dinv)
+	addxc(	%g5, %g0, cy)		C new carry limb = high limb + borrow
+	brnz,pt	n, L(top)
+	 add	n, -1, n
+
+	retl
+	 mov	cy, %o0			C delay slot: return value
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm
new file mode 100644
index 0000000..af05d62
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm
@@ -0,0 +1,174 @@
+dnl SPARC v9 mpn_mul_1 for T3/T4/T5.
+
+dnl Contributed to the GNU project by David Miller and Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 23
+C UltraSPARC T4: 3
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+C The main loop handles four limbs per pass.  n mod 4 selects one of four
+C entry sequences (L(b0) .. L(b3)); each clears the carry flag, starts the
+C first products and either joins the loop at the matching L(lo*) label or,
+C for short operands, jumps straight to the wind-down code.  Every limb
+C product is formed as a mulx (low limb) and umulxhi (high limb) pair, and
+C the carry limb is returned in %i0.
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_mul_1)
+	save	%sp, -176, %sp
+
+	and	n, 3, %g5		C n mod 4
+	add	n, -4, n
+	brz	%g5, L(b0)
+	 cmp	%g5, 2
+	bcs	%xcc, L(b1)
+	 nop
+	be	%xcc, L(b2)
+	 nop
+
+L(b3):	addcc	%g0, %g0, %i5		C zero high limb, carry flag cleared
+	ldx	[up+0], %l0
+	ldx	[up+8], %l1
+	ldx	[up+16], %l2
+	mulx	%l0, v0, %o0
+	umulxhi(%l0, v0, %o1)
+	brgz	n, L(gt3)		C more than three limbs
+	 add	rp, -8, rp
+	mulx	%l1, v0, %o2
+	umulxhi(%l1, v0, %o3)
+	b	L(wd3)			C exactly three limbs: go to the wind-down
+	 nop
+L(gt3):	ldx	[up+24], %l3
+	mulx	%l1, v0, %o2
+	umulxhi(%l1, v0, %o3)
+	add	up, 24, up
+	b	L(lo3)
+	 add	n, -3, n
+
+L(b2):	addcc	%g0, %g0, %o1		C zero high limb, carry flag cleared
+	ldx	[up+0], %l1
+	ldx	[up+8], %l2
+	brgz	n, L(gt2)		C more than two limbs
+	 add	rp, -16, rp
+	mulx	%l1, v0, %o2
+	umulxhi(%l1, v0, %o3)
+	mulx	%l2, v0, %o4
+	umulxhi(%l2, v0, %o5)
+	b	L(wd2)			C exactly two limbs: go to the wind-down
+	 nop
+L(gt2):	ldx	[up+16], %l3
+	mulx	%l1, v0, %o2
+	umulxhi(%l1, v0, %o3)
+	ldx	[up+24], %l0
+	mulx	%l2, v0, %o4
+	umulxhi(%l2, v0, %o5)
+	add	up, 16, up
+	b	L(lo2)
+	 add	n, -2, n
+
+L(b1):	addcc	%g0, %g0, %o3		C zero high limb, carry flag cleared
+	ldx	[up+0], %l2
+	brgz	n, L(gt1)		C more than one limb
+	 nop
+	mulx	%l2, v0, %o4		C exactly one limb: finish here
+	stx	%o4, [rp+0]
+	umulxhi(%l2, v0, %i0)		C high limb is the return value
+	ret
+	 restore
+L(gt1):	ldx	[up+8], %l3
+	ldx	[up+16], %l0
+	mulx	%l2, v0, %o4
+	umulxhi(%l2, v0, %o5)
+	ldx	[up+24], %l1
+	mulx	%l3, v0, %i4
+	umulxhi(%l3, v0, %i5)
+	add	rp, -24, rp
+	add	up, 8, up
+	b	L(lo1)
+	 add	n, -1, n
+
+L(b0):	addcc	%g0, %g0, %o5		C zero high limb, carry flag cleared
+	ldx	[up+0], %l3
+	ldx	[up+8], %l0
+	ldx	[up+16], %l1
+	mulx	%l3, v0, %i4
+	umulxhi(%l3, v0, %i5)
+	ldx	[up+24], %l2
+	mulx	%l0, v0, %o0
+	umulxhi(%l0, v0, %o1)
+	b	L(lo0)
+	 nop
+
+C In the loop each low limb is added to the high limb of the previous
+C product with carry in and out, then stored.
+	ALIGN(16)
+L(top):	ldx	[up+0], %l3		C 0
+	addxccc(%i4, %o5, %i4)		C 0
+	mulx	%l1, v0, %o2		C 1
+	stx	%i4, [rp+0]		C 1
+	umulxhi(%l1, v0, %o3)		C 2
+L(lo3):	ldx	[up+8], %l0		C 2
+	addxccc(%o0, %i5, %o0)		C 3
+	mulx	%l2, v0, %o4		C 3
+	stx	%o0, [rp+8]		C 4
+	umulxhi(%l2, v0, %o5)		C 4
+L(lo2):	ldx	[up+16], %l1		C 5
+	addxccc(%o2, %o1, %o2)		C 5
+	mulx	%l3, v0, %i4		C 6
+	stx	%o2, [rp+16]		C 6
+	umulxhi(%l3, v0, %i5)		C 7
+L(lo1):	ldx	[up+24], %l2		C 7
+	addxccc(%o4, %o3, %o4)		C 8
+	mulx	%l0, v0, %o0		C 8
+	stx	%o4, [rp+24]		C 9
+	umulxhi(%l0, v0, %o1)		C 9
+	add	rp, 32, rp		C 10
+L(lo0):	add	up, 32, up		C 10
+	brgz	n, L(top)		C 11
+	 add	n, -4, n		C 11
+
+L(end):	addxccc(%i4, %o5, %i4)
+	mulx	%l1, v0, %o2
+	stx	%i4, [rp+0]
+	umulxhi(%l1, v0, %o3)
+	addxccc(%o0, %i5, %o0)
+L(wd3):	mulx	%l2, v0, %o4
+	stx	%o0, [rp+8]
+	umulxhi(%l2, v0, %o5)
+	addxccc(%o2, %o1, %o2)
+L(wd2):	stx	%o2, [rp+16]
+	addxccc(%o4, %o3, %o4)
+	stx	%o4, [rp+24]
+	addxc(	%g0, %o5, %i0)		C return value: last high limb + carry
+	ret
+	 restore
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/popcount.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/popcount.asm
new file mode 100644
index 0000000..de80f3c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/popcount.asm
@@ -0,0 +1,70 @@
+dnl SPARC v9 mpn_popcount for T3/T4.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 15 +C UltraSPARC T4: 2.5 + +C INPUT PARAMETERS +define(`up', `%o0') C limb pointer, advanced 16 bytes per loop pass +define(`n', `%o1') C limb count +define(`pcnt', `%o5') C not an input: running bit total, cleared on entry + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_popcount) + subcc n, 1, n C n becomes count minus one, the zero flag marks a single limb + be L(final_one) + clr pcnt C delay slot: start the total at zero on both paths +L(top): C each pass counts the bits of two limbs + ldx [up + 0], %g1 + sub n, 2, n + ldx [up + 8], %o4 + add up, 16, up + popc %g1, %g2 + popc %o4, %g3 + add pcnt, %g2, pcnt + brgz n, L(top) C at least two limbs are still pending + add pcnt, %g3, pcnt C delay slot: executed whether or not the branch is taken + brlz,pt n, L(done) C n went negative: the limb count was even, nothing is left + nop +L(final_one): C reached with exactly one limb left at up + ldx [up + 0], %g1 + popc %g1, %g2 + add pcnt, %g2, pcnt +L(done): + retl + mov pcnt, %o0 C delay slot: the total is handed back in o0 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm new file mode 100644 index 0000000..d46499f --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm @@ -0,0 +1,93 @@ +dnl SPARC v9 mpn_sqr_diag_addlsh1 for T3/T4/T5. + +dnl Contributed to the GNU project by Torbjörn Granlund. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: ? 
+C UltraSPARC T4: >= 4.5 + + +define(`rp', `%i0') C base address of the stx result stores +define(`tp', `%i1') C limbs that are doubled and added in +define(`up', `%i2') C limbs that are squared +define(`n', `%i3') C loop counter, the loop runs until it reaches zero + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_sqr_diag_addlsh1) + save %sp, -176, %sp C open a register window with a 176-byte frame + + ldx [up+0], %g1 + mulx %g1, %g1, %o0 C low half of the first square + umulxhi(%g1, %g1, %g2) C high half, carried into the next result limb through g2 + stx %o0, [rp+0] C the lowest result limb is the bare low half + + ldx [up+8], %g1 + ldx [tp+0], %g4 + ldx [tp+8], %g5 + mulx %g1, %g1, %o0 + orcc %g0, %g0, %o5 C o5 = 0 and carry flag clear ahead of the first doubling + b L(dm) + add n, -2, n C delay slot: executed before control reaches dm + + ALIGN(16) +L(top): ldx [up+8], %g1 + addcc %g4, %o2, %o2 C doubled tp limb plus the value prepared in o2 at dm + addxccc(%g5, %o0, %g3) C doubled tp limb plus low half of the square plus carry + ldx [tp+16], %g4 + ldx [tp+24], %g5 + mulx %g1, %g1, %o0 C overwrites o0 only after the addxccc above has consumed it + stx %o2, [rp+8] + stx %g3, [rp+16] + add rp, 16, rp + add tp, 16, tp +L(dm): add %g2, %o5, %o2 C o2 = previous high half plus o5, flags untouched + umulxhi(%g1, %g1, %g2) + addxccc(%g4, %g4, %g4) C double the tp limb, the carry flag comes in from the preceding chain + addxccc(%g5, %g5, %g5) + add up, 8, up + addxc( %g0, %g0, %o5) C o5 = carry out of the doubling + brnz n, L(top) + add n, -1, n C delay slot: executed whether or not the branch is taken + + addcc %o2, %g4, %g4 C wind-down: same additions as at top, without further loads + addxccc(%o0, %g5, %g5) + stx %g4, [rp+8] + stx %g5, [rp+16] + addxc( %o5, %g2, %g2) C top limb = last high half plus o5 plus the final carry + stx %g2, [rp+24] + + ret + restore +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm new file mode 100644 index 0000000..0e4bc93 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm @@ -0,0 +1,144 @@ +dnl SPARC v9 mpn_sub_n for T3/T4. + +dnl Contributed to the GNU project by David Miller. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 8 +C UltraSPARC T4: 3 + +C INPUT PARAMETERS +define(`rp', `%i0') C base address of the stx result stores +define(`up', `%i1') C limbs subtracted from +define(`vp', `%i2') C limbs being subtracted, complemented with xnor before the add +define(`n', `%i3') C limb count +define(`cy', `%i4') C borrow-in for mpn_sub_nc, overwritten with 1 by mpn_sub_n + +define(`u0_off', `%l0') C up plus 8 times the count-minus-one +define(`u1_off', `%l1') C u0_off plus 8 +define(`v0_off', `%l2') C vp plus 8 times the count-minus-one +define(`v1_off', `%l3') C v0_off plus 8 +define(`r0_off', `%l4') C store base for the first limb of each pair +define(`r1_off', `%l5') C store base for the second limb, used after loop_n has advanced +define(`loop_n', `%l6') C negative byte index that steps up by 16 per pass +define(`tmp', `%l7') C scratch + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_sub_nc) + save %sp, -176, %sp + ba,pt %xcc, L(ent) + xor cy, 1, cy C delay slot: flip the borrow-in into the carry-in of the add-with-complement form +EPILOGUE() +PROLOGUE(mpn_sub_n) + save %sp, -176, %sp + mov 1, cy C same state mpn_sub_nc reaches when entered with cy = 0 +L(ent): C common body of both entry points + subcc n, 1, n C n becomes count minus one, the zero flag marks a single limb + be L(final_one) + cmp %g0, cy C delay slot: carry flag set exactly when cy is nonzero + + ldx [up + 0], %o4 + sllx n, 3, tmp C tmp = 8 times the count-minus-one, a byte offset + + ldx [vp + 0], %o5 + add up, tmp, u0_off + + ldx [up + 8], %g5 + add vp, tmp, v0_off + + ldx [vp + 8], %g1 + add rp, tmp, r0_off + + neg tmp, loop_n + add u0_off, 8, u1_off + + add v0_off, 8, v1_off + sub loop_n, -(2 * 8), loop_n C step past the two limbs already loaded + + sub r0_off, 16, r0_off + brgez,pn loop_n, L(loop_tail) C at most three limbs in total: skip the loop + sub r0_off, 8, r1_off C delay slot: executed whether or not the branch is taken + + b,a L(top) + ALIGN(16) +L(top): + xnor %o5, 0, tmp C tmp = complement of the v limb + ldx [loop_n + v0_off], %o5 + + addxccc(%o4, tmp, %g3) C u limb plus complemented v limb plus carry + ldx [loop_n + u0_off], %o4 + + xnor %g1, 0, %g1 + stx %g3, [loop_n + r0_off] + + addxccc(%g5, %g1, tmp) + ldx [loop_n + v1_off], %g1 + + ldx [loop_n + u1_off], %g5 + sub loop_n, -(2 * 8), loop_n C plain sub, so the carry flag survives into the next pass + + brlz loop_n, L(top) + stx tmp, [loop_n + r1_off] C delay slot: store the second limb of the pair + +L(loop_tail): C finish the pair whose operands are already in registers + xnor %o5, 0, tmp + xnor %g1, 0, %g1 + + addxccc(%o4, tmp, %g3) + add loop_n, u0_off, up C up now addresses a possible leftover limb + + addxccc(%g5, %g1, %g5) + 
add loop_n, r0_off, rp + + stx %g3, [rp + 0] + add loop_n, v0_off, vp + + brgz,pt loop_n, L(done) C loop_n above zero: no leftover limb + stx %g5, [rp + 8] C delay slot: executed whether or not the branch is taken + + add rp, (2 * 8), rp + +L(final_one): C one limb at up, vp and rp, carry flag still live + ldx [up+0], %o4 + ldx [vp+0], %o5 + xnor %o5, %g0, %o5 + addxccc(%o4, %o5, %g3) + stx %g3, [rp+0] + +L(done): + clr %i0 + movcc %xcc, 1, %i0 C i0 = 1 when the final carry is clear, which is a borrow in this form + ret + restore +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/submul_1.asm b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/submul_1.asm new file mode 100644 index 0000000..5635d1b --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/sparc64/ultrasparct3/submul_1.asm @@ -0,0 +1,170 @@ +dnl SPARC v9 mpn_submul_1 for T3/T4/T5. + +dnl Contributed to the GNU project by David Miller and Torbjörn Granlund. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 26 +C UltraSPARC T4: 4.5 + +C INPUT PARAMETERS +define(`rp', `%i0') C limbs loaded, reduced by the product limbs, and stored back +define(`up', `%i1') C source limbs for the products +define(`n', `%i2') C limb count, the main loop retires four per pass +define(`v0', `%i3') C multiplier operand of every mulx and umulxhi + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_submul_1) + save %sp, -176, %sp C open a register window with a 176-byte frame + ldx [up+0], %g1 + + and n, 3, %g5 C g5 = n mod 4 selects the entry point into the unrolled loop + add n, -4, n C bias the count by one group of four + brz %g5, L(b00) + cmp %g5, 2 C delay slot: sets the flags tested by bcs and bne below + bcs %xcc, L(b01) + nop + bne %xcc, L(b11) + ldx [up+8], %g4 C delay slot: executed for both the b10 and the b11 case + +L(b10): add up, 16, up + addcc %g0, 0, %g3 C g3 = 0 and carry flag clear: nothing is carried into the first limb + mulx %g1, v0, %l4 + umulxhi(%g1, v0, %l5) + ldx [rp+0], %o2 + mulx %g4, v0, %l6 + umulxhi(%g4, v0, %l7) + brlz n, L(wd2) C exactly two limbs: go straight to the wind-down + nop +L(gt2): ldx [up+0], %o0 + b L(lo2) + nop + +L(b00): add rp, -16, rp + addcc %g0, 0, %g3 C g3 = 0 and carry flag clear: nothing is carried into the first limb + ldx [up+8], %o1 + mulx %g1, v0, %l0 + umulxhi(%g1, v0, %l1) + ldx [up+16], %o0 + ldx [rp+16], %o2 + mulx %o1, v0, %l2 + umulxhi(%o1, v0, %l3) + b L(lo0) + nop + +L(b01): add up, 8, up + add rp, -8, rp + addcc %g0, 0, %g3 C g3 = 0 and carry flag clear: nothing is carried into the first limb + ldx [rp+8], %o3 + mulx %g1, v0, %l6 + umulxhi(%g1, v0, %l7) + brlz n, L(wd1) C exactly one limb: go straight to the wind-down + nop + ldx [up+0], %o0 + ldx [up+8], %o1 + mulx %o0, v0, %l0 + umulxhi(%o0, v0, %l1) + b L(lo1) + nop + +L(b11): add up, 24, up + add rp, 8, rp + addcc %g0, 0, %g3 C g3 = 0 and carry flag clear: nothing is carried into the first limb + mulx %g1, v0, %l2 + umulxhi(%g1, v0, %l3) + ldx [up-8], %o1 + ldx [rp-8], %o3 + mulx %g4, v0, %l4 + umulxhi(%g4, v0, %l5) + brlz n, L(end) C exactly three limbs: go straight to the wind-down + nop + + ALIGN(16) +L(top): ldx [up+0], %o0 + addxccc(%g3, %l2, %g1) C g1 = carried limb plus low product half plus the borrow left in the carry flag + ldx [rp+0], %o2 + addxc( %g0, %l3, %g3) C g3 = high product half plus carry, handed to the next limb + mulx %o1, v0, %l6 + subcc %o3, %g1, %g4 C rp limb minus g1, the borrow lands in the carry flag + umulxhi(%o1, v0, %l7) + stx %g4, [rp-8] +L(lo2): ldx [up+8], %o1 + addxccc(%g3, %l4, %g1) + ldx [rp+8], %o3 + addxc( %g0, %l5, %g3) + mulx %o0, v0, %l0 + subcc %o2, %g1, %g4 + umulxhi(%o0, v0, %l1) + stx %g4, [rp+0] +L(lo1): ldx [up+16], %o0 + addxccc(%g3, %l6, %g1) + ldx [rp+16], %o2 + addxc( %g0, %l7, %g3) + mulx %o1, v0, %l2 + subcc %o3, %g1, %g4 + umulxhi(%o1, v0, %l3) + stx %g4, [rp+8] +L(lo0): ldx [up+24], %o1 + addxccc(%g3, %l0, %g1) + ldx [rp+24], %o3 + addxc( %g0, %l1, %g3) + mulx %o0, v0, %l4 + subcc %o2, 
%g1, %g4 + umulxhi(%o0, v0, %l5) + stx %g4, [rp+16] + add n, -4, n C plain add: the borrow in the carry flag survives the loop bookkeeping + add up, 32, up + brgez n, L(top) + add rp, 32, rp C delay slot: executed whether or not the branch is taken + +L(end): addxccc(%g3, %l2, %g1) C wind-down: no further up loads, only the limbs already fetched are finished + ldx [rp+0], %o2 + addxc( %g0, %l3, %g3) + mulx %o1, v0, %l6 + subcc %o3, %g1, %g4 + umulxhi(%o1, v0, %l7) + stx %g4, [rp-8] +L(wd2): addxccc(%g3, %l4, %g1) + ldx [rp+8], %o3 + addxc( %g0, %l5, %g3) + subcc %o2, %g1, %g4 + stx %g4, [rp+0] +L(wd1): addxccc(%g3, %l6, %g1) + addxc( %g0, %l7, %g3) + subcc %o3, %g1, %g4 + stx %g4, [rp+8] + addxc( %g0, %g3, %i0) C i0 = last carried limb plus the final borrow, seen as o0 by the caller after restore + ret + restore +EPILOGUE() -- cgit v1.2.3