aboutsummaryrefslogtreecommitdiff
path: root/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-06-21 23:36:36 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-06-21 23:42:26 +0200
commit a89a14ef5da44684a16b204e7a70460cc8c4922a (patch)
tree b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7
parent 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff)
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7')
-rw-r--r-- vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aormul_2.asm 135
-rw-r--r-- vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aors_n.asm 128
-rw-r--r-- vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlsh1_n.asm 43
-rw-r--r-- vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlsh2_n.asm 43
-rw-r--r-- vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlshC_n.asm 129
-rw-r--r-- vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_11.asm 67
-rw-r--r-- vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm 146
-rw-r--r-- vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gmp-mparam.h 175
8 files changed, 866 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aormul_2.asm b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aormul_2.asm
new file mode 100644
index 0000000..8731e01
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aormul_2.asm
@@ -0,0 +1,135 @@
+dnl PowerPC-64 mpn_mul_2 and mpn_addmul_2.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb cycles/limb
+C mul_2 addmul_2
+C POWER3/PPC630 ? ?
+C POWER4/PPC970 ? ?
+C POWER5 ? ?
+C POWER6 ? ?
+C POWER7-SMT4 3 3
+C POWER7-SMT2 ? ?
+C POWER7-SMT1 ? ?
+
+C INPUT PARAMETERS
+C rp (r3): destination pointer; result limbs are written with std.
+C up (r4): source pointer; one limb is loaded per multiply step.
+C n  (r5): limb count of the up operand.  r5 is reused for the u limb once
+C          the trip count has been derived from it.
+C vp (r6): pointer to the two multiplier limbs, read at 0(vp) and 8(vp).
+C          r6 is reused for the low product limb after those two loads.
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`vp', `r6')
+
+C cy0/cy1 carry the two pending high limbs from one multiply step to the
+C next.  cy1 is EXTRA_REGISTER when that symbol is defined, otherwise r31,
+C which the function body saves at -8(r1) and reloads before returning.
+define(`cy0', `r10')
+ifdef(`EXTRA_REGISTER',
+` define(`cy1', EXTRA_REGISTER)',
+` define(`cy1', `r31')')
+
+C Per-operation macros:
+C   AM    expands to its argument for addmul_2 and to nothing for mul_2; it
+C         wraps the instructions that load the old rp limb and add it in.
+C   ADDX  is adde for addmul_2 so that it consumes the carry produced by the
+C         addc inside AM; mul_2 has no such carry and uses addc instead.
+C   func  is the name handed to PROLOGUE.
+ifdef(`OPERATION_mul_2',`
+ define(`AM', `')
+ define(`ADDX', `addc')
+ define(`func', `mpn_mul_2')
+')
+ifdef(`OPERATION_addmul_2',`
+ define(`AM', `$1')
+ define(`ADDX', `adde')
+ define(`func', `mpn_addmul_2')
+')
+
+MULFUNC_PROLOGUE(mpn_mul_2 mpn_addmul_2)
+
+ASM_START()
+C func(rp, up, n, vp)
+C Multiplies the n limbs at up by the two limbs at vp.  When the AM lines
+C are enabled (addmul_2) the limbs already stored at rp are added in too.
+C Limbs rp[0] .. rp[n] are written and the most significant limb is
+C returned in r3.
+C The loop is unrolled twice; L(lo0) and L(lo1) are its two entry points.
+C n = 0 is not handled: CTR would be loaded with 0 and bdnz would wrap.
+PROLOGUE(func)
+
+C With no EXTRA_REGISTER, cy1 is r31: spill it below the stack pointer here
+C and reload it just before blr.
+ifdef(`EXTRA_REGISTER',,`
+ std r31, -8(r1)
+')
+ andi. r12, n, 1 C cr0.eq is set when n is even (tested by beq below)
+ addi r0, n, 1
+ srdi r0, r0, 1
+ mtctr r0 C CTR = (n+1)/2 loop trips, two limbs per trip
+ ld r11, 0(vp) C v0
+ li cy0, 0
+ ld r12, 8(vp) C v1
+ li cy1, 0
+ ld r5, 0(up) C r5 was n; from here on it holds the current u limb
+ beq L(lo0) C n even: enter at the first half of the loop
+ addi up, up, -8 C n odd: bias both pointers by one limb and
+ addi rp, rp, -8 C enter at the second half of the loop
+ b L(lo1)
+
+ ALIGN(32)
+C Each half of the loop first finishes the previous limb: it adds the two
+C weight-1 partial products, folds in cy0/cy1, stores one result limb and
+C leaves the two pending high limbs in cy0/cy1.  It then issues the four
+C multiplies for the u limb that was loaded into r5.
+L(top):
+AM(` ld r0, -8(rp)')
+ ld r5, 0(up)
+AM(` addc r6, r6, r0')
+ ADDX r7, r7, r8
+ addze r9, r9
+ addc r6, r6, cy0
+ adde cy0, r7, cy1
+ std r6, -8(rp)
+ addze cy1, r9
+L(lo0): mulld r6, r11, r5 C v0 * u[i] weight 0
+ mulhdu r7, r11, r5 C v0 * u[i] weight 1
+ mulld r8, r12, r5 C v1 * u[i] weight 1
+ mulhdu r9, r12, r5 C v1 * u[i] weight 2
+AM(` ld r0, 0(rp)')
+ ld r5, 8(up)
+AM(` addc r6, r6, r0')
+ ADDX r7, r7, r8
+ addze r9, r9
+ addc r6, r6, cy0
+ adde cy0, r7, cy1
+ std r6, 0(rp)
+ addze cy1, r9
+L(lo1): mulld r6, r11, r5 C v0 * u[i] weight 0
+ mulhdu r7, r11, r5 C v0 * u[i] weight 1
+ addi up, up, 16
+ addi rp, rp, 16
+ mulld r8, r12, r5 C v1 * u[i] weight 1
+ mulhdu r9, r12, r5 C v1 * u[i] weight 2
+ bdnz L(top)
+
+C Wind-down for the last u limb: same accumulation as in the loop, then
+C store the two final limbs and return the top limb (cy1) in r3.
+L(end):
+AM(` ld r0, -8(rp)')
+AM(` addc r6, r6, r0')
+ ADDX r7, r7, r8
+ addze r9, r9
+ addc r6, r6, cy0
+ std r6, -8(rp)
+ adde cy0, r7, cy1
+ addze cy1, r9
+ std cy0, 0(rp)
+ mr r3, cy1
+
+ifdef(`EXTRA_REGISTER',,`
+ ld r31, -8(r1)
+')
+ blr
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aors_n.asm b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aors_n.asm
new file mode 100644
index 0000000..857c701
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aors_n.asm
@@ -0,0 +1,128 @@
+dnl PowerPC-64 mpn_add_n, mpn_sub_n optimised for POWER7.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 ?
+C POWER6 ?
+C POWER7 2.18
+
+C This is a tad bit slower than the cnd_aors_n.asm code, which is of course an
+C anomaly.
+
+C Operation-specific macros (r0 is the scratch target in SETCBR and CLRCB):
+C   ADDSUBC  carry-consuming add or subtract, used for every limb.
+C   ADDSUB   carry-producing variant; not referenced by the code below.
+C   GENRVAL  turns the value CA-1 that L(end) leaves in r3 into the 0/1
+C            return value (add 1 for add_n, negate for sub_n).
+C   SETCBR   loads CA from a carry-in register for the _nc entry point:
+C            add_n gets CA=1 when the register is non-zero, sub_n gets CA=0
+C            when the register is non-zero.
+C   CLRCB    initial CA for the plain entry point: 0 for add_n; for sub_n
+C            CA=1, which relies on r1 being non-zero.
+C For the subtract forms CA is the inverse of borrow, so CA=1 is no borrow.
+ifdef(`OPERATION_add_n',`
+ define(ADDSUBC, adde)
+ define(ADDSUB, addc)
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)
+ define(GENRVAL, `addi r3, r3, 1')
+ define(SETCBR, `addic r0, $1, -1')
+ define(CLRCB, `addic r0, r0, 0')
+')
+ifdef(`OPERATION_sub_n',`
+ define(ADDSUBC, subfe)
+ define(ADDSUB, subfc)
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)
+ define(GENRVAL, `neg r3, r3')
+ define(SETCBR, `subfic r0, $1, 0')
+ define(CLRCB, `addic r0, r1, -1')
+')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+C INPUT PARAMETERS
+C rp: destination, up: first source, vp: second source, n: limb count.
+C r6 (n) is reused as a load target inside the loop, after the trip count
+C and both parity tests have been taken from it.
+define(`rp', `r3')
+define(`up', `r4')
+define(`vp', `r5')
+define(`n', `r6')
+
+ASM_START()
+C func_nc(rp, up, vp, n, carry-in in r7): load the carry flag from r7, then
+C share the body of func from L(ent) on.
+PROLOGUE(func_nc)
+ SETCBR(r7)
+ b L(ent)
+EPILOGUE()
+
+C func(rp, up, vp, n): rp[i] = up[i] + vp[i] (add_n) or up[i] - vp[i]
+C (sub_n) for n limbs, propagating carry or borrow through CA.  Returns the
+C final carry or borrow as 0 or 1 in r3.
+C Layout: one leading limb when n is odd, then a loop handling four limbs
+C per trip that is entered at L(mid) when bit 1 of n is set.
+C n = 0 is not handled: CTR would be loaded with 0 and bdnz would wrap.
+PROLOGUE(func)
+ CLRCB
+L(ent):
+ andi. r7, n, 1
+ beq L(bx0)
+
+L(bx1): ld r7, 0(up) C n odd: handle a single limb up front
+ ld r9, 0(vp)
+ ADDSUBC r11, r9, r7
+ std r11, 0(rp)
+ cmpldi cr6, n, 1
+ beq cr6, L(end) C n = 1: nothing left to do
+ addi up, up, 8
+ addi vp, vp, 8
+ addi rp, rp, 8
+
+L(bx0): addi r0, n, 2 C compute branch...
+ srdi r0, r0, 2 C ...count
+ mtctr r0 C CTR = (n+2)/4
+
+ andi. r7, n, 2
+ bne L(mid) C bit 1 of n set: start with the second limb pair
+
+C Entering at L(top): advance by two limbs so that its -16/-8 offsets
+C address the first remaining pair.
+ addi up, up, 16
+ addi vp, vp, 16
+ addi rp, rp, 16
+
+ ALIGN(32)
+L(top): ld r6, -16(up)
+ ld r7, -8(up)
+ ld r8, -16(vp)
+ ld r9, -8(vp)
+ ADDSUBC r10, r8, r6
+ ADDSUBC r11, r9, r7
+ std r10, -16(rp)
+ std r11, -8(rp)
+L(mid): ld r6, 0(up)
+ ld r7, 8(up)
+ ld r8, 0(vp)
+ ld r9, 8(vp)
+ ADDSUBC r10, r8, r6
+ ADDSUBC r11, r9, r7
+ std r10, 0(rp)
+ std r11, 8(rp)
+ addi up, up, 32
+ addi vp, vp, 32
+ addi rp, rp, 32
+ bdnz L(top)
+
+C subfe of a register with itself yields CA - 1, i.e. 0 or -1; GENRVAL maps
+C that onto the 0/1 return value.
+L(end): subfe r3, r0, r0 C -cy
+ GENRVAL
+ blr
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlsh1_n.asm b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlsh1_n.asm
new file mode 100644
index 0000000..ddf5fd8
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlsh1_n.asm
@@ -0,0 +1,43 @@
+dnl PowerPC-64 mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsblsh1_n.
+
+dnl Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C Shift count applied to the vp operand and its complement 64 - LSH.  The
+C included generic file uses them as the rldimi and srdi shift amounts.
+define(LSH, 1)
+define(RSH, 63)
+
+C Map the OPERATION_ symbol onto the DO_add / DO_sub / DO_rsb switch that
+C the included generic file tests.
+ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
+ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
+
+C The code itself comes from the shift-generic file.
+include_mpn(`powerpc64/mode64/p7/aorsorrlshC_n.asm')
diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlsh2_n.asm b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlsh2_n.asm
new file mode 100644
index 0000000..3f9d88d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlsh2_n.asm
@@ -0,0 +1,43 @@
+dnl PowerPC-64 mpn_addlsh2_n, mpn_sublsh2_n, mpn_rsblsh2_n.
+
+dnl Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C Shift count applied to the vp operand and its complement 64 - LSH.  The
+C included generic file uses them as the rldimi and srdi shift amounts.
+define(LSH, 2)
+define(RSH, 62)
+
+C Map the OPERATION_ symbol onto the DO_add / DO_sub / DO_rsb switch that
+C the included generic file tests.
+ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
+ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
+
+C The code itself comes from the shift-generic file.
+include_mpn(`powerpc64/mode64/p7/aorsorrlshC_n.asm')
diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlshC_n.asm b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlshC_n.asm
new file mode 100644
index 0000000..5251202
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/aorsorrlshC_n.asm
@@ -0,0 +1,129 @@
+dnl PowerPC-64 mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n.
+
+dnl Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 ?
+C POWER6 ?
+C POWER7 2.5
+
+C INPUT PARAMETERS
+C rp: destination, up: unshifted source, vp: source that gets shifted left
+C by LSH bits, n: limb count.  r6 (n) doubles as u0 once the loop starts.
+define(`rp', `r3')
+define(`up', `r4')
+define(`vp', `r5')
+define(`n', `r6')
+
+C Operation selection.  With u a limb from up and s the shifted vp limb, the
+C loop invokes ADDSUBE(s, s, u), which computes
+C   DO_add:  s + u + CA
+C   DO_sub:  u - s - borrow   (subfe subtracts its second operand from its third)
+C   DO_rsb:  s - u - borrow   (the macro swaps the two source operands)
+C where borrow is the inverse of CA.
+C RETVAL(t) builds the return value from t, the bits shifted out of the top
+C vp limb, and the final carry flag: t + carry for add, t + borrow for sub,
+C t - borrow for rsb.
+C ADDSUBC and INITCY are defined but not referenced by the code below.
+ifdef(`DO_add', `
+ define(`ADDSUBC', `addc $1, $2, $3')
+ define(`ADDSUBE', `adde $1, $2, $3')
+ define(INITCY, `addic $1, r1, 0')
+ define(RETVAL, `addze r3, $1')
+ define(`func', mpn_addlsh`'LSH`'_n)')
+ifdef(`DO_sub', `
+ define(`ADDSUBC', `subfc $1, $2, $3')
+ define(`ADDSUBE', `subfe $1, $2, $3')
+ define(INITCY, `addic $1, r1, -1')
+ define(RETVAL, `subfze r3, $1
+ neg r3, r3')
+ define(`func', mpn_sublsh`'LSH`'_n)')
+ifdef(`DO_rsb', `
+ define(`ADDSUBC', `subfc $1, $3, $2')
+ define(`ADDSUBE', `subfe $1, $3, $2')
+ define(INITCY, `addic $1, r1, -1')
+ define(RETVAL, `addme r3, $1')
+ define(`func', mpn_rsblsh`'LSH`'_n)')
+
+C s0/s1 alternate as the shifted-limb accumulator: one receives the current
+C vp limb shifted left while the other collects the bits shifted out for the
+C next limb.  u0/u1 and v0/v1 are the alternating load targets.
+define(`s0', `r0') define(`s1', `r9')
+define(`u0', `r6') define(`u1', `r7')
+define(`v0', `r10') define(`v1', `r11')
+
+
+ASM_START()
+C func(rp, up, vp, n): combines each limb of up with the matching limb of
+C vp shifted left by LSH bits (see ADDSUBE), stores n limbs at rp and returns
+C RETVAL applied to the bits shifted out of the top vp limb.
+C The loop handles four limbs per trip and has four entry points, L(top),
+C L(lo3), L(lo2) and L(lo1), selected by n mod 4.
+C n = 0 is not handled: CTR would be loaded with 0 and bdnz would wrap.
+PROLOGUE(func)
+ rldic r7, n, 3, 59 C r7 = 8 * (n mod 4)
+ add up, up, r7 C pre-advance all three pointers so that the
+ add vp, vp, r7 C offsets used at the selected entry point
+ add rp, rp, r7 C address limb 0
+
+C Put CA in its initial state (clear for add, set meaning no borrow for sub
+C and rsb) and form n + 3 for the trip count.
+ifdef(`DO_add', `
+ addic r0, n, 3 C set cy flag as side effect
+',`
+ subfc r0, r0, r0 C set cy flag
+ addi r0, n, 3
+')
+ srdi r0, r0, 2
+ mtctr r0 C CTR = (n+3)/4
+
+ andi. r0, n, 1
+ beq L(bx0)
+
+C n odd: the first limb is assembled in s0, so clear it (no bits carried in).
+L(bx1): andi. r0, n, 2
+ li s0, 0
+ bne L(lo3) C n mod 4 = 3
+ b L(lo1) C n mod 4 = 1
+
+C n even: the first limb is assembled in s1, so clear it instead.
+L(bx0): andi. r0, n, 2
+ li s1, 0
+ bne L(lo2) C n mod 4 = 2, else fall into L(top)
+
+ ALIGN(32)
+C Per limb: rldimi inserts the vp limb shifted left by LSH above the low LSH
+C bits already held in the accumulator, srdi saves the bits shifted out for
+C the next limb, ADDSUBE combines with the up limb, std writes the result.
+L(top): addi rp, rp, 32
+ ld v0, 0(vp)
+ addi vp, vp, 32
+ rldimi s1, v0, LSH, 0
+ ld u0, 0(up)
+ addi up, up, 32
+ srdi s0, v0, RSH
+ ADDSUBE(s1, s1, u0)
+ std s1, -32(rp)
+L(lo3): ld v1, -24(vp)
+ rldimi s0, v1, LSH, 0
+ ld u1, -24(up)
+ srdi s1, v1, RSH
+ ADDSUBE(s0, s0, u1)
+ std s0, -24(rp)
+L(lo2): ld v0, -16(vp)
+ rldimi s1, v0, LSH, 0
+ ld u0, -16(up)
+ srdi s0, v0, RSH
+ ADDSUBE(s1, s1, u0)
+ std s1, -16(rp)
+L(lo1): ld v1, -8(vp)
+ rldimi s0, v1, LSH, 0
+ ld u1, -8(up)
+ srdi s1, v1, RSH
+ ADDSUBE(s0, s0, u1)
+ std s0, -8(rp)
+ bdnz L(top) C decrement CTR and loop back
+
+C s1 now holds the bits shifted out of the most significant vp limb.
+ RETVAL( s1)
+ blr
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_11.asm b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_11.asm
new file mode 100644
index 0000000..f04e896
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_11.asm
@@ -0,0 +1,67 @@
+dnl PowerPC-64 mpn_gcd_11.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C POWER3/PPC630 -
+C POWER4/PPC970 -
+C POWER5 -
+C POWER6 -
+C POWER7 7.6 obsolete
+C POWER8 ?
+C POWER9 ?
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
+
+C INPUT PARAMETERS
+C mpn_gcd_11(u0 in r3, v0 in r4): single-limb binary gcd.  The loop ends
+C when u0 equals v0 and the result is left in r3.
+C NOTE(review): trailing zeros are stripped only from the difference, never
+C from the inputs, so both operands are presumably required to be odd (the
+C entry label is L(odd)) - confirm against the callers.
+define(`u0', `r3')
+define(`v0', `r4')
+
+define(`cnt', `r9')dnl
+
+ASM_START()
+PROLOGUE(mpn_gcd_11)
+ li r12, 63
+ b L(odd)
+
+ ALIGN(16)
+C On entry to L(top): cr7 holds the compare of v0 with u0, r10 = v0 - u0 and
+C r11 = u0 - v0.  ANDing a value with its negation keeps only its lowest set
+C bit, so 63 minus the cntlzd of that is the number of trailing zero bits of
+C the difference, which srd then shifts out.  isel condition 29 is the gt
+C bit of cr7, i.e. v0 > u0.
+L(top): and r8, r11, r10 C isolate lsb
+ cntlzd cnt, r8
+ isel v0, u0, v0, 29 C v = min(u,v)
+ isel u0, r10, r11, 29 C u = |u - v|
+ subf cnt, cnt, r12 C cnt = 63-cnt
+ srd u0, u0, cnt
+L(odd): cmpld cr7, v0, u0
+ subf r10, u0, v0 C r10 = v - u
+ subf r11, v0, u0 C r11 = u - v
+ bne cr7, L(top)
+
+L(end): blr
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm
new file mode 100644
index 0000000..ade30e4
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm
@@ -0,0 +1,146 @@
+dnl PowerPC-64 mpn_gcd_22 optimised for POWER7 and POWER8.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011-2013, 2019 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C POWER3/PPC630 -
+C POWER4/PPC970 -
+C POWER5 -
+C POWER6 -
+C POWER7 12.3
+C POWER8 13.4
+C POWER9 10.6
+
+C We define SLOW if this target uses a slow struct return mechanism, with
+C r3 as an implicit parameter for the struct pointer.
+C SLOW ends up defined for AIX, and for targets that are neither DARWIN nor
+C ELFv2_ABI; it is left undefined for DARWIN and for ELFv2_ABI.
+undefine(`SLOW')dnl
+ifdef(`AIX',`define(`SLOW',`due to AIX')',`
+ ifdef(`DARWIN',,`
+ ifdef(`ELFv2_ABI',,`define(`SLOW',`due to ELFv1')')dnl
+ ')
+')
+
+C Argument registers.  In the SLOW case r3 carries the result pointer, so
+C the four operand limbs start at r4; otherwise they start at r3.  IFSLOW
+C keeps its argument only in the SLOW case; the code below does not use it.
+ifdef(`SLOW',`
+define(`IFSLOW', `$1')
+define(`u1', `r4')
+define(`u0', `r5')
+define(`v1', `r6')
+define(`v0', `r7')
+',`
+define(`IFSLOW', `')
+define(`u1', `r3')
+define(`u0', `r4')
+define(`v1', `r5')
+define(`v0', `r6')
+')
+
+C Scratch registers: t1:t0 receives u - v and s1:s0 receives v - u; cnt is
+C the right-shift count; tmp (r0) first holds the carry-derived mask and
+C then the complementary left-shift count.
+define(`tmp', `r0')
+define(`t0', `r8')
+define(`t1', `r9')
+define(`s0', `r10')
+define(`s1', `r11')
+define(`cnt', `r12')
+
+ASM_START()
+C mpn_gcd_22(u1, u0, v1, v0): binary gcd of the two-limb values u1:u0 and
+C v1:v0.  The result is stored as two limbs through r3 in the SLOW case and
+C returned in r3 (low limb) and r4 (high limb) otherwise.
+C Phase 1, L(top): two-limb subtract-and-shift steps, repeated while the OR
+C of u1 and v1 is non-zero.
+C Phase 2, L(top1) and L(odd): single-limb loop on u0 and v0 once both high
+C limbs are zero; the high result limb is then zero.
+C NOTE(review): trailing zeros are stripped only from differences, so both
+C inputs are presumably required to be odd - confirm against the callers.
+PROLOGUE(mpn_gcd_22)
+L(top): subfc. t0, v0, u0 C 0 12
+ beq cr0, L(lowz)
+ subfe t1, v1, u1 C 2 14
+ subfe. tmp, tmp, tmp C 4 set cr0 from the carry bit
+ subfc s0, u0, v0 C 0
+ subfe s1, u1, v1 C 2
+
+C Here t1:t0 = u - v and s1:s0 = v - u.  cr0.eq (isel condition 2) is set
+C when the subfe. result is zero, i.e. when u - v did not borrow (u >= v).
+C cnt becomes the trailing-zero count of the low difference limb and tmp
+C becomes 64 - cnt, the left-shift count for the bits that move from the
+C high limb into the low limb.
+L(bck): and tmp, s0, t0 C 2
+ cntlzd cnt, tmp C 4
+ addi tmp, cnt, 1 C 6
+ subfic cnt, cnt, 63 C 6
+
+ isel v0, v0, u0, 2 C 6 use condition set by subfe
+ isel v1, v1, u1, 2 C 6
+ isel u0, t0, s0, 2 C 6
+ isel u1, t1, s1, 2 C 6
+
+ srd u0, u0, cnt C 8
+ sld tmp, u1, tmp C 8
+ srd u1, u1, cnt C 8
+ or u0, u0, tmp C 10
+
+ or. r0, u1, v1 C 10
+ bne L(top)
+
+
+C Both high limbs are zero: finish with a single-limb loop.  r0 holds the
+C constant 63; r10, r11 and r8 (alias s0, s1, t0) are reused as temporaries.
+ li r0, 63
+ b L(odd)
+ ALIGN(16)
+L(top1):isel v0, u0, v0, 29 C v = min(u,v)
+ isel u0, r10, r11, 29 C u = |u - v|
+ subf cnt, cnt, r0 C cnt = 63-cnt
+ srd u0, u0, cnt
+L(odd): subf r10, u0, v0 C r10 = v - u
+ subf r11, v0, u0 C r11 = u - v
+ cmpld cr7, v0, u0
+ and r8, r11, r10 C isolate lsb
+ cntlzd cnt, r8
+ bne cr7, L(top1)
+
+C u0 = v0 here, so r10 = v0 - u0 is zero and serves as the high result limb
+C in the SLOW variant.
+ifdef(`SLOW',`
+ std v0, 0(r3)
+ std r10, 8(r3) C zero
+',`
+ mr r3, v0
+ li r4, 0
+')
+ blr
+
+
+L(lowz):C We come here when v0 - u0 = 0
+ C 1. If v1 - u1 = 0, then gcd is u = v.
+ C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+ subfc. t0, v1, u1 C 2 8
+ beq L(end)
+ li t1, 0
+ subfe. tmp, tmp, tmp C 4 set cr0 from the carry bit
+ subf s0, u1, v1 C 2
+ li s1, 0
+ b L(bck)
+
+C u equals v: return v1:v0 unchanged.
+L(end):
+ifdef(`SLOW',`
+ std v0, 0(r3)
+ std v1, 8(r3)
+ blr
+',`
+ mr r3, v0
+ mr r4, v1
+ blr
+')
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gmp-mparam.h b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gmp-mparam.h
new file mode 100644
index 0000000..9da4080
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/p7/gmp-mparam.h
@@ -0,0 +1,175 @@
+/* POWER7 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* Limb geometry for this target: 64-bit limbs occupying 8 bytes. */
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 3720 MHz POWER7/SMT4 */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2019-10-02, gcc 4.8 */
+/* NOTE(review): the thresholds that follow are tuneup output (one group is
+   marked as measured on gcc110.osuosl.org, 2023-07-27); presumably they are
+   meant to be regenerated rather than edited by hand - confirm against the
+   GMP tuning documentation. */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 16
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13
+#define USE_PREINV_DIVREM_1 0
+/* From gcc110.osuosl.org, 2023-07-27 */
+#define DIV_QR_1N_PI1_METHOD 3 /* 8.45% faster than 4 */
+#define DIV_QR_1_NORM_THRESHOLD 1
+#define DIV_QR_1_UNNORM_THRESHOLD 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 27
+
+#define DIV_1_VS_MUL_1_PERCENT 341
+
+#define MUL_TOOM22_THRESHOLD 22
+#define MUL_TOOM33_THRESHOLD 71
+#define MUL_TOOM44_THRESHOLD 196
+#define MUL_TOOM6H_THRESHOLD 298
+#define MUL_TOOM8H_THRESHOLD 406
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 140
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 132
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 139
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 120
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 32
+#define SQR_TOOM3_THRESHOLD 105
+#define SQR_TOOM4_THRESHOLD 190
+#define SQR_TOOM6_THRESHOLD 318
+#define SQR_TOOM8_THRESHOLD 547
+
+#define MULMID_TOOM42_THRESHOLD 56
+
+#define MULMOD_BNM1_THRESHOLD 18
+#define SQRMOD_BNM1_THRESHOLD 20
+
+#define MUL_FFT_MODF_THRESHOLD 436 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 436, 5}, { 21, 6}, { 21, 7}, { 11, 6}, \
+ { 23, 7}, { 12, 6}, { 25, 7}, { 21, 8}, \
+ { 11, 7}, { 25, 8}, { 13, 7}, { 28, 8}, \
+ { 15, 7}, { 33, 8}, { 17, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 21, 9}, { 11, 8}, \
+ { 29, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 49, 9}, { 27,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 43,10}, \
+ { 23, 9}, { 55,11}, { 15,10}, { 31, 9}, \
+ { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
+ { 95,10}, { 55,11}, { 31,10}, { 63, 9}, \
+ { 127,10}, { 79,11}, { 47,10}, { 103,12}, \
+ { 31,11}, { 63,10}, { 135,11}, { 79,10}, \
+ { 159,11}, { 95,10}, { 191, 9}, { 383,11}, \
+ { 111,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,11}, { 143,10}, { 287, 9}, { 575,11}, \
+ { 159,10}, { 319,12}, { 95,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 83
+#define MUL_FFT_THRESHOLD 4736
+
+#define SQR_FFT_MODF_THRESHOLD 368 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 368, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 12, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 25, 8}, \
+ { 13, 7}, { 28, 8}, { 15, 7}, { 32, 8}, \
+ { 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
+ { 21, 9}, { 11, 8}, { 29, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 31, 8}, \
+ { 63, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
+ { 31,10}, { 79,11}, { 47,10}, { 95,12}, \
+ { 31,11}, { 63,10}, { 135,11}, { 79,10}, \
+ { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
+ { 383,11}, { 111,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \
+ { 575,10}, { 303,11}, { 159,10}, { 319, 9}, \
+ { 639,12}, { 95,11}, { 191,10}, { 383, 9}, \
+ { 767,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 84
+#define SQR_FFT_THRESHOLD 3264
+
+#define MULLO_BASECASE_THRESHOLD 3
+#define MULLO_DC_THRESHOLD 35
+#define MULLO_MUL_N_THRESHOLD 9449
+#define SQRLO_BASECASE_THRESHOLD 3
+#define SQRLO_DC_THRESHOLD 119
+#define SQRLO_SQR_THRESHOLD 6440
+
+#define DC_DIV_QR_THRESHOLD 33
+#define DC_DIVAPPR_Q_THRESHOLD 124
+#define DC_BDIV_QR_THRESHOLD 62
+#define DC_BDIV_Q_THRESHOLD 144
+
+#define INV_MULMOD_BNM1_THRESHOLD 67
+#define INV_NEWTON_THRESHOLD 123
+#define INV_APPR_THRESHOLD 123
+
+#define BINV_NEWTON_THRESHOLD 284
+#define REDC_1_TO_REDC_2_THRESHOLD 18
+#define REDC_2_TO_REDC_N_THRESHOLD 109
+
+#define MU_DIV_QR_THRESHOLD 1387
+#define MU_DIVAPPR_Q_THRESHOLD 1334
+#define MUPI_DIV_QR_THRESHOLD 50
+#define MU_BDIV_QR_THRESHOLD 1308
+#define MU_BDIV_Q_THRESHOLD 1499
+
+#define POWM_SEC_TABLE 1,23,121,579,642
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 18
+#define SET_STR_DC_THRESHOLD 1562
+#define SET_STR_PRECOMPUTE_THRESHOLD 3100
+
+#define FAC_DSC_THRESHOLD 774
+#define FAC_ODD_THRESHOLD 25
+
+#define MATRIX22_STRASSEN_THRESHOLD 18
+#define HGCD2_DIV1_METHOD 5 /* 3.27% faster than 3 */
+#define HGCD_THRESHOLD 118
+#define HGCD_APPR_THRESHOLD 150
+#define HGCD_REDUCE_THRESHOLD 3014
+#define GCD_DC_THRESHOLD 386
+#define GCDEXT_DC_THRESHOLD 365
+#define JACOBI_BASE_METHOD 4 /* 27.64% faster than 1 */