From a89a14ef5da44684a16b204e7a70460cc8c4922a Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Fri, 21 Jun 2024 23:36:36 +0200
Subject: Basic constant folding implementation

---
 .../gmp-6.3.0/mpn/powerpc64/mode64/invert_limb.asm | 88 ++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 vendor/gmp-6.3.0/mpn/powerpc64/mode64/invert_limb.asm

(limited to 'vendor/gmp-6.3.0/mpn/powerpc64/mode64/invert_limb.asm')

diff --git a/vendor/gmp-6.3.0/mpn/powerpc64/mode64/invert_limb.asm b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/invert_limb.asm
new file mode 100644
index 0000000..dfdba64
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/powerpc64/mode64/invert_limb.asm
@@ -0,0 +1,88 @@
+dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Copyright 2004-2006, 2008, 2010, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C			cycles/limb (approximate)
+C POWER3/PPC630		 80
+C POWER4/PPC970		 86
+C POWER5		 86
+C POWER6		170
+C POWER7		 66
+
+ASM_START()
+PROLOGUE(mpn_invert_limb,toc)
+	LEAL(	r12, approx_tab)
+	srdi	r9, r3, 32
+	rlwinm	r9, r9, 10, 23, 30	C (d >> 55) & 0x1fe
+	srdi	r10, r3, 24		C d >> 24
+	lis	r11, 0x1000
+	rldicl	r8, r3, 0, 63		C d mod 2
+	addi	r10, r10, 1		C d40
+	sldi	r11, r11, 32		C 2^60
+	srdi	r7, r3, 1		C d/2
+	add	r7, r7, r8		C d63 = ceil(d/2)
+	neg	r8, r8			C mask = -(d mod 2)
+	lhzx	r0, r9, r12
+	mullw	r9, r0, r0		C v0*v0
+	sldi	r6, r0, 11		C v0 << 11
+	addi	r0, r6, -1		C (v0 << 11) - 1
+	mulld	r9, r9, r10		C v0*v0*d40
+	srdi	r9, r9, 40		C v0*v0*d40 >> 40
+	subf	r9, r9, r0		C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
+	mulld	r0, r9, r10		C v1*d40
+	sldi	r6, r9, 13		C v1 << 13
+	subf	r0, r0, r11		C 2^60 - v1*d40
+	mulld	r0, r0, r9		C v1 * (2^60 - v1*d40)
+	srdi	r0, r0, 47		C v1 * (2^60 - v1*d40) >> 47
+	add	r0, r0, r6		C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
+	mulld	r11, r0, r7		C v2 * d63
+	srdi	r10, r0, 1		C v2 >> 1
+	sldi	r9, r0, 31		C v2 << 31
+	and	r8, r10, r8		C (v2 >> 1) & mask
+	subf	r8, r11, r8		C ((v2 >> 1) & mask) - v2 * d63
+	mulhdu	r0, r8, r0		C p1 = v2 * (((v2 >> 1) & mask) - v2 * d63)
+	srdi	r0, r0, 1		C p1 >> 1
+	add	r0, r0, r9		C v3 = (v2 << 31) + (p1 >> 1)
+	nop
+	mulld	r11, r0, r3
+	mulhdu	r9, r0, r3
+	addc	r10, r11, r3
+	adde	r3, r9, r3
+	subf	r3, r3, r0
+	blr
+EPILOGUE()
+
+DEF_OBJECT(approx_tab)
+forloop(i,256,512-1,dnl
+`	.short	eval(0x7fd00/i)
+')dnl
+END_OBJECT(approx_tab)
+ASM_END()
--
cgit v1.2.3
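
Editor's note: for readers cross-checking the patch, the following is a minimal C
transliteration of the reciprocal computation annotated in the asm comments above
(the table lookup v0 and the Newton refinements v1, v2, v3, following the
table-based method of Moller and Granlund, "Improved Division by Invariant
Integers"). It is a sketch, not GMP code: the names invert_limb_ref and
init_approx_tab are invented for illustration, the table duplicates approx_tab
from the asm, and unsigned __int128 (a GCC/Clang extension) stands in for mulhdu
and the addc/adde carry chain. All 64-bit operations intentionally wrap mod 2^64,
matching the 64-bit registers.

    #include <stdint.h>

    /* 256-entry table mirroring approx_tab in the asm: entry i holds
       floor(0x7fd00 / (256 + i)), an 11-bit reciprocal approximation
       keyed on the top 9 bits of the normalized divisor d.  */
    static uint16_t approx_tab[256];

    static void init_approx_tab(void)
    {
        for (int i = 256; i < 512; i++)
            approx_tab[i - 256] = (uint16_t)(0x7fd00 / i);
    }

    /* Compute floor((2^128 - 1) / d) - 2^64 for a normalized d (bit 63
       set), step for step as in the asm above.  */
    uint64_t invert_limb_ref(uint64_t d)
    {
        /* Bit 63 of d is 1, so (d >> 55) is in [256, 511].  */
        uint64_t v0 = approx_tab[(d >> 55) - 256];
        uint64_t d40 = (d >> 24) + 1;
        uint64_t v1 = (v0 << 11) - ((v0 * v0 * d40) >> 40) - 1;
        uint64_t v2 = (v1 << 13)
            + ((v1 * ((UINT64_C(1) << 60) - v1 * d40)) >> 47);
        uint64_t d63 = (d >> 1) + (d & 1);      /* ceil(d/2) */
        uint64_t mask = -(d & 1);               /* -(d mod 2) */
        uint64_t e = ((v2 >> 1) & mask) - v2 * d63;   /* wraps mod 2^64 */
        uint64_t p1 = (uint64_t)(((unsigned __int128)v2 * e) >> 64);
        uint64_t v3 = (v2 << 31) + (p1 >> 1);
        /* Final adjustment, mirroring the mulld/mulhdu/addc/adde/subf
           tail: subtract the (adjusted) high word of v3*d + d.  */
        unsigned __int128 p = (unsigned __int128)v3 * d;
        uint64_t lo = (uint64_t)p + d;                         /* addc */
        uint64_t hi = (uint64_t)(p >> 64) + d + (lo < d);      /* adde */
        return v3 - hi;                                        /* subf */
    }

As a quick sanity check against the definition, the result for a normalized d
should equal floor((2^128 - 1) / d) - 2^64; for instance, after init_approx_tab(),
invert_limb_ref(UINT64_C(1) << 63) should yield 0xFFFFFFFFFFFFFFFF.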