diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:36:36 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:42:26 +0200 |
commit | a89a14ef5da44684a16b204e7a70460cc8c4922a (patch) | |
tree | b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/pa32/hppa1_1 | |
parent | 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff) |
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/pa32/hppa1_1')
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/addmul_1.asm | 106 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/gmp-mparam.h | 72 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/mul_1.asm | 102 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/add_n.asm | 83 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/addmul_1.asm | 201 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/lshift.asm | 95 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/rshift.asm | 92 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/sub_n.asm | 84 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/submul_1.asm | 207 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/sqr_diagonal.asm | 60 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/submul_1.asm | 115 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/udiv.asm | 102 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/pa32/hppa1_1/umul.asm | 47 |
13 files changed, 1366 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/addmul_1.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/addmul_1.asm new file mode 100644 index 0000000..ec2f219 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/addmul_1.asm @@ -0,0 +1,106 @@ +dnl HP-PA 1.1 mpn_addmul_1 -- Multiply a limb vector with a limb and add the +dnl result to a second limb vector. + +dnl Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr r26 +C s1_ptr r25 +C size r24 +C s2_limb r23 + +C This runs at 11 cycles/limb on a PA7000. With the used instructions, it can +C not become faster due to data cache contention after a store. On the PA7100 +C it runs at 10 cycles/limb. + +C There are some ideas described in mul_1.asm that applies to this code too. + +ASM_START() +PROLOGUE(mpn_addmul_1) +C .callinfo frame=64,no_calls + + ldo 64(%r30),%r30 + fldws,ma 4(%r25),%fr5 + stw %r23,-16(%r30) C move s2_limb ... 
+ addib,= -1,%r24,L(just_one_limb) + fldws -16(%r30),%fr4 C ... into fr4 + add %r0,%r0,%r0 C clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 C least significant limb in product + ldw -16(%r30),%r28 + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L(end) + ldw -12(%r30),%r1 + +C Main loop +LDEF(loop) + ldws 0(%r26),%r29 + fldws,ma 4(%r25),%fr5 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addc %r0,%r28,%r28 + addib,<> -1,%r24,L(loop) + ldw -12(%r30),%r1 + +LDEF(end) + ldw 0(%r26),%r29 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + ldws 0(%r26),%r29 + addc %r0,%r28,%r28 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + +LDEF(just_one_limb) + xmpyu %fr4,%fr5,%fr6 + ldw 0(%r26),%r29 + fstds %fr6,-16(%r30) + ldw -12(%r30),%r1 + ldw -16(%r30),%r28 + add %r29,%r1,%r19 + stw %r19,0(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/gmp-mparam.h b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/gmp-mparam.h new file mode 100644 index 0000000..1261b24 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/gmp-mparam.h @@ -0,0 +1,72 @@ +/* HP-PA 1.1 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 1999-2002, 2004 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. 
+ +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#define GMP_LIMB_BITS 32 +#define GMP_LIMB_BYTES 4 + +/* Generated by tuneup.c, 2004-02-07, gcc 2.8 (pa7100/100MHz) */ + +#define MUL_TOOM22_THRESHOLD 30 +#define MUL_TOOM33_THRESHOLD 89 + +#define SQR_BASECASE_THRESHOLD 4 +#define SQR_TOOM2_THRESHOLD 55 +#define SQR_TOOM3_THRESHOLD 101 + +#define DIV_SB_PREINV_THRESHOLD 0 /* always */ +#define DIV_DC_THRESHOLD 84 +#define POWM_THRESHOLD 166 + +#define HGCD_THRESHOLD 231 +#define GCD_ACCEL_THRESHOLD 3 +#define GCD_DC_THRESHOLD 823 +#define JACOBI_BASE_METHOD 2 + +#define DIVREM_1_NORM_THRESHOLD 5 +#define DIVREM_1_UNNORM_THRESHOLD 11 +#define MOD_1_NORM_THRESHOLD 5 +#define MOD_1_UNNORM_THRESHOLD 10 +#define USE_PREINV_DIVREM_1 1 +#define USE_PREINV_MOD_1 1 +#define DIVREM_2_THRESHOLD 0 /* always */ +#define DIVEXACT_1_THRESHOLD 0 /* always */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always */ + +#define GET_STR_DC_THRESHOLD 13 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_THRESHOLD 6589 + +#define MUL_FFT_TABLE { 464, 928, 1920, 4608, 14336, 40960, 0 } +#define MUL_FFT_MODF_THRESHOLD 480 +#define MUL_FFT_THRESHOLD 3328 + +#define SQR_FFT_TABLE { 528, 1184, 2176, 5632, 14336, 40960, 0 } +#define SQR_FFT_MODF_THRESHOLD 520 +#define SQR_FFT_THRESHOLD 3328 diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/mul_1.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/mul_1.asm new file mode 100644 index 0000000..6e60c2f --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/mul_1.asm @@ -0,0 +1,102 @@ +dnl HP-PA 1.1 mpn_mul_1 -- Multiply a limb vector with a limb and 
store the +dnl result in a second limb vector. + +dnl Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr r26 +C s1_ptr r25 +C size r24 +C s2_limb r23 + +C This runs at 9 cycles/limb on a PA7000. With the used instructions, it can +C not become faster due to data cache contention after a store. On the PA7100 +C it runs at 7 cycles/limb. + +C We could use fldds to read two limbs at a time from the S1 array, and that +C could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and +C PA7100, respectively. We don't do that since it does not seem worth the +C (alignment) troubles... + +C At least the PA7100 is rumored to be able to deal with cache-misses without +C stalling instruction issue. If this is true, and the cache is actually also +C lockup-free, we should use a deeper software pipeline, and load from S1 very +C early! 
(The loads and stores to -12(sp) will surely be in the cache.) + +ASM_START() +PROLOGUE(mpn_mul_1) +C .callinfo frame=64,no_calls + + ldo 64(%r30),%r30 + fldws,ma 4(%r25),%fr5 + stw %r23,-16(%r30) C move s2_limb ... + addib,= -1,%r24,L(just_one_limb) + fldws -16(%r30),%fr4 C ... into fr4 + add %r0,%r0,%r0 C clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 C least significant limb in product + ldw -16(%r30),%r28 + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L(end) + ldw -12(%r30),%r1 + +C Main loop +LDEF(loop) + fldws,ma 4(%r25),%fr5 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addib,<> -1,%r24,L(loop) + ldw -12(%r30),%r1 + +LDEF(end) + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + stws,ma %r19,4(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + +LDEF(just_one_limb) + xmpyu %fr4,%fr5,%fr6 + fstds %fr6,-16(%r30) + ldw -16(%r30),%r28 + ldo -64(%r30),%r30 + bv 0(%r2) + fstws %fr6R,0(%r26) +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/add_n.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/add_n.asm new file mode 100644 index 0000000..b96d403 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/add_n.asm @@ -0,0 +1,83 @@ +dnl HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store +dnl sum in a third limb vector. Optimized for the PA7100, where is runs at +dnl 4.25 cycles/limb. + +dnl Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. 
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr r26 +C s1_ptr r25 +C s2_ptr r24 +C size r23 + +ASM_START() +PROLOGUE(mpn_add_n) + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,<= -5,%r23,L(rest) + add %r20,%r19,%r28 C add first limbs ignoring cy + +LDEF(loop) + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -4,%r23,L(loop) + addc %r20,%r19,%r28 + +LDEF(rest) + addib,= 4,%r23,L(end) + nop + +LDEF(eloop) + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -1,%r23,L(eloop) + addc %r20,%r19,%r28 + +LDEF(end) + stws %r28,0(0,%r26) + bv 0(%r2) + addc %r0,%r0,%r28 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/addmul_1.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/addmul_1.asm new file mode 100644 index 0000000..fb16100 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/addmul_1.asm @@ -0,0 +1,201 @@ +dnl HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and +dnl add the 
result to a second limb vector. + +dnl Copyright 1995, 2000-2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +C INPUT PARAMETERS +define(`res_ptr',`%r26') +define(`s1_ptr',`%r25') +define(`size_param',`%r24') +define(`s2_limb',`%r23') + +define(`cylimb',`%r28') +define(`s0',`%r19') +define(`s1',`%r20') +define(`s2',`%r3') +define(`s3',`%r4') +define(`lo0',`%r21') +define(`lo1',`%r5') +define(`lo2',`%r6') +define(`lo3',`%r7') +define(`hi0',`%r22') +define(`hi1',`%r23') C safe to reuse +define(`hi2',`%r29') +define(`hi3',`%r1') + +ASM_START() +PROLOGUE(mpn_addmul_1) +C .callinfo frame=128,no_calls + + ldo 128(%r30),%r30 + stws s2_limb,-16(%r30) + add %r0,%r0,cylimb C clear cy and cylimb + addib,< -4,size_param,L(few_limbs) + fldws -16(%r30),%fr31R + + ldo -112(%r30),%r31 + stw %r3,-96(%r30) + stw %r4,-92(%r30) + stw %r5,-88(%r30) + stw %r6,-84(%r30) + stw %r7,-80(%r30) + + bb,>=,n s1_ptr,29,L(0) + + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r31) + ldws -16(%r31),cylimb + ldws -12(%r31),lo0 + add s0,lo0,s0 + addib,< -1,size_param,L(few_limbs) + stws,ma s0,4(res_ptr) + +C start software pipeline ---------------------------------------------------- +LDEF(0) + fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + xmpyu %fr4L,%fr31R,%fr5 + xmpyu %fr4R,%fr31R,%fr6 + xmpyu %fr8L,%fr31R,%fr9 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + fstds %fr6,-8(%r31) + fstds %fr9,0(%r31) + fstds %fr10,8(%r31) + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + addc lo1,hi0,lo1 + addc lo2,hi1,lo2 + addc lo3,hi2,lo3 + + addib,< -4,size_param,L(end) + addc %r0,hi3,cylimb C propagate carry into cylimb +C main loop ------------------------------------------------------------------ +LDEF(loop) + fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + ldws 0(res_ptr),s0 + xmpyu %fr4L,%fr31R,%fr5 + ldws 4(res_ptr),s1 + xmpyu %fr4R,%fr31R,%fr6 + ldws 8(res_ptr),s2 + xmpyu %fr8L,%fr31R,%fr9 
+ ldws 12(res_ptr),s3 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + add s0,lo0,s0 + fstds %fr6,-8(%r31) + addc s1,lo1,s1 + fstds %fr9,0(%r31) + addc s2,lo2,s2 + fstds %fr10,8(%r31) + addc s3,lo3,s3 + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + stws,ma s0,4(res_ptr) + addc lo1,hi0,lo1 + stws,ma s1,4(res_ptr) + addc lo2,hi1,lo2 + stws,ma s2,4(res_ptr) + addc lo3,hi2,lo3 + stws,ma s3,4(res_ptr) + + addib,>= -4,size_param,L(loop) + addc %r0,hi3,cylimb C propagate carry into cylimb +C finish software pipeline --------------------------------------------------- +LDEF(end) + ldws 0(res_ptr),s0 + ldws 4(res_ptr),s1 + ldws 8(res_ptr),s2 + ldws 12(res_ptr),s3 + + add s0,lo0,s0 + stws,ma s0,4(res_ptr) + addc s1,lo1,s1 + stws,ma s1,4(res_ptr) + addc s2,lo2,s2 + stws,ma s2,4(res_ptr) + addc s3,lo3,s3 + stws,ma s3,4(res_ptr) + +C restore callee-saves registers --------------------------------------------- + ldw -96(%r30),%r3 + ldw -92(%r30),%r4 + ldw -88(%r30),%r5 + ldw -84(%r30),%r6 + ldw -80(%r30),%r7 + +LDEF(few_limbs) + addib,=,n 4,size_param,L(ret) + +LDEF(loop2) + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r30) + ldws -16(%r30),hi0 + ldws -12(%r30),lo0 + addc lo0,cylimb,lo0 + addc %r0,hi0,cylimb + add s0,lo0,s0 + stws,ma s0,4(res_ptr) + addib,<> -1,size_param,L(loop2) + nop + +LDEF(ret) + addc %r0,cylimb,cylimb + bv 0(%r2) + ldo -128(%r30),%r30 +EPILOGUE(mpn_addmul_1) diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/lshift.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/lshift.asm new file mode 100644 index 0000000..d65db2a --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/lshift.asm @@ -0,0 +1,95 @@ +dnl HP-PA mpn_lshift -- Shift a number left. +dnl Optimized for the PA7100, where is runs at 3.25 cycles/limb. 
+ +dnl Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr r26 +C s_ptr r25 +C size r24 +C cnt r23 + +ASM_START() +PROLOGUE(mpn_lshift) + sh2add %r24,%r25,%r25 + sh2add %r24,%r26,%r26 + ldws,mb -4(0,%r25),%r22 + subi 32,%r23,%r1 + mtsar %r1 + addib,= -1,%r24,L(0004) + vshd %r0,%r22,%r28 C compute carry out limb + ldws,mb -4(0,%r25),%r29 + addib,<= -5,%r24,L(rest) + vshd %r22,%r29,%r20 + +LDEF(loop) + ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + vshd %r22,%r29,%r20 + ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,> -4,%r24,L(loop) + vshd %r22,%r29,%r20 + +LDEF(rest) + addib,= 4,%r24,L(end1) + nop + +LDEF(eloop) + ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + addib,<= -1,%r24,L(end2) + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,> -1,%r24,L(eloop) + vshd %r22,%r29,%r20 + +LDEF(end1) + stws,mb %r20,-4(0,%r26) + vshd %r29,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) + +LDEF(end2) + stws,mb %r20,-4(0,%r26) + +LDEF(0004) + vshd %r22,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/rshift.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/rshift.asm new file mode 100644 index 0000000..f7896fc --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/rshift.asm @@ -0,0 +1,92 @@ +dnl HP-PA mpn_rshift -- Shift a number right. +dnl Optimized for the PA7100, where is runs at 3.25 cycles/limb. + +dnl Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. 
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr r26 +C s_ptr r25 +C size r24 +C cnt r23 + +ASM_START() +PROLOGUE(mpn_rshift) + ldws,ma 4(0,%r25),%r22 + mtsar %r23 + addib,= -1,%r24,L(0004) + vshd %r22,%r0,%r28 C compute carry out limb + ldws,ma 4(0,%r25),%r29 + addib,<= -5,%r24,L(rest) + vshd %r29,%r22,%r20 + +LDEF(loop) + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + vshd %r29,%r22,%r20 + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -4,%r24,L(loop) + vshd %r29,%r22,%r20 + +LDEF(rest) + addib,= 4,%r24,L(end1) + nop + +LDEF(eloop) + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + addib,<= -1,%r24,L(end2) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -1,%r24,L(eloop) + vshd %r29,%r22,%r20 + +LDEF(end1) + stws,ma %r20,4(0,%r26) + vshd %r0,%r29,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + +LDEF(end2) + stws,ma %r20,4(0,%r26) + +LDEF(0004) + vshd %r0,%r22,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/sub_n.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/sub_n.asm new file mode 100644 index 0000000..df3f6e8 --- /dev/null +++ 
b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/sub_n.asm @@ -0,0 +1,84 @@ +dnl HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +dnl store difference in a third limb vector. Optimized for the PA7100, where +dnl is runs at 4.25 cycles/limb. + +dnl Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr r26 +C s1_ptr r25 +C s2_ptr r24 +C size r23 + +ASM_START() +PROLOGUE(mpn_sub_n) + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,<= -5,%r23,L(rest) + sub %r20,%r19,%r28 C subtract first limbs ignoring cy + +LDEF(loop) + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -4,%r23,L(loop) + subb %r20,%r19,%r28 + +LDEF(rest) + addib,= 4,%r23,L(end) + nop + +LDEF(eloop) + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -1,%r23,L(eloop) + subb %r20,%r19,%r28 + +LDEF(end) + stws %r28,0(0,%r26) + addc %r0,%r0,%r28 + bv 0(%r2) + subi 1,%r28,%r28 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/submul_1.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/submul_1.asm new file mode 100644 index 0000000..5ea08cb --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/submul_1.asm @@ -0,0 +1,207 @@ +dnl HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and +dnl subtract the result from a second limb vector. + +dnl Copyright 1995, 2000-2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. 
+dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C INPUT PARAMETERS +define(`res_ptr',`%r26') +define(`s1_ptr',`%r25') +define(`size_param',`%r24') +define(`s2_limb',`%r23') + +define(`cylimb',`%r28') +define(`s0',`%r19') +define(`s1',`%r20') +define(`s2',`%r3') +define(`s3',`%r4') +define(`lo0',`%r21') +define(`lo1',`%r5') +define(`lo2',`%r6') +define(`lo3',`%r7') +define(`hi0',`%r22') +define(`hi1',`%r23') C safe to reuse +define(`hi2',`%r29') +define(`hi3',`%r1') + +ASM_START() +PROLOGUE(mpn_submul_1) +C .callinfo frame=128,no_calls + + ldo 128(%r30),%r30 + stws s2_limb,-16(%r30) + add %r0,%r0,cylimb C clear cy and cylimb + addib,< -4,size_param,L(few_limbs) + fldws -16(%r30),%fr31R + + ldo -112(%r30),%r31 + stw %r3,-96(%r30) + stw %r4,-92(%r30) + stw %r5,-88(%r30) + stw %r6,-84(%r30) + stw %r7,-80(%r30) + + bb,>=,n s1_ptr,29,L(0) + + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r31) + ldws -16(%r31),cylimb + ldws -12(%r31),lo0 + sub s0,lo0,s0 + add s0,lo0,%r0 C invert cy + addib,< -1,size_param,L(few_limbs) + stws,ma s0,4(res_ptr) + +C start software pipeline ---------------------------------------------------- +LDEF(0) + fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + xmpyu %fr4L,%fr31R,%fr5 + xmpyu %fr4R,%fr31R,%fr6 + xmpyu %fr8L,%fr31R,%fr9 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + fstds %fr6,-8(%r31) + fstds %fr9,0(%r31) + fstds %fr10,8(%r31) + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws 
-4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + addc lo1,hi0,lo1 + addc lo2,hi1,lo2 + addc lo3,hi2,lo3 + + addib,< -4,size_param,L(end) + addc %r0,hi3,cylimb C propagate carry into cylimb +C main loop ------------------------------------------------------------------ +LDEF(loop) + fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + ldws 0(res_ptr),s0 + xmpyu %fr4L,%fr31R,%fr5 + ldws 4(res_ptr),s1 + xmpyu %fr4R,%fr31R,%fr6 + ldws 8(res_ptr),s2 + xmpyu %fr8L,%fr31R,%fr9 + ldws 12(res_ptr),s3 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + sub s0,lo0,s0 + fstds %fr6,-8(%r31) + subb s1,lo1,s1 + fstds %fr9,0(%r31) + subb s2,lo2,s2 + fstds %fr10,8(%r31) + subb s3,lo3,s3 + subb %r0,%r0,lo0 C these two insns ... + add lo0,lo0,%r0 C ... just invert cy + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + stws,ma s0,4(res_ptr) + addc lo1,hi0,lo1 + stws,ma s1,4(res_ptr) + addc lo2,hi1,lo2 + stws,ma s2,4(res_ptr) + addc lo3,hi2,lo3 + stws,ma s3,4(res_ptr) + + addib,>= -4,size_param,L(loop) + addc %r0,hi3,cylimb C propagate carry into cylimb +C finish software pipeline --------------------------------------------------- +LDEF(end) + ldws 0(res_ptr),s0 + ldws 4(res_ptr),s1 + ldws 8(res_ptr),s2 + ldws 12(res_ptr),s3 + + sub s0,lo0,s0 + stws,ma s0,4(res_ptr) + subb s1,lo1,s1 + stws,ma s1,4(res_ptr) + subb s2,lo2,s2 + stws,ma s2,4(res_ptr) + subb s3,lo3,s3 + stws,ma s3,4(res_ptr) + subb %r0,%r0,lo0 C these two insns ... + add lo0,lo0,%r0 C ... 
invert cy + +C restore callee-saves registers --------------------------------------------- + ldw -96(%r30),%r3 + ldw -92(%r30),%r4 + ldw -88(%r30),%r5 + ldw -84(%r30),%r6 + ldw -80(%r30),%r7 + +LDEF(few_limbs) + addib,=,n 4,size_param,L(ret) + +LDEF(loop2) + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r30) + ldws -16(%r30),hi0 + ldws -12(%r30),lo0 + addc lo0,cylimb,lo0 + addc %r0,hi0,cylimb + sub s0,lo0,s0 + add s0,lo0,%r0 C invert cy + stws,ma s0,4(res_ptr) + addib,<> -1,size_param,L(loop2) + nop + +LDEF(ret) + addc %r0,cylimb,cylimb + bv 0(%r2) + ldo -128(%r30),%r30 +EPILOGUE(mpn_submul_1) diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/sqr_diagonal.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/sqr_diagonal.asm new file mode 100644 index 0000000..1c7a18e --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/sqr_diagonal.asm @@ -0,0 +1,60 @@ +dnl HP-PA 1.1 32-bit mpn_sqr_diagonal. + +dnl Copyright 2001, 2002 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. 
If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+C mpn_sqr_diagonal: for each of the n limbs at up, store its 64-bit square at rp.
+C This code runs at 6 cycles/limb on the PA7100 and 2.5 cycles/limb on PA8x00.
+C 2-way unrolling wouldn't help the PA7100; it could however bring times down
+C to 2.0 cycles/limb for the PA8x00.
+
+C INPUT PARAMETERS
+define(`rp',`%r26')	C result pointer; two words are written per source limb
+define(`up',`%r25')	C source pointer; one word is read per source limb
+define(`n',`%r24')	C number of source limbs still to process
+C The first limb is fetched and squared before n is tested - presumably n >= 1.
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+	ldo		4(rp),rp		C bias rp by one word; stores below address -4(rp) and 0(rp)
+	fldws,ma	4(up),%fr4r		C fetch first limb, then up += 4
+	addib,=		-1,n,L(exit)		C n -= 1; skip the loop if that was the only limb
+	xmpyu		%fr4r,%fr4r,%fr5	C (delay slot) fr5 = limb * limb
+C Each pass stores the previous square while the next limb is fetched and squared.
+LDEF(loop)
+	fldws,ma	4(up),%fr4r		C fetch next limb, then up += 4
+	fstws		%fr5r,-4(rp)		C right half of the previous product (low word)
+	fstws,ma	%fr5l,8(rp)		C left half (high word) at 0(rp), then rp += 8
+	addib,<>	-1,n,L(loop)		C n -= 1; repeat while limbs remain
+	xmpyu		%fr4r,%fr4r,%fr5	C (delay slot) square the limb just fetched
+
+LDEF(exit)
+	fstws		%fr5r,-4(rp)		C store the last product, low word ...
+	bv		0(%r2)			C branch to the address held in r2
+	fstws		%fr5l,0(rp)		C (delay slot) ... and high word
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/submul_1.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/submul_1.asm
new file mode 100644
index 0000000..a9b11d2
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/submul_1.asm
@@ -0,0 +1,115 @@
+dnl HP-PA 1.1 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr r26
+C s1_ptr r25
+C size r24
+C s2_limb r23
+
+C This runs at 12 cycles/limb on a PA7000. With the used instructions, it can
+C not become faster due to data cache contention after a store. On the PA7100
+C it runs at 11 cycles/limb.
+
+C There are some ideas described in mul_1.asm that apply to this code too.
+
+C It seems possible to make this run as fast as mpn_addmul_1, if we use
+C sub,>>= %r29,%r19,%r22
+C addi 1,%r28,%r28
+C but that requires reworking the hairy software pipeline...
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+C .callinfo frame=64,no_calls
+C Pipeline state: r19 = word to subtract next, r28 = pending high word, r1 = next low word.
+	ldo		64(%r30),%r30		C allocate a 64-byte frame; -16(%r30) is the scratch slot
+	fldws,ma	4(%r25),%fr5		C fr5 = first s1 limb, then s1_ptr += 4
+	stw		%r23,-16(%r30)		C move s2_limb ...
+	addib,=		-1,%r24,L(just_one_limb)	C size -= 1; a single limb takes the short path
+	fldws		-16(%r30),%fr4		C ... into fr4 (delay slot, needed on both paths)
+	add		%r0,%r0,%r0		C clear carry
+	xmpyu		%fr4,%fr5,%fr6		C first product
+	fldws,ma	4(%r25),%fr7		C second s1 limb, then s1_ptr += 4
+	fstds		%fr6,-16(%r30)		C spill the product so integer loads can read it
+	xmpyu		%fr4,%fr7,%fr8		C second product
+	ldw		-12(%r30),%r19		C least significant limb in product
+	ldw		-16(%r30),%r28		C most significant limb in product
+C Spill the second product; the slot now feeds r1 (low word) and later r28 (high word).
+	fstds		%fr8,-16(%r30)
+	addib,=		-1,%r24,L(end)		C size -= 1; exactly two limbs skips the loop
+	ldw		-12(%r30),%r1		C (delay slot) low word of the second product
+
+C Main loop
+LDEF(loop)
+	ldws		0(%r26),%r29		C current res limb
+	fldws,ma	4(%r25),%fr5		C next s1 limb, then s1_ptr += 4
+	sub		%r29,%r19,%r22		C res limb minus the pending word
+	add		%r22,%r19,%r0		C result discarded; carry is set iff the sub borrowed
+	stws,ma		%r22,4(%r26)		C store the difference, then res_ptr += 4
+	addc		%r28,%r1,%r19		C next pending word = high word + next low word + borrow
+	xmpyu		%fr4,%fr5,%fr6		C product for the limb just fetched
+	ldw		-16(%r30),%r28		C high word of the product still in the slot ...
+	fstds		%fr6,-16(%r30)		C ... before the new product overwrites it
+	addc		%r0,%r28,%r28		C fold the carry of the addc above into the high word
+	addib,<>	-1,%r24,L(loop)		C size -= 1; repeat while limbs remain
+	ldw		-12(%r30),%r1		C (delay slot) low word of the new product
+C Drain the pipeline: the last two res limbs are produced here.
+LDEF(end)
+	ldw		0(%r26),%r29		C second-to-last res limb
+	sub		%r29,%r19,%r22
+	add		%r22,%r19,%r0		C carry = borrow from the sub
+	stws,ma		%r22,4(%r26)
+	addc		%r28,%r1,%r19		C last pending word = high word + last low word + borrow
+	ldw		-16(%r30),%r28		C high word of the last product
+	ldws		0(%r26),%r29		C last res limb
+	addc		%r0,%r28,%r28		C fold the carry of the addc above into the high word
+	sub		%r29,%r19,%r22
+	add		%r22,%r19,%r0		C carry = borrow from the sub
+	stws,ma		%r22,4(%r26)
+	addc		%r0,%r28,%r28		C r28 = high word + final borrow
+	bv		0(%r2)			C branch to the address held in r2
+	ldo		-64(%r30),%r30		C (delay slot) release the frame
+C size == 1: one product, no pipelining.
+LDEF(just_one_limb)
+	xmpyu		%fr4,%fr5,%fr6		C the only product
+	ldw		0(%r26),%r29		C the only res limb
+	fstds		%fr6,-16(%r30)
+	ldw		-12(%r30),%r1		C low word of the product
+	ldw		-16(%r30),%r28		C high word of the product
+	sub		%r29,%r1,%r22
+	add		%r22,%r1,%r0		C carry = borrow from the sub
+	stw		%r22,0(%r26)
+	addc		%r0,%r28,%r28		C r28 = high word + borrow
+	bv		0(%r2)			C branch to the address held in r2
+	ldo		-64(%r30),%r30		C (delay slot) release the frame
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/udiv.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/udiv.asm
new file mode 100644
index 0000000..626ecd2
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/udiv.asm
@@ -0,0 +1,102 @@
+dnl HP-PA __udiv_qrnnd division support, used from longlong.h.
+dnl This version runs fast on PA 7000 and later.
+
+dnl Copyright 1993, 1994, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr gr26
+C n1 gr25
+C n0 gr24
+C d gr23
+
+C This file has caused a lot of trouble, since it demands PIC reference to
+C static data, which triggers bugs in gas (at least version 2.7 through
+C 2.11.2). When the bug is triggered, many bogus relocs are generated. The
+C current solution is to stuff data right into the code, and refer to it using
+C absolute offsets.
Fragile to be sure, but nothing else seems to work.
+
+ASM_START()
+ifdef(`PIC',`',
+`	RODATA
+	INT64(0000, 0x43f00000, 0x0)	C 2^64
+')
+C Divide n1:n0 by d using a double-precision fdiv; quotient in r28, remainder to *rem_ptr.
+PROLOGUE(mpn_udiv_qrnnd)
+C .callinfo frame=64,no_calls
+
+	ldo		64(%r30),%r30		C allocate a 64-byte frame; -16(%r30) is the scratch slot
+C Build the 64-bit numerator n1:n0 in the scratch slot.
+	stws		%r25,-16(0,%r30)	C n_hi
+	stws		%r24,-12(0,%r30)	C n_lo
+C Point r31 at the double constant 2^64 (inline data under PIC, else label 0000).
+ifdef(`PIC',
+`	bl		.+20,%r31
+	dep		%r0,31,2,%r31
+	.word		0x0			C padding for alignment
+	.word		0x43f00000, 0x0		C 2^64
+	ldo		4(%r31),%r31',
+`	ldil		`L'%L(0000),%r31
+	ldo		R%L(0000)(%r31),%r31')
+
+	fldds		-16(0,%r30),%fr5	C fr5 = n1:n0 as a 64-bit integer
+	stws		%r23,-12(0,%r30)	C d into the scratch slot
+	comib,<=	0,%r25,L(1)		C n1 non-negative as a signed word: skip the fixup
+	fcnvxf,dbl,dbl	%fr5,%fr5		C (delay slot) convert the integer to a double
+	fldds		0(0,%r31),%fr4		C fr4 = 2^64
+	fadd,dbl	%fr4,%fr5,%fr5		C add 2^64 when the top bit of n1 was set
+
+LDEF(1)
+	fcpy,sgl	%fr0,%fr6L		C presumably zeroes the upper half of fr6 - TODO confirm fr0 reads as 0
+	fldws		-12(0,%r30),%fr6R	C fr6R = d
+	fcnvxf,dbl,dbl	%fr6,%fr4		C fr4 = fr6 converted to a double
+
+	fdiv,dbl	%fr5,%fr4,%fr5		C fr5 = numerator / divisor
+
+	fcnvfx,dbl,dbl	%fr5,%fr4		C quotient estimate q as an integer
+	fstws		%fr4R,-16(%r30)		C spill q ...
+	xmpyu		%fr4R,%fr6R,%fr6	C fr6 = q * d
+	ldws		-16(%r30),%r28		C ... and reload it as r28
+	fstds		%fr6,-16(0,%r30)	C spill q * d
+	ldws		-12(0,%r30),%r21	C low word of q * d
+	ldws		-16(0,%r30),%r20	C high word of q * d
+	sub		%r24,%r21,%r22		C r22 = low word of n - q * d
+	subb		%r25,%r20,%r20		C r20 = high word of n - q * d, with borrow
+	comib,=		0,%r20,L(2)		C high word zero: keep q and r22 as they are
+	ldo		-64(%r30),%r30		C (delay slot) release the frame
+
+	add		%r22,%r23,%r22		C otherwise add d back to the remainder ...
+	ldo		-1(%r28),%r28		C ... and take one off the quotient
+
+LDEF(2)
+	bv		0(%r2)			C branch to the address held in r2
+	stws		%r22,0(0,%r26)		C (delay slot) store the remainder through rem_ptr
+
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/umul.asm b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/umul.asm
new file mode 100644
index 0000000..18b923c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa32/hppa1_1/umul.asm
@@ -0,0 +1,47 @@
+dnl Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+C mpn_umul_ppmm: multiply r25 by r24; low word stored at 0(r26), high word left in r28.
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+C .callinfo frame=64,no_calls
+C Operands go through the scratch slot at -16(%r30) to reach the FP unit.
+	ldo		64(%r30),%r30		C allocate a 64-byte frame
+	stw		%r25,-16(0,%r30)	C first factor to the scratch slot ...
+	fldws		-16(0,%r30),%fr22R	C ... and into the right half of fr22
+	stw		%r24,-16(0,%r30)	C second factor to the scratch slot ...
+	fldws		-16(0,%r30),%fr22L	C ... and into the left half of fr22
+	xmpyu		%fr22R,%fr22L,%fr22	C fr22 = 64-bit unsigned product
+	fstds		%fr22,-16(0,%r30)	C spill the product so integer loads can read it
+	ldw		-16(0,%r30),%r28	C first word of the product (high word)
+	ldw		-12(0,%r30),%r29	C second word of the product (low word)
+	stw		%r29,0(0,%r26)		C low word goes out through the pointer in r26
+	bv		0(%r2)			C branch to the address held in r2
+	ldo		-64(%r30),%r30		C (delay slot) release the frame
+EPILOGUE() |