diff options
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/s390_32/esame')
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm | 72 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm | 137 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm | 173 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm | 65 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h | 177 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm | 66 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm | 130 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm | 203 | ||||
-rw-r--r-- | vendor/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm | 70 |
9 files changed, 1093 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm b/vendor/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm new file mode 100644 index 0000000..4375b74 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm @@ -0,0 +1,72 @@ +dnl S/390-32 mpn_addmul_1 for systems with MLR instruction + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 18.5 +C z990 10 +C z9 ? +C z10 ? +C z196 ? 
+ +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`v0', `%r5') + +define(`z', `%r9') + +ASM_START() +PROLOGUE(mpn_addmul_1) + stm %r9, %r12, 36(%r15) C save the registers used below + lhi %r12, 0 C zero index register + ahi %r12, 0 C clear carry flag + lhi %r11, 0 C clear carry limb + lhi z, 0 C zero register, used to propagate carry + +L(top): l %r1, 0(%r12,up) C load up limb + l %r10, 0(%r12,rp) C load rp limb + mlr %r0, v0 C r0:r1 = up limb * v0 + alcr %r1, %r10 C add rp limb and incoming carry to low part + alcr %r0, z C propagate carry to high part + alr %r1, %r11 C add carry limb, carry out goes to next iteration + lr %r11, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) C store result limb + la %r12, 4(%r12) C advance index by one limb + brct n, L(top) + + lhi %r2, 0 + alcr %r2, %r11 C return carry limb plus pending carry + + lm %r9, %r12, 36(%r15) + br %r14 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm b/vendor/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm new file mode 100644 index 0000000..98b0dbc --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm @@ -0,0 +1,137 @@ +dnl S/390-32 mpn_add_n and mpn_sub_n. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/.
+ +include(`../config.m4') + +C cycles/limb +C z900 ? +C z990 2.75-3 (fast for even n, slow for odd n) +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * Optimise for small n +C * Use r0 and save/restore one less register +C * Using logops_n's v1 inner loop operand order makes the loop about 20% +C faster, at the expense of highly alignment-dependent performance. + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`vp', `%r4') +define(`n', `%r5') + +ifdef(`OPERATION_add_n', ` + define(ADSB, al) + define(ADSBCR, alcr) + define(ADSBC, alc) + define(RETVAL,`dnl + lhi %r2, 0 + alcr %r2, %r2') + define(func, mpn_add_n) + define(func_nc, mpn_add_nc)') +ifdef(`OPERATION_sub_n', ` + define(ADSB, sl) + define(ADSBCR, slbr) + define(ADSBC, slb) + define(RETVAL,`dnl + slbr %r2, %r2 + lcr %r2, %r2') + define(func, mpn_sub_n) + define(func_nc, mpn_sub_nc)') + +MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n) + +ASM_START() +PROLOGUE(func) + stm %r6, %r8, 24(%r15) + + ahi n, 3 C bias n by 3 for the rounding below + lhi %r7, 3 + lr %r1, n + srl %r1, 2 C r1 = (n + 3) / 4, the brct counter + nr %r7, n C (n + 3) mod 4, since n was biased above + je L(b1) C n = 1 (mod 4) + chi %r7, 2 + jl L(b2) C n = 2 (mod 4) + jne L(b0) C n = 0 (mod 4), else fall through for n = 3 (mod 4) + +L(b3): lm %r5, %r7, 0(up) + la up, 12(up) + ADSB %r5, 0(vp) + ADSBC %r6, 4(vp) + ADSBC %r7, 8(vp) + la vp, 12(vp) + stm %r5, %r7, 0(rp) + la rp, 12(rp) + brct %r1, L(top) + j L(end) + +L(b0): lm %r5, %r8, 0(up) C These redundant insns are no mistake, + la up, 16(up) C they are needed to make main loop run + ADSB %r5, 0(vp) C fast for n = 0 (mod 4).
+ ADSBC %r6, 4(vp) + j L(m0) C enter main loop in the middle + +L(b1): l %r5, 0(up) + la up, 4(up) + ADSB %r5, 0(vp) + la vp, 4(vp) + st %r5, 0(rp) + la rp, 4(rp) + brct %r1, L(top) + j L(end) + +L(b2): lm %r5, %r6, 0(up) + la up, 8(up) + ADSB %r5, 0(vp) + ADSBC %r6, 4(vp) + la vp, 8(vp) + stm %r5, %r6, 0(rp) + la rp, 8(rp) + brct %r1, L(top) + j L(end) + +L(top): lm %r5, %r8, 0(up) C main loop, 4 limbs per iteration + la up, 16(up) + ADSBC %r5, 0(vp) + ADSBC %r6, 4(vp) +L(m0): ADSBC %r7, 8(vp) + ADSBC %r8, 12(vp) + la vp, 16(vp) + stm %r5, %r8, 0(rp) + la rp, 16(rp) + brct %r1, L(top) + +L(end): RETVAL + lm %r6, %r8, 24(%r15) + br %r14 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm b/vendor/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm new file mode 100644 index 0000000..f2b222b --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm @@ -0,0 +1,173 @@ +dnl S/390-32 mpn_addlsh1_n + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/.
+ +include(`../config.m4') + +C cycles/limb +C z900 9.25 +C z990 5 +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * Optimise for small n +C * Compute RETVAL for sublsh1_n less stupidly + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`vp', `%r4') +define(`n', `%r5') + +ifdef(`OPERATION_addlsh1_n',` + define(ADDSUBC, alr) + define(ADDSUBE, alcr) + define(INITCY, `lhi %r13, -1') + define(RETVAL, `alr %r1, %r13 + lhi %r2, 2 + alr %r2, %r1') + define(func, mpn_addlsh1_n) +') +ifdef(`OPERATION_sublsh1_n',` + define(ADDSUBC, slr) + define(ADDSUBE, slbr) + define(INITCY, `lhi %r13, 0') + define(RETVAL, `slr %r1, %r13 + lhi %r2, 1 + alr %r2, %r1') + define(func, mpn_sublsh1_n) +') + +MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n) + +ASM_START() +PROLOGUE(func) + stm %r6, %r13, 24(%r15) + + la %r0, 3(n) C r0 = n + 3 + lhi %r7, 3 + srl %r0, 2 C r0 = (n + 3) / 4, the brct counter + nr %r7, n C n mod 4 + je L(b0) C n = 0 (mod 4) + chi %r7, 2 + jl L(b1) C n = 1 (mod 4) + je L(b2) C n = 2 (mod 4), else fall through for n = 3 (mod 4) + +L(b3): lm %r5, %r7, 0(up) + la up, 12(up) + lm %r9, %r11, 0(vp) + la vp, 12(vp) + + alr %r9, %r9 C double the three vp limbs + alcr %r10, %r10 + alcr %r11, %r11 + slbr %r1, %r1 C save carry + + ADDSUBC %r5, %r9 + ADDSUBE %r6, %r10 + ADDSUBE %r7, %r11 + slbr %r13, %r13 C save carry + + stm %r5, %r7, 0(rp) + la rp, 12(rp) + brct %r0, L(top) + j L(end) + +L(b0): lhi %r1, -1 C same value slbr leaves when carry is clear + INITCY + j L(top) + +L(b1): l %r5, 0(up) + la up, 4(up) + l %r9, 0(vp) + la vp, 4(vp) + + alr %r9, %r9 C double the vp limb + slbr %r1, %r1 C save carry + ADDSUBC %r5, %r9 + slbr %r13, %r13 C save carry + + st %r5, 0(rp) + la rp, 4(rp) + brct %r0, L(top) + j L(end) + +L(b2): lm %r5, %r6, 0(up) + la up, 8(up) + lm %r9, %r10, 0(vp) + la vp, 8(vp) + + alr %r9, %r9 C double the two vp limbs + alcr %r10, %r10 + slbr %r1, %r1 C save carry + + ADDSUBC %r5, %r9 + ADDSUBE %r6, %r10 + slbr %r13, %r13 C save carry + + stm %r5, %r6, 0(rp) + la rp, 8(rp) + brct %r0, L(top) + j L(end) + +L(top): lm %r9, %r12, 0(vp) C main loop, 4 limbs per iteration + la vp, 16(vp) + + ahi %r1, 1 C restore carry + + alcr %r9, %r9 + alcr %r10, %r10 + alcr %r11, %r11 + alcr %r12, %r12 + + slbr %r1, %r1 C save carry + + lm %r5, %r8, 0(up) + la up, 16(up) + + ahi %r13, 1 C restore carry + + ADDSUBE %r5, %r9 +
ADDSUBE %r6, %r10 + ADDSUBE %r7, %r11 + ADDSUBE %r8, %r12 + + slbr %r13, %r13 C save carry + + stm %r5, %r8, 0(rp) + la rp, 16(rp) + brct %r0, L(top) + +L(end): + RETVAL + lm %r6, %r13, 24(%r15) + br %r14 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm b/vendor/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm new file mode 100644 index 0000000..568a2a4 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm @@ -0,0 +1,65 @@ +dnl S/390-32 mpn_bdiv_dbm1c for systems with MLR instruction. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 14 +C z990 10 +C z9 ? +C z10 ? +C z196 ?
+ +C INPUT PARAMETERS +define(`qp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`bd', `%r5') +define(`cy', `%r6') + +ASM_START() + TEXT + ALIGN(16) +PROLOGUE(mpn_bdiv_dbm1c) + stm %r6, %r7, 24(%r15) C save r6 and r7, r6 keeps holding cy + lhi %r7, 0 C zero index register + +L(top): l %r1, 0(%r7,up) C load up limb + mlr %r0, bd C r0:r1 = up limb * bd + slr %r6, %r1 C cy -= low part + st %r6, 0(%r7,qp) C store as qp limb + slbr %r6, %r0 C cy -= high part + borrow + la %r7, 4(%r7) C advance index by one limb + brct n, L(top) + + lr %r2, %r6 C return final cy + lm %r6, %r7, 24(%r15) + br %r14 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h b/vendor/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h new file mode 100644 index 0000000..c0e5046 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h @@ -0,0 +1,177 @@ +/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 2000-2008-2011, 2014 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/.
*/ + +#define GMP_LIMB_BITS 32 +#define GMP_LIMB_BYTES 4 + +/* 4400 MHz IBM z196 running in 32-bit mode */ +/* FFT tuning limit = 0.5M */ +/* Generated by tuneup.c, 2017-01-02, gcc 4.9 */ + +#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1_1P_METHOD 2 +#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1N_TO_MOD_1_1_THRESHOLD 45 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 18 +#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ +#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 3 +#define USE_PREINV_DIVREM_1 0 +#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVEXACT_1_THRESHOLD 6 +#define BMOD_1_TO_MOD_1_THRESHOLD 0 /* always */ + +#define DIV_1_VS_MUL_1_PERCENT 320 + +#define MUL_TOOM22_THRESHOLD 12 +#define MUL_TOOM33_THRESHOLD 81 +#define MUL_TOOM44_THRESHOLD 130 +#define MUL_TOOM6H_THRESHOLD 173 +#define MUL_TOOM8H_THRESHOLD 260 + +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 83 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 86 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 112 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_TOOM2_THRESHOLD 18 +#define SQR_TOOM3_THRESHOLD 69 +#define SQR_TOOM4_THRESHOLD 178 +#define SQR_TOOM6_THRESHOLD 254 +#define SQR_TOOM8_THRESHOLD 406 + +#define MULMID_TOOM42_THRESHOLD 30 + +#define MULMOD_BNM1_THRESHOLD 12 +#define SQRMOD_BNM1_THRESHOLD 7 + +#define MUL_FFT_MODF_THRESHOLD 276 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 276, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \ + { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \ + { 17, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \ + { 23, 7}, { 13, 8}, { 
7, 7}, { 19, 8}, \ + { 11, 7}, { 25, 8}, { 15, 7}, { 31, 8}, \ + { 19, 7}, { 39, 8}, { 23, 9}, { 15, 8}, \ + { 39, 9}, { 23,10}, { 15, 9}, { 31, 8}, \ + { 67, 9}, { 39, 8}, { 79, 9}, { 47,10}, \ + { 31, 9}, { 71, 8}, { 143, 9}, { 79,10}, \ + { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \ + { 255, 7}, { 511, 9}, { 143,10}, { 79, 9}, \ + { 159, 8}, { 319, 9}, { 175, 8}, { 351,10}, \ + { 95, 9}, { 191, 8}, { 383,11}, { 63,10}, \ + { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \ + { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \ + { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \ + { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 511,10}, { 271, 9}, { 543, 8}, { 1087,10}, \ + { 287, 9}, { 575,11}, { 159,10}, { 351, 9}, \ + { 703, 8}, { 1407,11}, { 191,10}, { 415, 9}, \ + { 831,11}, { 223,10}, { 479, 9}, { 959, 8}, \ + { 1919,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ + { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 89 +#define MUL_FFT_THRESHOLD 2688 + +#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 240, 5}, { 17, 6}, { 17, 7}, { 9, 6}, \ + { 19, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \ + { 7, 7}, { 19, 8}, { 11, 7}, { 25, 8}, \ + { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \ + { 23, 9}, { 15, 8}, { 39, 9}, { 23,10}, \ + { 15, 9}, { 31, 8}, { 63, 9}, { 47,10}, \ + { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \ + { 143,10}, { 47,11}, { 31,10}, { 63, 9}, \ + { 127, 8}, { 255, 7}, { 511, 9}, { 143,10}, \ + { 79, 9}, { 159, 8}, { 319, 9}, { 175, 8}, \ + { 351, 7}, { 703,10}, { 95, 9}, { 191, 8}, \ + { 383, 9}, { 207, 8}, { 415,11}, { 63,10}, \ + { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \ + { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \ + { 175, 9}, { 351, 8}, { 703, 7}, { 1407,11}, \ + { 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \ + { 415,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 511,10}, { 287, 9}, { 575,11}, { 159,10}, \ + { 351, 9}, { 703, 8}, { 1407,11}, { 191,10}, \ + { 415, 9}, { 831,11}, { 223,10}, { 479,12}, \ + { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } 
+#define SQR_FFT_TABLE3_SIZE 84 +#define SQR_FFT_THRESHOLD 1856 + +#define MULLO_BASECASE_THRESHOLD 0 /* always */ +#define MULLO_DC_THRESHOLD 27 +#define MULLO_MUL_N_THRESHOLD 5240 +#define SQRLO_BASECASE_THRESHOLD 0 /* always */ +#define SQRLO_DC_THRESHOLD 65 +#define SQRLO_SQR_THRESHOLD 3470 + +#define DC_DIV_QR_THRESHOLD 32 +#define DC_DIVAPPR_Q_THRESHOLD 135 +#define DC_BDIV_QR_THRESHOLD 32 +#define DC_BDIV_Q_THRESHOLD 80 + +#define INV_MULMOD_BNM1_THRESHOLD 42 +#define INV_NEWTON_THRESHOLD 177 +#define INV_APPR_THRESHOLD 139 + +#define BINV_NEWTON_THRESHOLD 179 +#define REDC_1_TO_REDC_N_THRESHOLD 39 + +#define MU_DIV_QR_THRESHOLD 872 +#define MU_DIVAPPR_Q_THRESHOLD 998 +#define MUPI_DIV_QR_THRESHOLD 66 +#define MU_BDIV_QR_THRESHOLD 748 +#define MU_BDIV_Q_THRESHOLD 906 + +#define POWM_SEC_TABLE 9,34,257,946,2913 + +#define GET_STR_DC_THRESHOLD 10 +#define GET_STR_PRECOMPUTE_THRESHOLD 16 +#define SET_STR_DC_THRESHOLD 1045 +#define SET_STR_PRECOMPUTE_THRESHOLD 1800 + +#define FAC_DSC_THRESHOLD 77 +#define FAC_ODD_THRESHOLD 24 + +#define MATRIX22_STRASSEN_THRESHOLD 15 +#define HGCD_THRESHOLD 121 +#define HGCD_APPR_THRESHOLD 142 +#define HGCD_REDUCE_THRESHOLD 1679 +#define GCD_DC_THRESHOLD 389 +#define GCDEXT_DC_THRESHOLD 285 +#define JACOBI_BASE_METHOD 4 diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm b/vendor/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm new file mode 100644 index 0000000..04be963 --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm @@ -0,0 +1,66 @@ +dnl S/390-32 mpn_mul_1 for systems with MLR instruction + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. 
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 14 +C z990 9 +C z9 ? +C z10 ? +C z196 ? + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`v0', `%r5') + +ASM_START() +PROLOGUE(mpn_mul_1) + stm %r11, %r12, 44(%r15) C save the registers used below + lhi %r12, 0 C zero index register + ahi %r12, 0 C clear carry flag + lhi %r11, 0 C clear carry limb + +L(top): l %r1, 0(%r12,up) C load up limb + mlr %r0, v0 C r0:r1 = up limb * v0 + alcr %r1, %r11 C add carry limb and incoming carry to low part + lr %r11, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) C store result limb + la %r12, 4(%r12) C advance index by one limb + brct n, L(top) + + lhi %r2, 0 + alcr %r2, %r11 C return carry limb plus pending carry + + lm %r11, %r12, 44(%r15) + br %r14 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm b/vendor/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm new file mode 100644 index 0000000..2c8138d --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm @@ -0,0 +1,130 @@ +dnl S/390-32/esame mpn_mul_basecase. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version.
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 ? +C z990 ? +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * Perhaps add special case for un <= 2. +C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped +C up by about 10%. + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`un', `%r4') +define(`vp', `%r5') +define(`vn', `%r6') + +define(`zero', `%r8') + +ASM_START() +PROLOGUE(mpn_mul_basecase) + chi un, 2 + jhe L(ge2) C un >= 2 + +C un = vn = 1 + l %r1, 0(vp) + ml %r0, 0(up) + st %r1, 0(rp) + st %r0, 4(rp) + br %r14 + +L(ge2): C jne L(gen) + + +L(gen): +C mul_1 ======================================================================= + + stm %r6, %r12, 24(%r15) + lhi zero, 0 + ahi un, -1 C inner loops run un - 1 times + + l %r7, 0(vp) + l %r11, 0(up) + lhi %r12, 4 C init index register + mlr %r10, %r7 + lr %r9, un + st %r11, 0(rp) + cr %r15, %r15 C clear carry flag + +L(tm): l %r1, 0(%r12,up) + mlr %r0, %r7 + alcr %r1, %r10 + lr %r10, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct %r9, L(tm) + + alcr %r0, zero C fold pending carry into the top limb + st %r0, 0(%r12,rp) + +C addmul_1 loop =============================================================== + + ahi vn, -1 + je L(outer_end) +L(outer_loop): + + la rp, 4(rp) C rp += 1 + la vp, 4(vp) C vp += 1 + l %r7, 0(vp) + l %r11, 0(up) + lhi %r12, 4 C
init index register + mlr %r10, %r7 + lr %r9, un + al %r11, 0(rp) + st %r11, 0(rp) + +L(tam): l %r1, 0(%r12,up) + l %r11, 0(%r12,rp) + mlr %r0, %r7 + alcr %r1, %r11 + alcr %r0, zero + alr %r1, %r10 + lr %r10, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct %r9, L(tam) + + alcr %r0, zero C fold pending carry into the top limb + st %r0, 0(%r12,rp) + + brct vn, L(outer_loop) +L(outer_end): + + lm %r6, %r12, 24(%r15) + br %r14 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm b/vendor/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm new file mode 100644 index 0000000..f45f87a --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm @@ -0,0 +1,203 @@ +dnl S/390-32 mpn_sqr_basecase. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 ? +C z990 23 +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * Clean up. +C * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail.
+C This will ask for basecase handling of n = 3. +C * Update counters and pointers more straightforwardly, possibly lowering +C register usage. +C * Should we use this allocation-free style for more sqr_basecase asm +C implementations? The only disadvantage is that it requires R != U. +C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped +C up by about 10%. The sqr_diag_addlsh1 loop could probably be sped up even +C more. + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') + +define(`zero', `%r8') +define(`rp_saved', `%r9') +define(`up_saved', `%r13') +define(`n_saved', `%r14') + +ASM_START() +PROLOGUE(mpn_sqr_basecase) + ahi n, -2 C n -= 2, condition code drives the branches below + jhe L(ge2) C original n >= 2 + +C n = 1 + l %r5, 0(up) + mlr %r4, %r5 + st %r5, 0(rp) + st %r4, 4(rp) + br %r14 + +L(ge2): jne L(gen) C original n > 2 + +C n = 2 + stm %r6, %r8, 24(%r15) + lhi zero, 0 + + l %r5, 0(up) + mlr %r4, %r5 C u0 * u0 + l %r1, 4(up) + mlr %r0, %r1 C u1 * u1 + st %r5, 0(rp) + + l %r7, 0(up) + ml %r6, 4(up) C u0 * u1 + alr %r7, %r7 C double the u0 * u1 product + alcr %r6, %r6 + alcr %r0, zero + + alr %r4, %r7 + alcr %r1, %r6 + alcr %r0, zero + st %r4, 4(rp) + st %r1, 8(rp) + st %r0, 12(rp) + + lm %r6, %r8, 24(%r15) + br %r14 + +L(gen): +C mul_1 ======================================================================= + + stm %r6, %r14, 24(%r15) C also saves %r14, which is reused as n_saved + lhi zero, 0 + lr up_saved, up + lr rp_saved, rp + lr n_saved, n + + l %r6, 0(up) + l %r11, 4(up) + lhi %r12, 8 C init index register + mlr %r10, %r6 + lr %r5, n + st %r11, 4(rp) + cr %r15, %r15 C clear carry flag + +L(tm): l %r1, 0(%r12,up) + mlr %r0, %r6 + alcr %r1, %r10 + lr %r10, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct %r5, L(tm) + + alcr %r0, zero C fold pending carry into the top limb + st %r0, 0(%r12,rp) + +C addmul_1 loop =============================================================== + + ahi n, -1 + je L(outer_end) +L(outer_loop): + + la rp, 8(rp) C rp += 2 + la up, 4(up) C up += 1 + l %r6, 0(up) + l %r11, 4(up) + lhi %r12, 8 C init index register + mlr %r10, %r6 + lr
%r5, n + al %r11, 4(rp) + st %r11, 4(rp) + +L(tam): l %r1, 0(%r12,up) + l %r7, 0(%r12,rp) + mlr %r0, %r6 + alcr %r1, %r7 + alcr %r0, zero + alr %r1, %r10 + lr %r10, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct %r5, L(tam) + + alcr %r0, zero C fold pending carry into the top limb + st %r0, 0(%r12,rp) + + brct n, L(outer_loop) +L(outer_end): + + l %r6, 4(up) + l %r1, 8(up) + lr %r7, %r0 C Same as: l %r7, 12(,rp) + mlr %r0, %r6 + alr %r1, %r7 + alcr %r0, zero + st %r1, 12(rp) + st %r0, 16(rp) + +C sqr_diag_addlsh1 ============================================================ + +define(`up', `up_saved') +define(`rp', `rp_saved') + la n, 1(n_saved) C n = n_saved + 1 + + l %r1, 0(up) + mlr %r0, %r1 + st %r1, 0(rp) +C clr %r15, %r15 C clear carry (already clear per above) + +L(top): l %r11, 4(up) + la up, 4(up) + l %r6, 4(rp) + l %r7, 8(rp) + mlr %r10, %r11 + alcr %r6, %r6 + alcr %r7, %r7 + alcr %r10, zero C propagate carry to high product limb + alr %r6, %r0 + alcr %r7, %r11 + stm %r6, %r7, 4(rp) + la rp, 8(rp) + lr %r0, %r10 C copy carry limb + brct n, L(top) + + alcr %r0, zero + st %r0, 4(rp) + + lm %r6, %r14, 24(%r15) C restores %r14 before the br below + br %r14 +EPILOGUE() diff --git a/vendor/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm b/vendor/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm new file mode 100644 index 0000000..a71e57e --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm @@ -0,0 +1,70 @@ +dnl S/390-32 mpn_submul_1 for systems with MLR instruction. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version.
+dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 20 +C z990 11 +C z9 ? +C z10 ? +C z196 ? + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`v0', `%r5') + +ASM_START() +PROLOGUE(mpn_submul_1) + stm %r9, %r12, 36(%r15) C save the registers used below + lhi %r12, 0 C zero index register + slr %r11, %r11 C clear carry limb, leaves no borrow pending + +L(top): l %r1, 0(%r12, up) C load up limb + l %r10, 0(%r12, rp) C load rp limb + mlr %r0, v0 C r0:r1 = up limb * v0 + slbr %r10, %r1 C rp limb - low part - incoming borrow + slbr %r9, %r9 C r9 = -1 if that borrowed, else 0 + slr %r0, %r9 C conditional incr + slr %r10, %r11 C subtract carry limb, borrow goes to next iteration + lr %r11, %r0 C copy high part to carry limb + st %r10, 0(%r12, rp) C store result limb + la %r12, 4(%r12) C advance index by one limb + brct %r4, L(top) + + lr %r2, %r11 + slbr %r9, %r9 C r9 = -1 if a borrow is pending, else 0 + slr %r2, %r9 C return carry limb plus pending borrow + + lm %r9, %r12, 36(%r15) + br %r14 +EPILOGUE() |