aboutsummaryrefslogtreecommitdiff
path: root/vendor/gmp-6.3.0/mpn/cray/ieee
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-06-21 23:36:36 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-06-21 23:42:26 +0200
commit a89a14ef5da44684a16b204e7a70460cc8c4922a (patch)
tree b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/cray/ieee
parent 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff)
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/cray/ieee')
-rw-r--r-- vendor/gmp-6.3.0/mpn/cray/ieee/addmul_1.c 111
-rw-r--r-- vendor/gmp-6.3.0/mpn/cray/ieee/gmp-mparam.h 73
-rw-r--r-- vendor/gmp-6.3.0/mpn/cray/ieee/invert_limb.c 127
-rw-r--r-- vendor/gmp-6.3.0/mpn/cray/ieee/mul_1.c 103
-rw-r--r-- vendor/gmp-6.3.0/mpn/cray/ieee/mul_basecase.c 107
-rw-r--r-- vendor/gmp-6.3.0/mpn/cray/ieee/sqr_basecase.c 105
-rw-r--r-- vendor/gmp-6.3.0/mpn/cray/ieee/submul_1.c 111
7 files changed, 737 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/cray/ieee/addmul_1.c b/vendor/gmp-6.3.0/mpn/cray/ieee/addmul_1.c
new file mode 100644
index 0000000..ce7dfbb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/cray/ieee/addmul_1.c
@@ -0,0 +1,111 @@
+/* Cray PVP/IEEE mpn_addmul_1 -- multiply a limb vector with a limb and add the
+ result to a second limb vector.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* This code runs at just under 9 cycles/limb on a T90. That is not perfect,
+ mainly due to vector register shortage in the main loop. Assembly code
+ should bring it down to perhaps 7 cycles/limb. */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{ /* Adds up[0..n-1] * vl into rp[0..n-1]; returns the limb carried out of the top. */
+ mp_limb_t cy[n]; /* cy[i]: carries (0, 1 or 2) out of position i, applied in a later pass */
+ mp_limb_t a, b, r, s0, s1, c0, c1;
+ mp_size_t i;
+ int more_carries; /* number of positions where the vector carry pass wrapped */
+
+ if (up == rp)
+ {
+ /* The main loop re-reads up[i - 1] after rp[i - 1] has been written, so it cannot run in place.
+ Only exact aliasing (up == rp) is caught here: copy the source to a temporary, then call ourselves. */
+ mp_limb_t xp[n];
+ MPN_COPY (xp, up, n);
+ return mpn_addmul_1 (rp, xp, n, vl);
+ }
+
+ a = up[0] * vl; /* low limb of up[0] * vl */
+ r = rp[0];
+ s0 = a + r;
+ rp[0] = s0;
+ c0 = ((a & r) | ((a | r) & ~s0)) >> 63; /* carry out of a + r, derived from the top bits */
+ cy[0] = c0;
+
+ /* Main multiply loop. Generate a raw accumulated output product in rp[]
+ and a carry vector in cy[]. No carry is propagated between limbs here. */
+#pragma _CRI ivdep
+ for (i = 1; i < n; i++)
+ {
+ a = up[i] * vl; /* low limb of this product */
+ b = _int_mult_upper (up[i - 1], vl); /* upper limb of the previous product, going by the intrinsic's name */
+ s0 = a + b;
+ c0 = ((a & b) | ((a | b) & ~s0)) >> 63; /* carry out of a + b */
+ r = rp[i];
+ s1 = s0 + r;
+ rp[i] = s1;
+ c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63; /* carry out of s0 + r */
+ cy[i] = c0 + c1;
+ }
+ /* Carry add loop. Add the carry vector cy[] to the raw result rp[] and
+ store the new result back to rp[]. Wrap-arounds are only counted here. */
+ more_carries = 0;
+#pragma _CRI ivdep
+ for (i = 1; i < n; i++)
+ {
+ r = rp[i];
+ c0 = cy[i - 1];
+ s0 = r + c0;
+ rp[i] = s0;
+ c0 = (r & ~s0) >> 63; /* wrapped: top bit set in r but clear in s0 */
+ more_carries += c0;
+ }
+ /* If that second loop generated carry, handle that in scalar loop. */
+ if (more_carries)
+ {
+ mp_limb_t cyrec = 0; /* carry rippling in from the previous position */
+ /* Look for places where rp[k] < cy[k-1], e.g. rp[k] == 0 and cy[k-1] == 1
+ or rp[k] == 1 and cy[k-1] == 2: the carry add loop wrapped around there.
+ These are where we got a recurrency carry. */
+ for (i = 1; i < n; i++)
+ {
+ r = rp[i];
+ c0 = r < cy[i - 1]; /* the carry add loop wrapped at this position */
+ s0 = r + cyrec;
+ rp[i] = s0;
+ c1 = (r & ~s0) >> 63; /* adding cyrec wrapped */
+ cyrec = c0 | c1;
+ }
+ return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+ }
+
+ return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}
diff --git a/vendor/gmp-6.3.0/mpn/cray/ieee/gmp-mparam.h b/vendor/gmp-6.3.0/mpn/cray/ieee/gmp-mparam.h
new file mode 100644
index 0000000..1fdc286
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/cray/ieee/gmp-mparam.h
@@ -0,0 +1,73 @@
+/* Cray T90 IEEE gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8 /* GMP_LIMB_BITS / 8 */
+
+/* Generated by tuneup.c, 2004-02-07, system compiler. The values below are that generated output. */
+
+#define MUL_TOOM22_THRESHOLD 130
+#define MUL_TOOM33_THRESHOLD 260
+
+#define SQR_BASECASE_THRESHOLD 9 /* karatsuba */
+#define SQR_TOOM2_THRESHOLD 0 /* never sqr_basecase */
+#define SQR_TOOM3_THRESHOLD 34
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* preinv always */
+#define DIV_DC_THRESHOLD 390
+#define POWM_THRESHOLD 656
+
+#define HGCD_THRESHOLD 964
+#define GCD_ACCEL_THRESHOLD 3
+#define GCD_DC_THRESHOLD 964
+#define JACOBI_BASE_METHOD 2
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define USE_PREINV_DIVREM_1 1 /* preinv always */
+#define USE_PREINV_MOD_1 1 /* preinv always */
+#define DIVREM_2_THRESHOLD 0 /* preinv always */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
+
+#define GET_STR_DC_THRESHOLD 45
+#define GET_STR_PRECOMPUTE_THRESHOLD 77
+#define SET_STR_THRESHOLD 145756
+
+#define MUL_FFT_TABLE { 1104, 2208, 4416, 8960, 19456, 45056, 0 }
+#define MUL_FFT_MODF_THRESHOLD 1168
+#define MUL_FFT_THRESHOLD 6528
+
+#define SQR_FFT_TABLE { 368, 736, 1600, 2816, 7168, 12288, 0 }
+#define SQR_FFT_MODF_THRESHOLD 296
+#define SQR_FFT_THRESHOLD 1312
diff --git a/vendor/gmp-6.3.0/mpn/cray/ieee/invert_limb.c b/vendor/gmp-6.3.0/mpn/cray/ieee/invert_limb.c
new file mode 100644
index 0000000..774a27b
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/cray/ieee/invert_limb.c
@@ -0,0 +1,127 @@
+/* mpn_invert_limb -- Invert a normalized limb.
+
+Copyright 1991, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+ This is needed to make configure define HAVE_NATIVE_mpn_invert_limb:
+ PROLOGUE(mpn_invert_limb)
+*/
+
+static const unsigned short int approx_tab[0x100] = /* seed for mpn_invert_limb, indexed by (top 9 bits of d) - 0x100 */
+{
+ /* 0x400, -- not used: entry 0 is 0x3ff instead (0x400 << 6 would reach 2^16) */
+ 0x3ff,
+ 0x3fc, 0x3f8, 0x3f4, 0x3f0, 0x3ec, 0x3e8, 0x3e4,
+ 0x3e0, 0x3dd, 0x3d9, 0x3d5, 0x3d2, 0x3ce, 0x3ca, 0x3c7,
+ 0x3c3, 0x3c0, 0x3bc, 0x3b9, 0x3b5, 0x3b2, 0x3ae, 0x3ab,
+ 0x3a8, 0x3a4, 0x3a1, 0x39e, 0x39b, 0x397, 0x394, 0x391,
+ 0x38e, 0x38b, 0x387, 0x384, 0x381, 0x37e, 0x37b, 0x378,
+ 0x375, 0x372, 0x36f, 0x36c, 0x369, 0x366, 0x364, 0x361,
+ 0x35e, 0x35b, 0x358, 0x355, 0x353, 0x350, 0x34d, 0x34a,
+ 0x348, 0x345, 0x342, 0x340, 0x33d, 0x33a, 0x338, 0x335,
+ 0x333, 0x330, 0x32e, 0x32b, 0x329, 0x326, 0x324, 0x321,
+ 0x31f, 0x31c, 0x31a, 0x317, 0x315, 0x313, 0x310, 0x30e,
+ 0x30c, 0x309, 0x307, 0x305, 0x303, 0x300, 0x2fe, 0x2fc,
+ 0x2fa, 0x2f7, 0x2f5, 0x2f3, 0x2f1, 0x2ef, 0x2ec, 0x2ea,
+ 0x2e8, 0x2e6, 0x2e4, 0x2e2, 0x2e0, 0x2de, 0x2dc, 0x2da,
+ 0x2d8, 0x2d6, 0x2d4, 0x2d2, 0x2d0, 0x2ce, 0x2cc, 0x2ca,
+ 0x2c8, 0x2c6, 0x2c4, 0x2c2, 0x2c0, 0x2be, 0x2bc, 0x2bb,
+ 0x2b9, 0x2b7, 0x2b5, 0x2b3, 0x2b1, 0x2b0, 0x2ae, 0x2ac,
+ 0x2aa, 0x2a8, 0x2a7, 0x2a5, 0x2a3, 0x2a1, 0x2a0, 0x29e,
+ 0x29c, 0x29b, 0x299, 0x297, 0x295, 0x294, 0x292, 0x291,
+ 0x28f, 0x28d, 0x28c, 0x28a, 0x288, 0x287, 0x285, 0x284,
+ 0x282, 0x280, 0x27f, 0x27d, 0x27c, 0x27a, 0x279, 0x277,
+ 0x276, 0x274, 0x273, 0x271, 0x270, 0x26e, 0x26d, 0x26b,
+ 0x26a, 0x268, 0x267, 0x265, 0x264, 0x263, 0x261, 0x260,
+ 0x25e, 0x25d, 0x25c, 0x25a, 0x259, 0x257, 0x256, 0x255,
+ 0x253, 0x252, 0x251, 0x24f, 0x24e, 0x24d, 0x24b, 0x24a,
+ 0x249, 0x247, 0x246, 0x245, 0x243, 0x242, 0x241, 0x240,
+ 0x23e, 0x23d, 0x23c, 0x23b, 0x239, 0x238, 0x237, 0x236,
+ 0x234, 0x233, 0x232, 0x231, 0x230, 0x22e, 0x22d, 0x22c,
+ 0x22b, 0x22a, 0x229, 0x227, 0x226, 0x225, 0x224, 0x223,
+ 0x222, 0x220, 0x21f, 0x21e, 0x21d, 0x21c, 0x21b, 0x21a,
+ 0x219, 0x218, 0x216, 0x215, 0x214, 0x213, 0x212, 0x211,
+ 0x210, 0x20f, 0x20e, 0x20d, 0x20c, 0x20b, 0x20a, 0x209,
+ 0x208, 0x207, 0x206, 0x205, 0x204, 0x203, 0x202, 0x201,
+};
+
+/* iteration: z = 2z-(z**2)d */
+
+mp_limb_t
+mpn_invert_limb (mp_limb_t d)
+{ /* Refines a table seed with z = 2z - z^2 d steps, then applies up to two +1 corrections; returns zh. */
+ mp_limb_t z, z2l, z2h, tl, th;
+ mp_limb_t xh, xl;
+ mp_limb_t zh, zl; /* zl is written by sub_ddmmss below but never read afterwards */
+
+#if GMP_LIMB_BITS == 32
+ z = approx_tab[(d >> 23) - 0x100] << 6; /* z < 2^16; the index is in range only if d's top bit is set */
+
+ z2l = z * z; /* z2l < 2^32 */
+ umul_ppmm (th, tl, z2l, d); /* longlong.h macro; presumably (th,tl) = z2l * d */
+ z = (z << 17) - (th << 1); /* one rescaled step of z = 2z - z^2 d */
+#endif
+#if GMP_LIMB_BITS == 64
+ z = approx_tab[(d >> 55) - 0x100] << 6; /* z < 2^16; the index is in range only if d's top bit is set */
+
+ z2l = z * z; /* z2l < 2^32 */
+ th = z2l * (d >> 32); /* th < 2^64; uses only the high half of d */
+ z = (z << 17) - (th >> 31); /* z < 2^32 */
+
+ z2l = z * z;
+ umul_ppmm (th, tl, z2l, d); /* longlong.h macro; presumably (th,tl) = z2l * d */
+ z = (z << 33) - (th << 1); /* next rescaled step of z = 2z - z^2 d */
+#endif
+
+ umul_ppmm (z2h, z2l, z, z); /* presumably the double-limb square of z */
+ umul_ppmm (th, tl, z2h, d);
+ umul_ppmm (xh, xl, z2l, d);
+ tl += xh;
+ th += tl < xh; /* propagate the carry out of tl += xh */
+ th = (th << 2) | (tl >> GMP_LIMB_BITS - 2); /* shift (th,tl) left by 2; `-` binds tighter than `>>` */
+ tl = tl << 2;
+ sub_ddmmss (zh, zl, z << 2, 0, th, tl); /* presumably (zh,zl) = (z << 2, 0) - (th,tl) */
+
+ umul_ppmm (xh, xl, d, zh);
+ xh += d; /* add_ssaaaa (xh, xl, xh, xl, d, 0); */
+ if (~xh != 0) /* first possible correction: taken unless xh is all ones */
+ {
+ add_ssaaaa (xh, xl, xh, xl, 0, d); /* presumably (xh,xl) += d */
+ zh++;
+ }
+
+ add_ssaaaa (xh, xl, xh, xl, 0, d);
+ if (xh != 0) /* second possible correction */
+ zh++;
+
+ return zh;
+}
diff --git a/vendor/gmp-6.3.0/mpn/cray/ieee/mul_1.c b/vendor/gmp-6.3.0/mpn/cray/ieee/mul_1.c
new file mode 100644
index 0000000..40139fb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/cray/ieee/mul_1.c
@@ -0,0 +1,103 @@
+/* Cray PVP/IEEE mpn_mul_1 -- multiply a limb vector with a limb and store the
+ result in a second limb vector.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* This code runs at 5 cycles/limb on a T90. That would probably
+ be hard to improve upon, even with assembly code. */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{ /* Stores up[0..n-1] * vl into rp[0..n-1]; returns the limb carried out of the top. */
+ mp_limb_t cy[n]; /* cy[i]: carry (0 or 1) out of position i, applied in a later pass */
+ mp_limb_t a, b, r, s0, s1, c0, c1;
+ mp_size_t i;
+ int more_carries; /* number of positions where the vector carry pass wrapped */
+
+ if (up == rp)
+ {
+ /* The main loop re-reads up[i - 1] after rp[i - 1] has been written, so it cannot run in place.
+ Only exact aliasing (up == rp) is caught here: copy the source to a temporary, then call ourselves. */
+ mp_limb_t xp[n];
+ MPN_COPY (xp, up, n);
+ return mpn_mul_1 (rp, xp, n, vl);
+ }
+
+ a = up[0] * vl; /* low limb of up[0] * vl */
+ rp[0] = a;
+ cy[0] = 0; /* nothing is added at position 0, so no carry */
+
+ /* Main multiply loop. Generate a raw accumulated output product in rp[]
+ and a carry vector in cy[]. No carry is propagated between limbs here. */
+#pragma _CRI ivdep
+ for (i = 1; i < n; i++)
+ {
+ a = up[i] * vl; /* low limb of this product */
+ b = _int_mult_upper (up[i - 1], vl); /* upper limb of the previous product, going by the intrinsic's name */
+ s0 = a + b;
+ c0 = ((a & b) | ((a | b) & ~s0)) >> 63; /* carry out of a + b, derived from the top bits */
+ rp[i] = s0;
+ cy[i] = c0;
+ }
+ /* Carry add loop. Add the carry vector cy[] to the raw sum rp[] and
+ store the new sum back to rp[]. Starts at 2 because cy[0] is always 0. */
+ more_carries = 0;
+#pragma _CRI ivdep
+ for (i = 2; i < n; i++)
+ {
+ r = rp[i];
+ c0 = cy[i - 1];
+ s0 = r + c0;
+ rp[i] = s0;
+ c0 = (r & ~s0) >> 63; /* wrapped: top bit set in r but clear in s0 */
+ more_carries += c0;
+ }
+ /* If that second loop generated carry, handle that in scalar loop. */
+ if (more_carries)
+ {
+ mp_limb_t cyrec = 0; /* carry rippling in from the previous position */
+ /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
+ These are where we got a recurrency carry. */
+ for (i = 2; i < n; i++)
+ {
+ r = rp[i];
+ c0 = (r == 0 && cy[i - 1] != 0); /* the carry add loop wrapped at this position */
+ s0 = r + cyrec;
+ rp[i] = s0;
+ c1 = (r & ~s0) >> 63; /* adding cyrec wrapped */
+ cyrec = c0 | c1;
+ }
+ return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+ }
+
+ return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}
diff --git a/vendor/gmp-6.3.0/mpn/cray/ieee/mul_basecase.c b/vendor/gmp-6.3.0/mpn/cray/ieee/mul_basecase.c
new file mode 100644
index 0000000..72628f7
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/cray/ieee/mul_basecase.c
@@ -0,0 +1,107 @@
+/* Cray PVP/IEEE mpn_mul_basecase.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* The most critical loop of this code runs at about 5 cycles/limb on a T90.
+ That is not perfect, mainly due to vector register shortage. */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+void
+mpn_mul_basecase (mp_ptr rp,
+ mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
+{ /* Writes the un + vn limb product of up[0..un-1] and vp[0..vn-1] to rp[]. */
+ mp_limb_t cy[un + vn]; /* deferred carries per position, accumulated over all rows */
+ mp_limb_t vl;
+ mp_limb_t a, b, r, s0, s1, c0, c1;
+ mp_size_t i, j;
+ int more_carries; /* number of positions where the vector carry pass wrapped */
+
+ for (i = 0; i < un + vn; i++) /* rp[] is cleared before up[]/vp[] are read, so it must not overlap them */
+ {
+ rp[i] = 0;
+ cy[i] = 0;
+ }
+
+#pragma _CRI novector
+ for (j = 0; j < vn; j++) /* one row per limb of vp[], accumulated at offset j */
+ {
+ vl = vp[j];
+
+ a = up[0] * vl; /* low limb of up[0] * vl */
+ r = rp[j];
+ s0 = a + r;
+ rp[j] = s0;
+ c0 = ((a & r) | ((a | r) & ~s0)) >> 63; /* carry out of a + r, derived from the top bits */
+ cy[j] += c0;
+
+#pragma _CRI ivdep
+ for (i = 1; i < un; i++)
+ {
+ a = up[i] * vl; /* low limb of this product */
+ b = _int_mult_upper (up[i - 1], vl); /* upper limb of the previous product, going by the intrinsic's name */
+ s0 = a + b;
+ c0 = ((a & b) | ((a | b) & ~s0)) >> 63; /* carry out of a + b */
+ r = rp[j + i];
+ s1 = s0 + r;
+ rp[j + i] = s1;
+ c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63; /* carry out of s0 + r */
+ cy[j + i] += c0 + c1;
+ }
+ rp[j + un] = _int_mult_upper (up[un - 1], vl); /* no earlier row touched rp[j + un], so store rather than add */
+ }
+
+ more_carries = 0;
+#pragma _CRI ivdep
+ for (i = 1; i < un + vn; i++) /* add the deferred carries; wrap-arounds are only counted here */
+ {
+ r = rp[i];
+ c0 = cy[i - 1];
+ s0 = r + c0;
+ rp[i] = s0;
+ c0 = (r & ~s0) >> 63; /* wrapped: top bit set in r but clear in s0 */
+ more_carries += c0;
+ }
+ /* If that second loop generated carry, handle that in scalar loop. */
+ if (more_carries)
+ {
+ mp_limb_t cyrec = 0; /* carry rippling in from the previous position */
+ for (i = 1; i < un + vn; i++)
+ {
+ r = rp[i];
+ c0 = (r < cy[i - 1]); /* the carry pass wrapped at this position */
+ s0 = r + cyrec;
+ rp[i] = s0;
+ c1 = (r & ~s0) >> 63; /* adding cyrec wrapped */
+ cyrec = c0 | c1; /* the value left after the last limb is discarded */
+ }
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/cray/ieee/sqr_basecase.c b/vendor/gmp-6.3.0/mpn/cray/ieee/sqr_basecase.c
new file mode 100644
index 0000000..5bd4e56
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/cray/ieee/sqr_basecase.c
@@ -0,0 +1,105 @@
+/* Cray PVP/IEEE mpn_sqr_basecase.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* This is just mpn_mul_basecase with trivial modifications. */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+void
+mpn_sqr_basecase (mp_ptr rp,
+ mp_srcptr up, mp_size_t un)
+{ /* Writes the 2 * un limb square of up[0..un-1] to rp[], computing every row in full. */
+ mp_limb_t cy[un + un]; /* deferred carries per position, accumulated over all rows */
+ mp_limb_t ul;
+ mp_limb_t a, b, r, s0, s1, c0, c1;
+ mp_size_t i, j;
+ int more_carries; /* number of positions where the vector carry pass wrapped */
+
+ for (i = 0; i < un + un; i++) /* rp[] is cleared before up[] is read, so it must not overlap it */
+ {
+ rp[i] = 0;
+ cy[i] = 0;
+ }
+
+#pragma _CRI novector
+ for (j = 0; j < un; j++) /* one row per limb of up[], accumulated at offset j */
+ {
+ ul = up[j];
+
+ a = up[0] * ul; /* low limb of up[0] * ul */
+ r = rp[j];
+ s0 = a + r;
+ rp[j] = s0;
+ c0 = ((a & r) | ((a | r) & ~s0)) >> 63; /* carry out of a + r, derived from the top bits */
+ cy[j] += c0;
+
+#pragma _CRI ivdep
+ for (i = 1; i < un; i++)
+ {
+ a = up[i] * ul; /* low limb of this product */
+ b = _int_mult_upper (up[i - 1], ul); /* upper limb of the previous product, going by the intrinsic's name */
+ s0 = a + b;
+ c0 = ((a & b) | ((a | b) & ~s0)) >> 63; /* carry out of a + b */
+ r = rp[j + i];
+ s1 = s0 + r;
+ rp[j + i] = s1;
+ c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63; /* carry out of s0 + r */
+ cy[j + i] += c0 + c1;
+ }
+ rp[j + un] = _int_mult_upper (up[un - 1], ul); /* no earlier row touched rp[j + un], so store rather than add */
+ }
+
+ more_carries = 0;
+#pragma _CRI ivdep
+ for (i = 1; i < un + un; i++) /* add the deferred carries; wrap-arounds are only counted here */
+ {
+ r = rp[i];
+ c0 = cy[i - 1];
+ s0 = r + c0;
+ rp[i] = s0;
+ c0 = (r & ~s0) >> 63; /* wrapped: top bit set in r but clear in s0 */
+ more_carries += c0;
+ }
+ /* If that second loop generated carry, handle that in scalar loop. */
+ if (more_carries)
+ {
+ mp_limb_t cyrec = 0; /* carry rippling in from the previous position */
+ for (i = 1; i < un + un; i++)
+ {
+ r = rp[i];
+ c0 = (r < cy[i - 1]); /* the carry pass wrapped at this position */
+ s0 = r + cyrec;
+ rp[i] = s0;
+ c1 = (r & ~s0) >> 63; /* adding cyrec wrapped */
+ cyrec = c0 | c1; /* the value left after the last limb is discarded */
+ }
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/cray/ieee/submul_1.c b/vendor/gmp-6.3.0/mpn/cray/ieee/submul_1.c
new file mode 100644
index 0000000..2b3ca21
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/cray/ieee/submul_1.c
@@ -0,0 +1,111 @@
+/* Cray PVP/IEEE mpn_submul_1 -- multiply a limb vector with a limb and
+ subtract the result from a second limb vector.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* This code runs at just under 9 cycles/limb on a T90. That is not perfect,
+ mainly due to vector register shortage in the main loop. Assembly code
+ should bring it down to perhaps 7 cycles/limb. */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{ /* Subtracts up[0..n-1] * vl from rp[0..n-1]; returns the limb borrowed out of the top. */
+ mp_limb_t cy[n]; /* cy[i]: carry plus borrow (0, 1 or 2) out of position i, applied in a later pass */
+ mp_limb_t a, b, r, s0, s1, c0, c1;
+ mp_size_t i;
+ int more_carries; /* number of positions where the vector borrow pass wrapped */
+
+ if (up == rp)
+ {
+ /* The main loop re-reads up[i - 1] after rp[i - 1] has been written, so it cannot run in place.
+ Only exact aliasing (up == rp) is caught here: copy the source to a temporary, then call ourselves. */
+ mp_limb_t xp[n];
+ MPN_COPY (xp, up, n);
+ return mpn_submul_1 (rp, xp, n, vl);
+ }
+
+ a = up[0] * vl; /* low limb of up[0] * vl */
+ r = rp[0];
+ s0 = r - a;
+ rp[0] = s0;
+ c1 = ((s0 & a) | ((s0 | a) & ~r)) >> 63; /* borrow out of r - a, derived from the top bits */
+ cy[0] = c1;
+
+ /* Main multiply loop. Generate a raw accumulated output product in rp[]
+ and a carry vector in cy[]. No borrow is propagated between limbs here. */
+#pragma _CRI ivdep
+ for (i = 1; i < n; i++)
+ {
+ a = up[i] * vl; /* low limb of this product */
+ b = _int_mult_upper (up[i - 1], vl); /* upper limb of the previous product, going by the intrinsic's name */
+ s0 = a + b;
+ c0 = ((a & b) | ((a | b) & ~s0)) >> 63; /* carry out of a + b */
+ r = rp[i];
+ s1 = r - s0;
+ rp[i] = s1;
+ c1 = ((s1 & s0) | ((s1 | s0) & ~r)) >> 63; /* borrow out of r - s0 */
+ cy[i] = c0 + c1;
+ }
+ /* Carry subtract loop. Subtract the carry vector cy[] from the raw result
+ rp[] and store the new result back to rp[]. Wrap-arounds are only counted here. */
+ more_carries = 0;
+#pragma _CRI ivdep
+ for (i = 1; i < n; i++)
+ {
+ r = rp[i];
+ c0 = cy[i - 1];
+ s0 = r - c0;
+ rp[i] = s0;
+ c0 = (s0 & ~r) >> 63; /* wrapped below zero: top bit clear in r but set in s0 */
+ more_carries += c0;
+ }
+ /* If that second loop generated carry, handle that in scalar loop. */
+ if (more_carries)
+ {
+ mp_limb_t cyrec = 0; /* borrow rippling in from the previous position */
+ /* Look for places where ~rp[k] < cy[k-1], e.g. rp[k] == ~0 and cy[k-1] == 1
+ or rp[k] == ~1 and cy[k-1] == 2: the carry subtract loop wrapped there.
+ These are where we got a recurrency carry. */
+ for (i = 1; i < n; i++)
+ {
+ r = rp[i];
+ c0 = ~r < cy[i - 1]; /* the carry subtract loop wrapped at this position */
+ s0 = r - cyrec;
+ rp[i] = s0;
+ c1 = (s0 & ~r) >> 63; /* subtracting cyrec wrapped */
+ cyrec = c0 | c1;
+ }
+ return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+ }
+
+ return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}