author     Thomas Voss <mail@thomasvoss.com>    2024-06-21 23:36:36 +0200
committer  Thomas Voss <mail@thomasvoss.com>    2024-06-21 23:42:26 +0200
commit     a89a14ef5da44684a16b204e7a70460cc8c4922a (patch)
tree       b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/generic
parent     1db63fcedab0b288820d66e100b1877b1a5a8851 (diff)
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/generic')
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/add.c | 33
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/add_1.c | 33
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/add_err1_n.c | 100
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/add_err2_n.c | 116
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/add_err3_n.c | 131
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/add_n.c | 89
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/add_n_sub_n.c | 172
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/addmul_1.c | 145
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/bdiv_dbm1c.c | 58
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/bdiv_q.c | 76
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/bdiv_q_1.c | 121
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/bdiv_qr.c | 84
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/binvert.c | 106
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/broot.c | 195
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/brootinv.c | 159
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/bsqrt.c | 47
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/bsqrtinv.c | 103
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/cmp.c | 33
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/cnd_add_n.c | 69
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/cnd_sub_n.c | 69
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/cnd_swap.c | 50
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/com.c | 44
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/comb_tables.c | 47
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/compute_powtab.c | 373
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/copyd.c | 40
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/copyi.c | 42
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/dcpi1_bdiv_q.c | 161
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/dcpi1_bdiv_qr.c | 176
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/dcpi1_div_q.c | 86
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/dcpi1_div_qr.c | 248
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/dcpi1_divappr_q.c | 256
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/div_q.c | 313
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/div_qr_1.c | 125
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/div_qr_1n_pi1.c | 505
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/div_qr_1n_pi2.c | 203
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/div_qr_1u_pi2.c | 236
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/div_qr_2.c | 314
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/div_qr_2n_pi1.c | 84
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/div_qr_2u_pi1.c | 76
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/dive_1.c | 146
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/diveby3.c | 173
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/divexact.c | 296
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/divis.c | 194
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/divrem.c | 103
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/divrem_1.c | 254
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/divrem_2.c | 118
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/dump.c | 99
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/fib2_ui.c | 174
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/fib2m.c | 252
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gcd.c | 266
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gcd_1.c | 103
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gcd_11.c | 74
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gcd_22.c | 131
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gcd_subdiv_step.c | 204
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gcdext.c | 557
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gcdext_1.c | 275
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gcdext_lehmer.c | 336
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/get_d.c | 438
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/get_str.c | 451
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/gmp-mparam.h | 33
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd.c | 182
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd2-div.h | 504
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd2.c | 283
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd2_jacobi.c | 251
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd_appr.c | 267
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd_jacobi.c | 243
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd_matrix.c | 265
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd_reduce.c | 242
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/hgcd_step.c | 127
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/invert.c | 86
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/invertappr.c | 300
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/jacbase.c | 242
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/jacobi.c | 294
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/jacobi_2.c | 351
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/logops_n.c | 77
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/lshift.c | 72
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/lshiftc.c | 73
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/matrix22_mul.c | 321
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/matrix22_mul1_inverse_vector.c | 64
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mod_1.c | 278
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mod_1_1.c | 341
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mod_1_2.c | 148
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mod_1_3.c | 155
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mod_1_4.c | 170
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mod_34lsub1.c | 128
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mode1o.c | 235
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mu_bdiv_q.c | 281
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mu_bdiv_qr.c | 312
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mu_div_q.c | 184
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mu_div_qr.c | 417
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c | 368
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mul.c | 441
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mul_1.c | 96
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mul_basecase.c | 165
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mul_fft.c | 1105
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mul_n.c | 96
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mullo_basecase.c | 90
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mullo_n.c | 243
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mulmid.c | 255
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mulmid_basecase.c | 82
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mulmid_n.c | 61
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mulmod_bknp1.c | 502
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/mulmod_bnm1.c | 374
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/neg.c | 33
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/nussbaumer_mul.c | 70
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/perfpow.c | 342
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/perfsqr.c | 238
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/popham.c | 125
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/pow_1.c | 135
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/powlo.c | 188
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/powm.c | 1003
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/pre_divrem_1.c | 145
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/pre_mod_1.c | 61
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/random.c | 50
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/random2.c | 105
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/redc_1.c | 56
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/redc_2.c | 110
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/redc_n.c | 80
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/remove.c | 182
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/rootrem.c | 515
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/rshift.c | 69
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_q.c | 96
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_qr.c | 82
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_r.c | 79
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sbpi1_div_q.c | 302
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sbpi1_div_qr.c | 109
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sbpi1_divappr_q.c | 198
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/scan0.c | 59
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/scan1.c | 59
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sec_aors_1.c | 59
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sec_div.c | 131
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sec_invert.c | 177
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sec_mul.c | 48
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sec_pi1_div.c | 172
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sec_powm.c | 430
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sec_sqr.c | 76
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sec_tabselect.c | 134
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/set_str.c | 290
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sizeinbase.c | 49
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sqr.c | 98
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sqr_basecase.c | 361
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sqrlo.c | 239
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sqrlo_basecase.c | 194
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sqrmod_bnm1.c | 328
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sqrtrem.c | 555
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/strongfibo.c | 219
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sub.c | 33
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sub_1.c | 33
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sub_err1_n.c | 100
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sub_err2_n.c | 116
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sub_err3_n.c | 131
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/sub_n.c | 89
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/submul_1.c | 144
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/tdiv_qr.c | 386
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom22_mul.c | 222
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom2_sqr.c | 155
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom32_mul.c | 320
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom33_mul.c | 316
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom3_sqr.c | 221
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom42_mul.c | 234
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom42_mulmid.c | 237
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom43_mul.c | 238
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom44_mul.c | 239
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom4_sqr.c | 164
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom52_mul.c | 256
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom53_mul.c | 331
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom54_mul.c | 142
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom62_mul.c | 310
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom63_mul.c | 231
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom6_sqr.c | 181
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom6h_mul.c | 262
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom8_sqr.c | 225
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom8h_mul.c | 305
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_couple_handling.c | 80
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_eval_dgr3_pm1.c | 72
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_eval_dgr3_pm2.c | 97
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_eval_pm1.c | 89
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2.c | 130
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2exp.c | 127
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2rexp.c | 101
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_interpolate_12pts.c | 374
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_interpolate_16pts.c | 545
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_interpolate_5pts.c | 198
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_interpolate_6pts.c | 241
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_interpolate_7pts.c | 274
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/toom_interpolate_8pts.c | 211
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/trialdiv.c | 131
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/udiv_w_sdiv.c | 141
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/zero.c | 41
-rw-r--r--  vendor/gmp-6.3.0/mpn/generic/zero_p.c | 33
190 files changed, 37323 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/generic/add.c b/vendor/gmp-6.3.0/mpn/generic/add.c
new file mode 100644
index 0000000..4a6e3ba
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/add.c
@@ -0,0 +1,33 @@
+/* mpn_add - add mpn to mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __GMP_FORCE_mpn_add 1
+
+#include "gmp-impl.h"
diff --git a/vendor/gmp-6.3.0/mpn/generic/add_1.c b/vendor/gmp-6.3.0/mpn/generic/add_1.c
new file mode 100644
index 0000000..1745aed
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/add_1.c
@@ -0,0 +1,33 @@
+/* mpn_add_1 - add limb to mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __GMP_FORCE_mpn_add_1 1
+
+#include "gmp-impl.h"
diff --git a/vendor/gmp-6.3.0/mpn/generic/add_err1_n.c b/vendor/gmp-6.3.0/mpn/generic/add_err1_n.c
new file mode 100644
index 0000000..b247f19
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/add_err1_n.c
@@ -0,0 +1,100 @@
+/* mpn_add_err1_n -- add_n with one error term
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+ return value is carry out.
+
+ (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+ Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+ yp += n - 1;
+ el = eh = 0;
+
+ do
+ {
+ yl = *yp--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary add_n */
+ ADDC_LIMB (cy1, sl, ul, vl);
+ ADDC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh:el) */
+ zl = (-cy) & yl;
+ el += zl;
+ eh += el < zl;
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
+ el &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el;
+ ep[1] = eh;
+
+ return cy;
+}
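
Editor's illustration (not part of the imported GMP sources): the contract described in the header comment above can be restated with plain C integers. The sketch below assumes 64-bit limbs and no nail bits, uses uint64_t in place of mp_limb_t, and the function name is invented; it is only a reference model mirroring the structure of mpn_add_err1_n, not the GMP implementation.

#include <stdint.h>
#include <stddef.h>

/* Toy model: add {u,n} and {v,n} with incoming carry cy, writing the sum to
   {r,n}, while accumulating sum over i of carry(i) * y[n-1-i] into the
   two-word value (eh:el) -- the quantity described in (2) above.  */
static uint64_t
toy_add_err1 (uint64_t *r, const uint64_t *u, const uint64_t *v,
              uint64_t *eh, uint64_t *el, const uint64_t *y,
              size_t n, uint64_t cy)
{
  uint64_t e0 = 0, e1 = 0;
  for (size_t i = 0; i < n; i++)
    {
      uint64_t s = u[i] + v[i];
      uint64_t c1 = s < u[i];
      uint64_t t = s + cy;
      uint64_t c2 = t < s;
      cy = c1 | c2;                  /* carry out of limb position i */
      r[i] = t;

      uint64_t mask = 0 - cy;        /* all ones when a carry occurred */
      uint64_t z = mask & y[n - 1 - i];
      e0 += z;
      e1 += e0 < z;                  /* propagate into the high word */
    }
  *el = e0;
  *eh = e1;
  return cy;                         /* carry out of the whole addition */
}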
diff --git a/vendor/gmp-6.3.0/mpn/generic/add_err2_n.c b/vendor/gmp-6.3.0/mpn/generic/add_err2_n.c
new file mode 100644
index 0000000..d584d6d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/add_err2_n.c
@@ -0,0 +1,116 @@
+/* mpn_add_err2_n -- add_n with two error terms
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+ return value is carry out.
+
+ (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+ Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+ c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+ stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+ yp1 += n - 1;
+ yp2 += n - 1;
+ el1 = eh1 = 0;
+ el2 = eh2 = 0;
+
+ do
+ {
+ yl1 = *yp1--;
+ yl2 = *yp2--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary add_n */
+ ADDC_LIMB (cy1, sl, ul, vl);
+ ADDC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh1:el1) */
+ zl1 = (-cy) & yl1;
+ el1 += zl1;
+ eh1 += el1 < zl1;
+
+ /* update (eh2:el2) */
+ zl2 = (-cy) & yl2;
+ el2 += zl2;
+ eh2 += el2 < zl2;
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+ el1 &= GMP_NUMB_MASK;
+ eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+ el2 &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el1;
+ ep[1] = eh1;
+ ep[2] = el2;
+ ep[3] = eh2;
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/add_err3_n.c b/vendor/gmp-6.3.0/mpn/generic/add_err3_n.c
new file mode 100644
index 0000000..a6ed4dc
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/add_err3_n.c
@@ -0,0 +1,131 @@
+/* mpn_add_err3_n -- add_n with three error terms
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+ return value is carry out.
+
+ (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+ Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+ c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+ c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+ stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+ yp1 += n - 1;
+ yp2 += n - 1;
+ yp3 += n - 1;
+ el1 = eh1 = 0;
+ el2 = eh2 = 0;
+ el3 = eh3 = 0;
+
+ do
+ {
+ yl1 = *yp1--;
+ yl2 = *yp2--;
+ yl3 = *yp3--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary add_n */
+ ADDC_LIMB (cy1, sl, ul, vl);
+ ADDC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh1:el1) */
+ zl1 = (-cy) & yl1;
+ el1 += zl1;
+ eh1 += el1 < zl1;
+
+ /* update (eh2:el2) */
+ zl2 = (-cy) & yl2;
+ el2 += zl2;
+ eh2 += el2 < zl2;
+
+ /* update (eh3:el3) */
+ zl3 = (-cy) & yl3;
+ el3 += zl3;
+ eh3 += el3 < zl3;
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+ el1 &= GMP_NUMB_MASK;
+ eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+ el2 &= GMP_NUMB_MASK;
+ eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
+ el3 &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el1;
+ ep[1] = eh1;
+ ep[2] = el2;
+ ep[3] = eh2;
+ ep[4] = el3;
+ ep[5] = eh3;
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/add_n.c b/vendor/gmp-6.3.0/mpn/generic/add_n.c
new file mode 100644
index 0000000..f62ac87
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/add_n.c
@@ -0,0 +1,89 @@
+/* mpn_add_n -- Add equal length limb vectors.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+ cy = 0;
+ do
+ {
+ ul = *up++;
+ vl = *vp++;
+ sl = ul + vl;
+ cy1 = sl < ul;
+ rl = sl + cy;
+ cy2 = rl < sl;
+ cy = cy1 | cy2;
+ *rp++ = rl;
+ }
+ while (--n != 0);
+
+ return cy;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_limb_t ul, vl, rl, cy;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+ cy = 0;
+ do
+ {
+ ul = *up++;
+ vl = *vp++;
+ rl = ul + vl + cy;
+ cy = rl >> GMP_NUMB_BITS;
+ *rp++ = rl & GMP_NUMB_MASK;
+ }
+ while (--n != 0);
+
+ return cy;
+}
+
+#endif
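
Editor's illustration (not part of the imported GMP sources): mpn_add_n is part of GMP's public low-level mpn interface, so the routine above can be exercised directly. A minimal caller might look like the sketch below; it assumes an installed <gmp.h> and a build without nail bits (the default), and makes no other assumption about the limb width.

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  /* a = B^2 - 1 held in two limbs (B = 2^GMP_NUMB_BITS), b = 1.  */
  mp_limb_t a[2] = { ~(mp_limb_t) 0, ~(mp_limb_t) 0 };
  mp_limb_t b[2] = { 1, 0 };
  mp_limb_t r[2];

  mp_limb_t cy = mpn_add_n (r, a, b, 2);   /* r <- a + b, returns carry out */

  /* a + b = B^2, so both result limbs are 0 and the carry out is 1.  */
  printf ("carry = %lu, r[1] = %lu, r[0] = %lu\n",
          (unsigned long) cy, (unsigned long) r[1], (unsigned long) r[0]);
  return 0;
}

Build with something like `cc demo.c -lgmp`.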
diff --git a/vendor/gmp-6.3.0/mpn/generic/add_n_sub_n.c b/vendor/gmp-6.3.0/mpn/generic/add_n_sub_n.c
new file mode 100644
index 0000000..1e72b5d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/add_n_sub_n.c
@@ -0,0 +1,172 @@
+/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1999-2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / GMP_LIMB_BYTES / 6)
+
+
+/* mpn_add_n_sub_n.
+ r1[] = s1[] + s2[]
+ r2[] = s1[] - s2[]
+ All operands have n limbs.
+ In-place operations allowed. */
+mp_limb_t
+mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+{
+ mp_limb_t acyn, acyo; /* carry for add */
+ mp_limb_t scyn, scyo; /* carry for subtract */
+ mp_size_t off; /* offset in operands */
+ mp_size_t this_n; /* size of current chunk */
+
+  /* We alternately add and subtract in chunks that fit into the (L1)
+     cache. Since the chunks are several hundred limbs, the function call
+     overhead is insignificant, but we get much better locality. */
+
+  /* We have three variants of the inner loop; the proper one is chosen
+     depending on whether r1 or r2 is the same operand as s1 or s2. */
+
+ if (r1p != s1p && r1p != s2p)
+ {
+ /* r1 is not identical to either input operand. We can therefore write
+ to r1 directly, without using temporary storage. */
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+ acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+ }
+ }
+ else if (r2p != s1p && r2p != s2p)
+ {
+ /* r2 is not identical to either input operand. We can therefore write
+ to r2 directly, without using temporary storage. */
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+#if HAVE_NATIVE_mpn_add_nc
+ acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+ }
+ }
+ else
+ {
+ /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2==s2 or vice versa)
+ Need temporary storage. */
+ mp_limb_t tp[PART_SIZE];
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+ acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+ MPN_COPY (r1p + off, tp, this_n);
+ }
+ }
+
+ return 2 * acyo + scyo;
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+int
+main (int argc, char **argv)
+{
+ mp_ptr r1p, r2p, s1p, s2p;
+ double t;
+ mp_size_t n;
+
+ n = strtol (argv[1], 0, 0);
+
+ r1p = malloc (n * GMP_LIMB_BYTES);
+ r2p = malloc (n * GMP_LIMB_BYTES);
+ s1p = malloc (n * GMP_LIMB_BYTES);
+ s2p = malloc (n * GMP_LIMB_BYTES);
+ TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
+ printf (" separate add and sub: %.3f\n", t);
+ TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+ printf ("combined addsub separate variables: %.3f\n", t);
+ TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+ printf (" combined addsub r1 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,r2p,n));
+ printf (" combined addsub r2 overlap: %.3f\n", t);
+ TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
+ printf (" combined addsub in-place: %.3f\n", t);
+
+ return 0;
+}
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/addmul_1.c b/vendor/gmp-6.3.0/mpn/generic/addmul_1.c
new file mode 100644
index 0000000..6140e8e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/addmul_1.c
@@ -0,0 +1,145 @@
+/* mpn_addmul_1 -- multiply the N long limb vector pointed to by UP by VL,
+ add the N least significant limbs of the product to the limb vector
+ pointed to by RP. Return the most significant limb of the product,
+ adjusted for carry-out from the addition.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2004, 2016 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+ mp_limb_t u0, crec, c, p1, p0, r0;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+
+ crec = 0;
+ do
+ {
+ u0 = *up++;
+ umul_ppmm (p1, p0, u0, v0);
+
+ r0 = *rp;
+
+ p0 = r0 + p0;
+ c = r0 > p0;
+
+ p1 = p1 + c;
+
+ r0 = p0 + crec; /* cycle 0, 3, ... */
+ c = p0 > r0; /* cycle 1, 4, ... */
+
+ crec = p1 + c; /* cycle 2, 5, ... */
+
+ *rp++ = r0;
+ }
+ while (--n != 0);
+
+ return crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS == 1
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+ mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, crec, xl, c1, c2, c3;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT_MPN (rp, n);
+ ASSERT_MPN (up, n);
+ ASSERT_LIMB (v0);
+
+ shifted_v0 = v0 << GMP_NAIL_BITS;
+ crec = 0;
+ prev_p1 = 0;
+ do
+ {
+ u0 = *up++;
+ r0 = *rp;
+ umul_ppmm (p1, p0, u0, shifted_v0);
+ p0 >>= GMP_NAIL_BITS;
+ ADDC_LIMB (c1, xl, prev_p1, p0);
+ ADDC_LIMB (c2, xl, xl, r0);
+ ADDC_LIMB (c3, xl, xl, crec);
+ crec = c1 + c2 + c3;
+ *rp++ = xl;
+ prev_p1 = p1;
+ }
+ while (--n != 0);
+
+ return prev_p1 + crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 2
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+ mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, xw, crec, xl;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT_MPN (rp, n);
+ ASSERT_MPN (up, n);
+ ASSERT_LIMB (v0);
+
+ shifted_v0 = v0 << GMP_NAIL_BITS;
+ crec = 0;
+ prev_p1 = 0;
+ do
+ {
+ u0 = *up++;
+ r0 = *rp;
+ umul_ppmm (p1, p0, u0, shifted_v0);
+ p0 >>= GMP_NAIL_BITS;
+ xw = prev_p1 + p0 + r0 + crec;
+ crec = xw >> GMP_NUMB_BITS;
+ xl = xw & GMP_NUMB_MASK;
+ *rp++ = xl;
+ prev_p1 = p1;
+ }
+ while (--n != 0);
+
+ return prev_p1 + crec;
+}
+
+#endif
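
Editor's illustration (not part of the imported GMP sources): mpn_addmul_1 is likewise exported through the public mpn interface. The sketch below, assuming an installed <gmp.h>, accumulates {u,3} * 7 into {r,3} and prints the returned high limb; with these small values no limb overflows, so the returned limb is zero.

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mp_limb_t u[3] = { 5, 6, 7 };
  mp_limb_t r[3] = { 0, 0, 0 };

  /* r <- r + u * 7; the most significant limb of the product, adjusted for
     carry out of the addition, is returned rather than stored.  */
  mp_limb_t hi = mpn_addmul_1 (r, u, 3, 7);

  printf ("hi = %lu, r = {%lu, %lu, %lu}\n",   /* expect hi = 0, r = {35, 42, 49} */
          (unsigned long) hi,
          (unsigned long) r[0], (unsigned long) r[1], (unsigned long) r[2]);
  return 0;
}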
diff --git a/vendor/gmp-6.3.0/mpn/generic/bdiv_dbm1c.c b/vendor/gmp-6.3.0/mpn/generic/bdiv_dbm1c.c
new file mode 100644
index 0000000..543bb6e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/bdiv_dbm1c.c
@@ -0,0 +1,58 @@
+/* mpn_bdiv_dbm1c -- divide an mpn number by a divisor of B-1, where B is the
+ limb base. The dbm1c moniker means "Divisor of B Minus 1 with Carry".
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+mp_limb_t
+mpn_bdiv_dbm1c (mp_ptr qp, mp_srcptr ap, mp_size_t n, mp_limb_t bd, mp_limb_t h)
+{
+ mp_limb_t a, p0, p1, cy;
+ mp_size_t i;
+
+ for (i = 0; i < n; i++)
+ {
+ a = ap[i];
+ umul_ppmm (p1, p0, a, bd << GMP_NAIL_BITS);
+ p0 >>= GMP_NAIL_BITS;
+ cy = h < p0;
+ h = (h - p0) & GMP_NUMB_MASK;
+ qp[i] = h;
+ h = h - p1 - cy;
+ }
+
+ return h;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/bdiv_q.c b/vendor/gmp-6.3.0/mpn/generic/bdiv_q.c
new file mode 100644
index 0000000..52aa473
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/bdiv_q.c
@@ -0,0 +1,76 @@
+/* mpn_bdiv_q -- Hensel division with precomputed inverse, returning quotient.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n. */
+
+void
+mpn_bdiv_q (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr tp)
+{
+ mp_limb_t di;
+
+ if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
+ {
+ MPN_COPY (tp, np, nn);
+ binvert_limb (di, dp[0]); di = -di;
+ mpn_sbpi1_bdiv_q (qp, tp, nn, dp, dn, di);
+ }
+ else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+ {
+ MPN_COPY (tp, np, nn);
+ binvert_limb (di, dp[0]); di = -di;
+ mpn_dcpi1_bdiv_q (qp, tp, nn, dp, dn, di);
+ }
+ else
+ {
+ mpn_mu_bdiv_q (qp, np, nn, dp, dn, tp);
+ }
+ return;
+}
+
+mp_size_t
+mpn_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
+{
+ if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+ return nn;
+ else
+ return mpn_mu_bdiv_q_itch (nn, dn);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/bdiv_q_1.c b/vendor/gmp-6.3.0/mpn/generic/bdiv_q_1.c
new file mode 100644
index 0000000..6beb9a0
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/bdiv_q_1.c
@@ -0,0 +1,121 @@
+/* mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by 1-limb
+ divisor, returning quotient only.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003, 2005, 2009, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_pi1_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d,
+ mp_limb_t di, int shift)
+{
+ mp_size_t i;
+ mp_limb_t c, h, l, u, u_next, dummy;
+
+ ASSERT (n >= 1);
+ ASSERT (d != 0);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT_MPN (up, n);
+ ASSERT_LIMB (d);
+
+ d <<= GMP_NAIL_BITS;
+
+ if (shift != 0)
+ {
+ c = 0;
+
+ u = up[0];
+ rp--;
+ for (i = 1; i < n; i++)
+ {
+ u_next = up[i];
+ u = ((u >> shift) | (u_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
+
+ SUBC_LIMB (c, l, u, c);
+
+ l = (l * di) & GMP_NUMB_MASK;
+ rp[i] = l;
+
+ umul_ppmm (h, dummy, l, d);
+ c += h;
+ u = u_next;
+ }
+
+ u = u >> shift;
+ SUBC_LIMB (c, l, u, c);
+
+ l = (l * di) & GMP_NUMB_MASK;
+ rp[n] = l;
+ }
+ else
+ {
+ u = up[0];
+ l = (u * di) & GMP_NUMB_MASK;
+ rp[0] = l;
+ c = 0;
+
+ for (i = 1; i < n; i++)
+ {
+ umul_ppmm (h, dummy, l, d);
+ c += h;
+
+ u = up[i];
+ SUBC_LIMB (c, l, u, c);
+
+ l = (l * di) & GMP_NUMB_MASK;
+ rp[i] = l;
+ }
+ }
+
+ return c;
+}
+
+mp_limb_t
+mpn_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d)
+{
+ mp_limb_t di;
+ int shift;
+
+ ASSERT (n >= 1);
+ ASSERT (d != 0);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT_MPN (up, n);
+ ASSERT_LIMB (d);
+
+ count_trailing_zeros (shift, d);
+ d >>= shift;
+
+ binvert_limb (di, d);
+ return mpn_pi1_bdiv_q_1 (rp, up, n, d, di, shift);
+}
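
Editor's illustration (not part of the imported GMP sources): Hensel division by an odd limb needs no division instruction because an odd d is invertible modulo B, which is what binvert_limb supplies. The single-word sketch below assumes 64-bit limbs; inv_mod_b is an invented name for the classic Newton loop, and the final assertion is the exact-division identity that mpn_pi1_bdiv_q_1 applies limb by limb.

#include <stdint.h>
#include <assert.h>

/* d^{-1} mod 2^64 for odd d: x = d is already correct to 3 bits (d*d == 1
   mod 8), and each step x <- x * (2 - d*x) doubles the number of correct
   low bits, so five steps reach 64 bits.  */
static uint64_t
inv_mod_b (uint64_t d)
{
  uint64_t x = d;
  for (int i = 0; i < 5; i++)
    x *= 2 - d * x;
  return x;
}

int
main (void)
{
  uint64_t d = 0x100000001f;     /* odd divisor */
  uint64_t q = 0x123456789;      /* intended quotient */
  uint64_t n = d * q;            /* an exact multiple of d (mod 2^64) */
  uint64_t dinv = inv_mod_b (d);

  assert (d * dinv == 1);        /* inverse modulo 2^64 */
  assert (n * dinv == q);        /* exact quotient recovered by one multiply */
  return 0;
}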
diff --git a/vendor/gmp-6.3.0/mpn/generic/bdiv_qr.c b/vendor/gmp-6.3.0/mpn/generic/bdiv_qr.c
new file mode 100644
index 0000000..a4f0f39
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/bdiv_qr.c
@@ -0,0 +1,84 @@
+/* mpn_bdiv_qr -- Hensel division with precomputed inverse, returning quotient
+ and remainder.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n,
+ R = N - QD. */
+
+mp_limb_t
+mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr tp)
+{
+ mp_limb_t di;
+ mp_limb_t rh;
+
+ ASSERT (nn > dn);
+ if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
+ BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
+ {
+ MPN_COPY (tp, np, nn);
+ binvert_limb (di, dp[0]); di = -di;
+ rh = mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
+ MPN_COPY (rp, tp + nn - dn, dn);
+ }
+ else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+ {
+ MPN_COPY (tp, np, nn);
+ binvert_limb (di, dp[0]); di = -di;
+ rh = mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
+ MPN_COPY (rp, tp + nn - dn, dn);
+ }
+ else
+ {
+ rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, tp);
+ }
+
+ return rh;
+}
+
+mp_size_t
+mpn_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
+{
+ if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+ return nn;
+ else
+ return mpn_mu_bdiv_qr_itch (nn, dn);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/binvert.c b/vendor/gmp-6.3.0/mpn/generic/binvert.c
new file mode 100644
index 0000000..a170e66
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/binvert.c
@@ -0,0 +1,106 @@
+/* Compute {up,n}^(-1) mod B^n.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2004-2007, 2009, 2012, 2017, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+/*
+ r[k+1] = r[k] - r[k] * (u*r[k] - 1)
+ r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
+#else
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD))
+#endif
+
+mp_size_t
+mpn_binvert_itch (mp_size_t n)
+{
+ mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);
+ return itch_local + itch_out;
+}
+
+void
+mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
+{
+ mp_ptr xp;
+ mp_size_t rn, newrn;
+ mp_size_t sizes[NPOWS], *sizp;
+ mp_limb_t di;
+
+ /* Compute the computation precisions from highest to lowest, leaving the
+ base case size in 'rn'. */
+ sizp = sizes;
+ for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
+ *sizp++ = rn;
+
+ xp = scratch;
+
+ /* Compute a base value of rn limbs. */
+ MPN_ZERO (xp, rn);
+ xp[0] = 1;
+ binvert_limb (di, up[0]);
+ if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
+ mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+ else
+ mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+
+ mpn_neg (rp, rp, rn);
+
+ /* Use Newton iterations to get the desired precision. */
+ for (; rn < n; rn = newrn)
+ {
+ mp_size_t m;
+ newrn = *--sizp;
+
+ /* X <- UR. */
+ m = mpn_mulmod_bnm1_next_size (newrn);
+ mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
+ /* Only the values in the range xp + rn .. xp + newrn - 1 are
+ used by the _mullo_n below.
+ Since m >= newrn, we do not need the following. */
+ /* mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1); */
+
+ /* R = R(X/B^rn) */
+ mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn);
+ mpn_neg (rp + rn, rp + rn, newrn - rn);
+ }
+}
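
Editor's illustration (not part of the imported GMP sources): the iteration quoted at the top of this file, r[k+1] = r[k] - r[k]*(u*r[k] - 1), doubles the number of correct low limbs per step, which is why mpn_binvert only needs the short list of sizes computed above. The toy below shows one such lifting step at fixed precision, extending a 64-bit inverse to a 128-bit one; it assumes 64-bit limbs and a compiler providing the unsigned __int128 extension, and all names are invented for the example.

#include <stdint.h>
#include <assert.h>

typedef unsigned __int128 u128;

int
main (void)
{
  /* An odd two-word operand u.  */
  u128 u = ((u128) 0x0123456789abcdefULL << 64) | 0x1122334455667789ULL;

  /* Low-word inverse, as binvert_limb would produce: seed correct to 3 bits,
     five Newton steps reach 64 bits.  */
  uint64_t r0 = (uint64_t) u;
  for (int i = 0; i < 5; i++)
    r0 *= 2 - (uint64_t) u * r0;

  /* One double-precision step r <- r0 - r0*(u*r0 - 1) = r0*(2 - u*r0)
     lifts 64 correct bits to 128.  */
  u128 r = (u128) r0 * (2 - u * r0);

  assert ((u128) (u * r) == 1);    /* u * r == 1 (mod 2^128) */
  return 0;
}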
diff --git a/vendor/gmp-6.3.0/mpn/generic/broot.c b/vendor/gmp-6.3.0/mpn/generic/broot.c
new file mode 100644
index 0000000..02fe75a
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/broot.c
@@ -0,0 +1,195 @@
+/* mpn_broot -- Compute Hensel k-th root (a^{1/k} mod B^n)
+
+ Contributed to the GNU project by Niels Möller
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+ typical use will have e small. */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+ mp_limb_t r = 1;
+ mp_limb_t s = a;
+
+ for (r = 1, s = a; e > 0; e >>= 1, s *= s)
+ if (e & 1)
+ r *= s;
+
+ return r;
+}
+
+/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
+
+ Iterates
+
+     r' <-- r - r * (a^{k-1} r^k - 1) / k
+
+ If
+
+ a^{k-1} r^k = 1 (mod 2^m),
+
+ then
+
+ a^{k-1} r'^k = 1 (mod 2^{2m}),
+
+ Compute the update term as
+
+ r' = r - (a^{k-1} r^{k+1} - r) / k
+
+ where we still have cancellation of low limbs.
+
+ */
+void
+mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+ mp_size_t sizes[GMP_LIMB_BITS * 2];
+ mp_ptr akm1, tp, rnp, ep;
+ mp_limb_t a0, r0, km1, kp1h, kinv;
+ mp_size_t rn;
+ unsigned i;
+
+ TMP_DECL;
+
+ ASSERT (n > 0);
+ ASSERT (ap[0] & 1);
+ ASSERT (k & 1);
+ ASSERT (k >= 3);
+
+ TMP_MARK;
+
+ akm1 = TMP_ALLOC_LIMBS (4*n);
+ tp = akm1 + n;
+
+ km1 = k-1;
+ /* FIXME: Could arrange the iteration so we don't need to compute
+ this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
+ that we can use wraparound also for a*r, since the low half is
+ unchanged from the previous iteration. Or possibly mulmid. Also,
+ a r = a^{1/k}, so we get that value too, for free? */
+ mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
+
+ a0 = ap[0];
+ binvert_limb (kinv, k);
+
+ /* 4 bits: a^{1/k - 1} (mod 16):
+
+ a % 8
+ 1 3 5 7
+ k%4 +-------
+ 1 |1 1 1 1
+ 3 |1 9 9 1
+ */
+ r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
+ r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
+ r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
+ r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
+#if GMP_NUMB_BITS > 32
+ {
+ unsigned prec = 32;
+ do
+ {
+ r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
+ prec *= 2;
+ }
+ while (prec < GMP_NUMB_BITS);
+ }
+#endif
+
+ rp[0] = r0;
+ if (n == 1)
+ {
+ TMP_FREE;
+ return;
+ }
+
+ /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
+ kp1h = k/2 + 1;
+
+ /* FIXME: Special case for two limb iteration. */
+ rnp = TMP_ALLOC_LIMBS (2*n + 1);
+ ep = rnp + n;
+
+  /* FIXME: Possible to do this on the fly with some bit fiddling. */
+ for (i = 0; n > 1; n = (n + 1)/2)
+ sizes[i++] = n;
+
+ rn = 1;
+
+ while (i-- > 0)
+ {
+ /* Compute x^{k+1}. */
+ mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
+ final iteration. */
+ mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
+
+ /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
+
+ mpn_mullo_n (ep, rnp, akm1, sizes[i]);
+ ASSERT (mpn_cmp (ep, rp, rn) == 0);
+
+ ASSERT (sizes[i] <= 2*rn);
+ mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
+ mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
+ rn = sizes[i];
+ }
+ TMP_FREE;
+}
+
+/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
+void
+mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+ mp_ptr tp;
+ TMP_DECL;
+
+ ASSERT (n > 0);
+ ASSERT (ap[0] & 1);
+ ASSERT (k & 1);
+
+ if (k == 1)
+ {
+ MPN_COPY (rp, ap, n);
+ return;
+ }
+
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS (n);
+
+ mpn_broot_invm1 (tp, ap, n, k);
+ mpn_mullo_n (rp, tp, ap, n);
+
+ TMP_FREE;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/brootinv.c b/vendor/gmp-6.3.0/mpn/generic/brootinv.c
new file mode 100644
index 0000000..e91b597
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/brootinv.c
@@ -0,0 +1,159 @@
+/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
+
+ Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Computes a^2e (mod B). Uses right-to-left binary algorithm, since
+ typical use will have e small. */
+static mp_limb_t
+powsquaredlimb (mp_limb_t a, mp_limb_t e)
+{
+ mp_limb_t r;
+
+ r = 1;
+ /* if (LIKELY (e != 0)) */
+ do {
+ a *= a;
+ if (e & 1)
+ r *= a;
+ e >>= 1;
+ } while (e != 0);
+
+ return r;
+}
+
+/* Compute r such that r^k * y = 1 (mod B^n).
+
+ Iterates
+ r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
+ using Hensel lifting, each time doubling the number of known bits in r.
+
+   Works only for odd k; otherwise the Hensel lifting degenerates.
+
+ FIXME:
+
+ (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
+
+ (2) Rewrite iteration as
+ r' <-- r - k^{-1} r (r^k y - 1)
+ and take advantage of the zero low part of r^k y - 1.
+
+ (3) Use wrap-around trick.
+
+ (4) Use a small table to get starting value.
+
+ Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo
+ Currently mpn_powlo requires 3*bn
+ so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS).
+*/
+
+void
+mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
+{
+ mp_ptr tp2, tp3;
+ mp_limb_t kinv, k2, r0, y0;
+ mp_size_t order[GMP_LIMB_BITS + 1];
+ int d;
+
+ ASSERT (bn > 0);
+ ASSERT ((k & 1) != 0);
+
+ tp2 = tp + bn;
+ tp3 = tp + bn + ((bn + 3) >> 1);
+ k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */
+
+ binvert_limb (kinv, k);
+
+ /* 4-bit initial approximation:
+
+ y%16 | 1 3 5 7 9 11 13 15,
+ k%4 +-------------------------+k2%2
+ 1 | 1 11 13 7 9 3 5 15 | 1
+ 3 | 1 3 5 7 9 11 13 15 | 0
+
+ */
+ y0 = yp[0];
+
+ r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8); /* 4 bits */
+ r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f)); /* 8 bits */
+ r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff)); /* 16 bits */
+#if GMP_NUMB_BITS > 16
+ {
+ unsigned prec = 16;
+ do
+ {
+ r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2));
+ prec *= 2;
+ }
+ while (prec < GMP_NUMB_BITS);
+ }
+#endif
+
+ rp[0] = r0;
+ if (bn == 1)
+ return;
+
+ d = 0;
+ for (; bn != 2; bn = (bn + 1) >> 1)
+ order[d++] = bn;
+
+ order[d] = 2;
+ bn = 1;
+
+ do
+ {
+ mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */
+ tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1);
+
+ bn = order[d];
+
+ mpn_powlo (rp, tp, &k2, 1, bn, tp3);
+ mpn_mullo_n (tp, yp, rp, bn);
+
+ /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */
+      /* mpn_sub can't be used here, since ((bn + 1) >> 1) + 1 <= bn */
+ {
+ mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */
+ int borrow;
+ borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0;
+ if (bn > pbn) /* 3 < bn */
+ {
+ if (borrow)
+ mpn_com (tp + pbn, tp + pbn, bn - pbn);
+ else
+ mpn_neg (tp + pbn, tp + pbn, bn - pbn);
+ }
+ }
+ mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0);
+ }
+ while (--d >= 0);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/bsqrt.c b/vendor/gmp-6.3.0/mpn/generic/bsqrt.c
new file mode 100644
index 0000000..27184f0
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/bsqrt.c
@@ -0,0 +1,47 @@
+/* mpn_bsqrt, a^{1/2} (mod 2^n).
+
+Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
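+/* A sketch of the identity used below, based on the mpn_bsqrtinv comment:
+   mpn_bsqrtinv produces t with t^2 * a == 1 (mod 2^{nb+1}), so r = a * t
+   satisfies r^2 = a^2 * t^2 == a (mod 2^{nb+1}); r is thus a square root
+   of a to the requested precision.  */
+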
+void
+mpn_bsqrt (mp_ptr rp, mp_srcptr ap, mp_bitcnt_t nb, mp_ptr tp)
+{
+ mp_ptr sp;
+ mp_size_t n;
+
+ ASSERT (nb > 0);
+
+ n = nb / GMP_NUMB_BITS;
+ sp = tp + n;
+
+ mpn_bsqrtinv (tp, ap, nb, sp);
+ mpn_mullo_n (rp, tp, ap, n);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/bsqrtinv.c b/vendor/gmp-6.3.0/mpn/generic/bsqrtinv.c
new file mode 100644
index 0000000..c286773
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/bsqrtinv.c
@@ -0,0 +1,103 @@
+/* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
+
+ Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
+ Return non-zero if such an integer r exists.
+
+ Iterates
+ r' <-- (3r - r^3 y) / 2
+   using Hensel lifting.  Since we divide by two, the Hensel lifting is
+   somewhat degenerate.  Therefore, we lift from 2^b to 2^{b+1}-1.
+
+ FIXME:
+ (1) Simplify to do precision book-keeping in limbs rather than bits.
+
+ (2) Rewrite iteration as
+ r' <-- r - r (r^2 y - 1) / 2
+ and take advantage of zero low part of r^2 y - 1.
+
+ (3) Use wrap-around trick.
+
+ (4) Use a small table to get starting value.
+*/
+int
+mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
+{
+ mp_ptr tp2;
+ mp_size_t bn, order[GMP_LIMB_BITS + 1];
+ int i, d;
+
+ ASSERT (bnb > 0);
+
+ bn = 1 + bnb / GMP_LIMB_BITS;
+
+ tp2 = tp + bn;
+
+ rp[0] = 1;
+ if (bnb == 1)
+ {
+ if ((yp[0] & 3) != 1)
+ return 0;
+ }
+ else
+ {
+ if ((yp[0] & 7) != 1)
+ return 0;
+
+ d = 0;
+ for (; bnb != 2; bnb = (bnb + 2) >> 1)
+ order[d++] = bnb;
+
+ for (i = d - 1; i >= 0; i--)
+ {
+ bnb = order[i];
+ bn = 1 + bnb / GMP_LIMB_BITS;
+
+ mpn_sqrlo (tp, rp, bn);
+ mpn_mullo_n (tp2, rp, tp, bn); /* tp2 <- rp ^ 3 */
+
+ mpn_mul_1 (tp, rp, bn, 3);
+
+ mpn_mullo_n (rp, yp, tp2, bn);
+
+#if HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (rp, tp, rp, bn);
+#else
+ mpn_sub_n (tp2, tp, rp, bn);
+ mpn_rshift (rp, tp2, bn, 1);
+#endif
+ }
+ }
+ return 1;
+}
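+
+/* Usage sketch (illustrative values, assumed rather than taken from the
+   library): with bnb = 7 and y = 17 (17 == 1 mod 8, so a root exists),
+   bn = 1 and two limbs of scratch suffice; the call returns 1 and leaves
+   r[0] with r[0]^2 * 17 == 1 (mod 2^8).  */
+#if 0
+static void
+bsqrtinv_demo (void)
+{
+  mp_limb_t y[1] = { 17 };
+  mp_limb_t r[1];
+  mp_limb_t scratch[2];
+  int ok = mpn_bsqrtinv (r, y, 7, scratch);
+  (void) ok;
+}
+#endif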
diff --git a/vendor/gmp-6.3.0/mpn/generic/cmp.c b/vendor/gmp-6.3.0/mpn/generic/cmp.c
new file mode 100644
index 0000000..940314b
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/cmp.c
@@ -0,0 +1,33 @@
+/* mpn_cmp -- Compare two low-level natural-number integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __GMP_FORCE_mpn_cmp 1
+
+#include "gmp-impl.h"
diff --git a/vendor/gmp-6.3.0/mpn/generic/cnd_add_n.c b/vendor/gmp-6.3.0/mpn/generic/cnd_add_n.c
new file mode 100644
index 0000000..e6b1373
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/cnd_add_n.c
@@ -0,0 +1,69 @@
+/* mpn_cnd_add_n -- Compute R = U + V if CND != 0 or R = U if CND == 0.
+ Both cases should take the same time and perform the exact same memory
+ accesses, since this function is intended to be used where side-channel
+ attack resilience is relevant.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
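+/* Usage sketch (illustrative 2-limb values, assumed): both calls below read
+   every limb of up and vp and write every limb of rp, so the memory access
+   pattern is independent of cnd; only the arithmetic result differs.  */
+#if 0
+static void
+cnd_add_n_demo (void)
+{
+  mp_limb_t u[2] = { 5, 7 };
+  mp_limb_t v[2] = { 1, 2 };
+  mp_limb_t r[2];
+  mp_limb_t c;
+  c = mpn_cnd_add_n (1, r, u, v, 2);  /* r = {6, 9}, c = 0 */
+  c = mpn_cnd_add_n (0, r, u, v, 2);  /* r = {5, 7}, c = 0 */
+  (void) c;
+}
+#endif
+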
+mp_limb_t
+mpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+ mask = -(mp_limb_t) (cnd != 0);
+ cy = 0;
+ do
+ {
+ ul = *up++;
+ vl = *vp++ & mask;
+#if GMP_NAIL_BITS == 0
+ sl = ul + vl;
+ cy1 = sl < ul;
+ rl = sl + cy;
+ cy2 = rl < sl;
+ cy = cy1 | cy2;
+ *rp++ = rl;
+#else
+ rl = ul + vl;
+ rl += cy;
+ cy = rl >> GMP_NUMB_BITS;
+ *rp++ = rl & GMP_NUMB_MASK;
+#endif
+ }
+ while (--n != 0);
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/cnd_sub_n.c b/vendor/gmp-6.3.0/mpn/generic/cnd_sub_n.c
new file mode 100644
index 0000000..d04ad8a
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/cnd_sub_n.c
@@ -0,0 +1,69 @@
+/* mpn_cnd_sub_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
+ Both cases should take the same time and perform the exact same memory
+ accesses, since this function is intended to be used where side-channel
+ attack resilience is relevant.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+ mask = -(mp_limb_t) (cnd != 0);
+ cy = 0;
+ do
+ {
+ ul = *up++;
+ vl = *vp++ & mask;
+#if GMP_NAIL_BITS == 0
+ sl = ul - vl;
+ cy1 = sl > ul;
+ rl = sl - cy;
+ cy2 = rl > sl;
+ cy = cy1 | cy2;
+ *rp++ = rl;
+#else
+ rl = ul - vl;
+ rl -= cy;
+ cy = rl >> (GMP_LIMB_BITS - 1);
+ *rp++ = rl & GMP_NUMB_MASK;
+#endif
+ }
+ while (--n != 0);
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/cnd_swap.c b/vendor/gmp-6.3.0/mpn/generic/cnd_swap.c
new file mode 100644
index 0000000..83d856d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/cnd_swap.c
@@ -0,0 +1,50 @@
+/* mpn_cnd_swap
+
+ Contributed to the GNU project by Niels Möller
+
+Copyright 2013, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
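+/* Usage sketch (illustrative values, assumed): the masked XOR below swaps
+   the two 2-limb operands when cnd is non-zero and leaves them unchanged
+   when cnd is zero, performing the same loads and stores either way.  */
+#if 0
+static void
+cnd_swap_demo (void)
+{
+  mp_limb_t a[2] = { 1, 2 };
+  mp_limb_t b[2] = { 3, 4 };
+  mpn_cnd_swap (1, a, b, 2);  /* now a = {3, 4}, b = {1, 2} */
+  mpn_cnd_swap (0, a, b, 2);  /* both left unchanged */
+}
+#endif
+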
+void
+mpn_cnd_swap (mp_limb_t cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp,
+ mp_size_t n)
+{
+ volatile mp_limb_t mask = - (mp_limb_t) (cnd != 0);
+ mp_size_t i;
+ for (i = 0; i < n; i++)
+ {
+ mp_limb_t a, b, t;
+ a = ap[i];
+ b = bp[i];
+ t = (a ^ b) & mask;
+ ap[i] = a ^ t;
+ bp[i] = b ^ t;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/com.c b/vendor/gmp-6.3.0/mpn/generic/com.c
new file mode 100644
index 0000000..4de5824
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/com.c
@@ -0,0 +1,44 @@
+/* mpn_com - complement an mpn.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef mpn_com
+#define mpn_com __MPN(com)
+
+void
+mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+ mp_limb_t ul;
+ do {
+ ul = *up++;
+ *rp++ = ~ul & GMP_NUMB_MASK;
+ } while (--n != 0);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/comb_tables.c b/vendor/gmp-6.3.0/mpn/generic/comb_tables.c
new file mode 100644
index 0000000..dedb77b
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/comb_tables.c
@@ -0,0 +1,47 @@
+/* Const tables shared among combinatoric functions.
+
+ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+ BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis
+ is an odd integer. */
+const mp_limb_t __gmp_oddfac_table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE };
+
+/* Entry i contains ((2i+1)!!/2^t) where t is chosen such that the parenthesis
+ is an odd integer. */
+const mp_limb_t __gmp_odd2fac_table[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
+
+/* Entry i contains 2i-popc(2i). */
+const unsigned char __gmp_fac2cnt_table[] = { TABLE_2N_MINUS_POPC_2N };
+
+const mp_limb_t __gmp_limbroots_table[] = { NTH_ROOT_NUMB_MASK_TABLE };
diff --git a/vendor/gmp-6.3.0/mpn/generic/compute_powtab.c b/vendor/gmp-6.3.0/mpn/generic/compute_powtab.c
new file mode 100644
index 0000000..f4fbc64
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/compute_powtab.c
@@ -0,0 +1,373 @@
+/* mpn_compute_powtab.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/*
+ CAVEATS:
+ * The exptab and powtab vectors are in opposite orders. Probably OK.
+ * Consider getting rid of exptab, doing bit ops on the un argument instead.
+ * Consider rounding greatest power slightly upwards to save adjustments.
+ * In powtab_decide, consider computing cost from just the 2-3 largest
+     operands, since smaller operands contribute little.  This makes most sense
+ if exptab is suppressed.
+*/
+
+#include "gmp-impl.h"
+
+#ifndef DIV_1_VS_MUL_1_PERCENT
+#define DIV_1_VS_MUL_1_PERCENT 150
+#endif
+
+#define SET_powers_t(dest, ptr, size, dib, b, sh) \
+ do { \
+ dest.p = ptr; \
+ dest.n = size; \
+ dest.digits_in_base = dib; \
+ dest.base = b; \
+ dest.shift = sh; \
+ } while (0)
+
+#if DIV_1_VS_MUL_1_PERCENT > 120
+#define HAVE_mpn_compute_powtab_mul 1
+static void
+mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
+ int base, const size_t *exptab, size_t n_pows)
+{
+ mp_size_t n;
+ mp_ptr p, t;
+ mp_limb_t cy;
+ long start_idx;
+ int c;
+
+ mp_limb_t big_base = mp_bases[base].big_base;
+ int chars_per_limb = mp_bases[base].chars_per_limb;
+
+ mp_ptr powtab_mem_ptr = powtab_mem;
+
+ size_t digits_in_base = chars_per_limb;
+
+ powers_t *pt = powtab;
+
+ p = powtab_mem_ptr;
+ powtab_mem_ptr += 1;
+ p[0] = big_base;
+
+ SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
+ pt++;
+
+ t = powtab_mem_ptr;
+ powtab_mem_ptr += 2;
+ t[1] = mpn_mul_1 (t, p, 1, big_base);
+ n = 2;
+
+ digits_in_base *= 2;
+
+ c = t[0] == 0;
+ t += c;
+ n -= c;
+ mp_size_t shift = c;
+
+ SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+ p = t;
+ pt++;
+
+ if (exptab[0] == ((size_t) chars_per_limb << n_pows))
+ {
+ start_idx = n_pows - 2;
+ }
+ else
+ {
+ if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
+ {
+ /* 3, sometimes adjusted to 4. */
+ t = powtab_mem_ptr;
+ powtab_mem_ptr += 4;
+ t[n] = cy = mpn_mul_1 (t, p, n, big_base);
+          n += cy != 0;
+
+ digits_in_base += chars_per_limb;
+
+ c = t[0] == 0;
+ t += c;
+ n -= c;
+ shift += c;
+ }
+ else
+ {
+          /* Copy 2 limbs; back-multiplication will always grow this to 3. */
+ t = powtab_mem_ptr;
+ powtab_mem_ptr += 3;
+ t[0] = p[0];
+ t[1] = p[1];
+ }
+
+ SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+ p = t;
+ pt++;
+ start_idx = n_pows - 3;
+ }
+
+ for (long pi = start_idx; pi >= 0; pi--)
+ {
+ t = powtab_mem_ptr;
+ powtab_mem_ptr += 2 * n + 2;
+
+ ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
+
+ mpn_sqr (t, p, n);
+
+ digits_in_base *= 2;
+ n *= 2;
+ n -= t[n - 1] == 0;
+ shift *= 2;
+
+ c = t[0] == 0;
+ t += c;
+ n -= c;
+ shift += c;
+
+ /* Adjust new value if it is too small as input to the next squaring. */
+ if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
+ {
+ t[n] = cy = mpn_mul_1 (t, t, n, big_base);
+ n += cy != 0;
+
+ digits_in_base += chars_per_limb;
+
+ c = t[0] == 0;
+ t += c;
+ n -= c;
+ shift += c;
+ }
+
+ SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+
+ /* Adjust previous value if it is not at its target power. */
+ if (pt[-1].digits_in_base < exptab[pi + 1])
+ {
+ mp_size_t n = pt[-1].n;
+ mp_ptr p = pt[-1].p;
+ p[n] = cy = mpn_mul_1 (p, p, n, big_base);
+ n += cy != 0;
+
+ ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
+ pt[-1].digits_in_base = exptab[pi + 1];
+
+ c = p[0] == 0;
+ pt[-1].p = p + c;
+ pt[-1].n = n - c;
+ pt[-1].shift += c;
+ }
+
+ p = t;
+ pt++;
+ }
+}
+#endif
+
+#if DIV_1_VS_MUL_1_PERCENT < 275
+#define HAVE_mpn_compute_powtab_div 1
+static void
+mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
+ int base, const size_t *exptab, size_t n_pows)
+{
+ mp_ptr p, t;
+
+ mp_limb_t big_base = mp_bases[base].big_base;
+ int chars_per_limb = mp_bases[base].chars_per_limb;
+
+ mp_ptr powtab_mem_ptr = powtab_mem;
+
+ size_t digits_in_base = chars_per_limb;
+
+ powers_t *pt = powtab;
+
+ p = powtab_mem_ptr;
+ powtab_mem_ptr += 1;
+ p[0] = big_base;
+
+ SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
+ pt++;
+
+ mp_size_t n = 1;
+ mp_size_t shift = 0;
+ for (long pi = n_pows - 1; pi >= 0; pi--)
+ {
+ t = powtab_mem_ptr;
+ powtab_mem_ptr += 2 * n;
+
+ ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
+
+ mpn_sqr (t, p, n);
+ n = 2 * n - 1; n += t[n] != 0;
+ digits_in_base *= 2;
+
+ if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */
+ {
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
+ if (__GMP_LIKELY (base == 10))
+ mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
+ MP_BASES_BIG_BASE_BINVERTED_10,
+ MP_BASES_BIG_BASE_CTZ_10);
+ else
+#endif
+ /* FIXME: We could use _pi1 here if we add big_base_binverted and
+ big_base_ctz fields to struct bases. That would add about 2 KiB
+ to mp_bases.c.
+ FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
+ mpn_bdiv_q_1 for more machines. */
+ mpn_divexact_1 (t, t, n, big_base);
+
+ n -= t[n - 1] == 0;
+ digits_in_base -= chars_per_limb;
+ }
+
+ shift *= 2;
+ /* Strip low zero limbs, but be careful to keep the result divisible by
+ big_base. */
+ while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
+ {
+ t++;
+ n--;
+ shift++;
+ }
+ p = t;
+
+ SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
+ pt++;
+ }
+
+ /* Strip any remaining low zero limbs. */
+ pt -= n_pows + 1;
+ for (long pi = n_pows; pi >= 0; pi--)
+ {
+ mp_ptr t = pt[pi].p;
+ mp_size_t shift = pt[pi].shift;
+ mp_size_t n = pt[pi].n;
+ int c;
+ c = t[0] == 0;
+ t += c;
+ n -= c;
+ shift += c;
+ pt[pi].p = t;
+ pt[pi].shift = shift;
+ pt[pi].n = n;
+ }
+}
+#endif
+
+static long
+powtab_decide (size_t *exptab, size_t un, int base)
+{
+ int chars_per_limb = mp_bases[base].chars_per_limb;
+ long n_pows = 0;
+ for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
+ {
+ exptab[n_pows] = pn * chars_per_limb;
+ n_pows++;
+ }
+ exptab[n_pows] = chars_per_limb;
+
+#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
+ size_t pn = un - 1;
+ size_t xn = (un + 1) >> 1;
+ unsigned mcost = 1;
+ unsigned dcost = 1;
+ for (long i = n_pows - 2; i >= 0; i--)
+ {
+ size_t pow = (pn >> (i + 1)) + 1;
+
+ if (pow & 1)
+ dcost += pow;
+
+ if (xn != (pow << i))
+ {
+ if (pow > 2 && (pow & 1) == 0)
+ mcost += 2 * pow;
+ else
+ mcost += pow;
+ }
+ else
+ {
+ if (pow & 1)
+ mcost += pow;
+ }
+ }
+
+ dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
+
+ if (mcost <= dcost)
+ return n_pows;
+ else
+ return -n_pows;
+#elif HAVE_mpn_compute_powtab_mul
+ return n_pows;
+#elif HAVE_mpn_compute_powtab_div
+ return -n_pows;
+#else
+#error "no powtab function available"
+#endif
+}
+
+size_t
+mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
+{
+ size_t exptab[GMP_LIMB_BITS];
+
+ long n_pows = powtab_decide (exptab, un, base);
+
+#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
+ if (n_pows >= 0)
+ {
+ mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
+ return n_pows;
+ }
+ else
+ {
+ mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
+ return -n_pows;
+ }
+#elif HAVE_mpn_compute_powtab_mul
+ ASSERT (n_pows > 0);
+ mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
+ return n_pows;
+#elif HAVE_mpn_compute_powtab_div
+ ASSERT (n_pows < 0);
+ mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
+ return -n_pows;
+#else
+#error "no powtab function available"
+#endif
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/copyd.c b/vendor/gmp-6.3.0/mpn/generic/copyd.c
new file mode 100644
index 0000000..7def007
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/copyd.c
@@ -0,0 +1,40 @@
+/* mpn_copyd
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+void
+mpn_copyd (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+ mp_size_t i;
+
+ for (i = n - 1; i >= 0; i--)
+ rp[i] = up[i];
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/copyi.c b/vendor/gmp-6.3.0/mpn/generic/copyi.c
new file mode 100644
index 0000000..736e0b5
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/copyi.c
@@ -0,0 +1,42 @@
+/* mpn_copyi
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+void
+mpn_copyi (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+ mp_size_t i;
+
+ up += n;
+ rp += n;
+ for (i = -n; i != 0; i++)
+ rp[i] = up[i];
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/dcpi1_bdiv_q.c b/vendor/gmp-6.3.0/mpn/generic/dcpi1_bdiv_q.c
new file mode 100644
index 0000000..3c21818
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/dcpi1_bdiv_q.c
@@ -0,0 +1,161 @@
+/* mpn_dcpi1_bdiv_q -- divide-and-conquer Hensel division with precomputed
+ inverse, returning quotient.
+
+ Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009-2011, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+#if 0 /* unused, so leave out for now */
+static mp_size_t
+mpn_dcpi1_bdiv_q_n_itch (mp_size_t n)
+{
+ /* NOTE: Depends on mullo_n and mpn_dcpi1_bdiv_qr_n interface */
+ return n;
+}
+#endif
+
+/* Computes Q = - N / D mod B^n, destroys N.
+
+ N = {np,n}
+ D = {dp,n}
+*/
+
+static void
+mpn_dcpi1_bdiv_q_n (mp_ptr qp,
+ mp_ptr np, mp_srcptr dp, mp_size_t n,
+ mp_limb_t dinv, mp_ptr tp)
+{
+ while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
+ {
+ mp_size_t lo, hi;
+ mp_limb_t cy;
+
+ lo = n >> 1; /* floor(n/2) */
+ hi = n - lo; /* ceil(n/2) */
+
+ cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+
+ mpn_mullo_n (tp, qp, dp + hi, lo);
+ mpn_add_n (np + hi, np + hi, tp, lo);
+
+ if (lo < hi)
+ {
+ cy += mpn_addmul_1 (np + lo, qp, lo, dp[lo]);
+ np[n - 1] += cy;
+ }
+ qp += lo;
+ np += lo;
+ n -= lo;
+ }
+ mpn_sbpi1_bdiv_q (qp, np, n, dp, n, dinv);
+}
+
+/* Computes Q = - N / D mod B^nn, destroys N.
+
+ N = {np,nn}
+ D = {dp,dn}
+*/
+
+void
+mpn_dcpi1_bdiv_q (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
+{
+ mp_size_t qn;
+ mp_limb_t cy;
+ mp_ptr tp;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ ASSERT (dn >= 2);
+ ASSERT (nn - dn >= 0);
+ ASSERT (dp[0] & 1);
+
+ tp = TMP_SALLOC_LIMBS (dn);
+
+ qn = nn;
+
+ if (qn > dn)
+ {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+ do
+ qn -= dn;
+ while (qn > dn);
+
+ /* Perform the typically smaller block first. */
+ if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+ cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+ else
+ cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+ else
+ mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+ mpn_incr_u (tp + qn, cy);
+
+ mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+ cy = 0;
+ }
+
+ np += qn;
+ qp += qn;
+
+ qn = nn - qn;
+ while (qn > dn)
+ {
+ mpn_add_1 (np + dn, np + dn, qn - dn, cy);
+ cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+ qp += dn;
+ np += dn;
+ qn -= dn;
+ }
+ mpn_dcpi1_bdiv_q_n (qp, np, dp, dn, dinv, tp);
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))
+ mpn_sbpi1_bdiv_q (qp, np, qn, dp, qn, dinv);
+ else
+ mpn_dcpi1_bdiv_q_n (qp, np, dp, qn, dinv, tp);
+ }
+
+ TMP_FREE;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/dcpi1_bdiv_qr.c b/vendor/gmp-6.3.0/mpn/generic/dcpi1_bdiv_qr.c
new file mode 100644
index 0000000..11da44f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/dcpi1_bdiv_qr.c
@@ -0,0 +1,176 @@
+/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
+ inverse, returning quotient and remainder.
+
+ Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+/* Computes Hensel binary division of {np, 2*n} by {dp, n}.
+
+ Output:
+
+ q = -n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
+
+ r = (n + q * d) * 2^{-qn * GMP_NUMB_BITS}
+
+ Stores q at qp. Stores the n least significant limbs of r at the high half
+ of np, and returns the carry from the addition n + q*d.
+
+ d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
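+
+/* Single-limb illustration of the identities above (a sketch, not part of
+   the original comment): for n = 1, q = np[0] * dinv (mod B) since
+   dinv = (-d)^{-1}, and then np[0] + q * dp[0] == 0 (mod B), so the low
+   limb of the sum cancels and the remainder is its high limb.  */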
+
+mp_size_t
+mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
+{
+ return n;
+}
+
+mp_limb_t
+mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+ mp_limb_t dinv, mp_ptr tp)
+{
+ mp_size_t lo, hi;
+ mp_limb_t cy;
+ mp_limb_t rh;
+
+ lo = n >> 1; /* floor(n/2) */
+ hi = n - lo; /* ceil(n/2) */
+
+ if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
+ cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
+ else
+ cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+
+ mpn_mul (tp, dp + lo, hi, qp, lo);
+
+ mpn_incr_u (tp + lo, cy);
+ rh = mpn_add (np + lo, np + lo, n + hi, tp, n);
+
+ if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
+ cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
+ else
+ cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
+
+ mpn_mul (tp, qp + lo, hi, dp + hi, lo);
+
+ mpn_incr_u (tp + hi, cy);
+ rh += mpn_add_n (np + n, np + n, tp, n);
+
+ return rh;
+}
+
+mp_limb_t
+mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+ mp_size_t qn;
+ mp_limb_t rr, cy;
+ mp_ptr tp;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ ASSERT (dn >= 2); /* to adhere to mpn_sbpi1_div_qr's limits */
+ ASSERT (nn - dn >= 1); /* to adhere to mpn_sbpi1_div_qr's limits */
+ ASSERT (dp[0] & 1);
+
+ tp = TMP_SALLOC_LIMBS (dn);
+
+ qn = nn - dn;
+
+ if (qn > dn)
+ {
+ /* Reduce qn mod dn without division, optimizing small operations. */
+ do
+ qn -= dn;
+ while (qn > dn);
+
+ /* Perform the typically smaller block first. */
+ if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+ cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+ else
+ cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+ rr = 0;
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+ else
+ mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+ mpn_incr_u (tp + qn, cy);
+
+ rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+ cy = 0;
+ }
+
+ np += qn;
+ qp += qn;
+
+ qn = nn - dn - qn;
+ do
+ {
+ rr += mpn_add_1 (np + dn, np + dn, qn, cy);
+ cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+ qp += dn;
+ np += dn;
+ qn -= dn;
+ }
+ while (qn > 0);
+ TMP_FREE;
+ return rr + cy;
+ }
+
+ if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+ cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+ else
+ cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+ rr = 0;
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+ else
+ mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+ mpn_incr_u (tp + qn, cy);
+
+ rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+ cy = 0;
+ }
+
+ TMP_FREE;
+ return rr + cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/dcpi1_div_q.c b/vendor/gmp-6.3.0/mpn/generic/dcpi1_div_q.c
new file mode 100644
index 0000000..1905c98
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/dcpi1_div_q.c
@@ -0,0 +1,86 @@
+/* mpn_dc_div_q -- divide-and-conquer division, returning exact quotient
+ only.
+
+ Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_dcpi1_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
+{
+ mp_ptr tp, wp;
+ mp_limb_t qh;
+ mp_size_t qn;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ ASSERT (dn >= 6);
+ ASSERT (nn - dn >= 3);
+ ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+ tp = TMP_ALLOC_LIMBS (nn + 1);
+ MPN_COPY (tp + 1, np, nn);
+ tp[0] = 0;
+
+ qn = nn - dn;
+ wp = TMP_ALLOC_LIMBS (qn + 1);
+
+ qh = mpn_dcpi1_divappr_q (wp, tp, nn + 1, dp, dn, dinv);
+
+ if (wp[0] == 0)
+ {
+ mp_limb_t cy;
+
+ if (qn > dn)
+ mpn_mul (tp, wp + 1, qn, dp, dn);
+ else
+ mpn_mul (tp, dp, dn, wp + 1, qn);
+
+ cy = (qh != 0) ? mpn_add_n (tp + qn, tp + qn, dp, dn) : 0;
+
+      if (cy || mpn_cmp (tp, np, nn) > 0) /* At most one too large, no loop needed. */
+ qh -= mpn_sub_1 (qp, wp + 1, qn, 1);
+ else /* Same as below */
+ MPN_COPY (qp, wp + 1, qn);
+ }
+ else
+ MPN_COPY (qp, wp + 1, qn);
+
+ TMP_FREE;
+ return qh;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/dcpi1_div_qr.c b/vendor/gmp-6.3.0/mpn/generic/dcpi1_div_qr.c
new file mode 100644
index 0000000..d7a65f8
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/dcpi1_div_qr.c
@@ -0,0 +1,248 @@
+/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
+ size operands.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+mp_limb_t
+mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+ gmp_pi1_t *dinv, mp_ptr tp)
+{
+ mp_size_t lo, hi;
+ mp_limb_t cy, qh, ql;
+
+ lo = n >> 1; /* floor(n/2) */
+ hi = n - lo; /* ceil(n/2) */
+
+ if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
+ else
+ qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
+
+ mpn_mul (tp, qp + lo, hi, dp, lo);
+
+ cy = mpn_sub_n (np + lo, np + lo, tp, n);
+ if (qh != 0)
+ cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+ cy -= mpn_add_n (np + lo, np + lo, dp, n);
+ }
+
+ if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
+ ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
+ else
+ ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
+
+ mpn_mul (tp, dp, hi, qp, lo);
+
+ cy = mpn_sub_n (np, np, tp, n);
+ if (ql != 0)
+ cy += mpn_sub_n (np + lo, np + lo, dp, hi);
+
+ while (cy != 0)
+ {
+ mpn_sub_1 (qp, qp, lo, 1);
+ cy -= mpn_add_n (np, np, dp, n);
+ }
+
+ return qh;
+}
+
+mp_limb_t
+mpn_dcpi1_div_qr (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ gmp_pi1_t *dinv)
+{
+ mp_size_t qn;
+ mp_limb_t qh, cy;
+ mp_ptr tp;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ ASSERT (dn >= 6); /* to adhere to mpn_sbpi1_div_qr's limits */
+ ASSERT (nn - dn >= 3); /* to adhere to mpn_sbpi1_div_qr's limits */
+ ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+ tp = TMP_ALLOC_LIMBS (dn);
+
+ qn = nn - dn;
+ qp += qn;
+ np += nn;
+ dp += dn;
+
+ if (qn > dn)
+ {
+ /* Reduce qn mod dn without division, optimizing small operations. */
+ do
+ qn -= dn;
+ while (qn > dn);
+
+ qp -= qn; /* point at low limb of next quotient block */
+ np -= qn; /* point in the middle of partial remainder */
+
+ /* Perform the typically smaller block first. */
+ if (qn == 1)
+ {
+ mp_limb_t q, n2, n1, n0, d1, d0;
+
+ /* Handle qh up front, for simplicity. */
+ qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+ if (qh)
+ ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+ /* A single iteration of schoolbook: One 3/2 division,
+ followed by the bignum update and adjustment. */
+ n2 = np[0];
+ n1 = np[-1];
+ n0 = np[-2];
+ d1 = dp[-1];
+ d0 = dp[-2];
+
+ ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+ if (UNLIKELY (n2 == d1) && n1 == d0)
+ {
+ q = GMP_NUMB_MASK;
+ cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+ ASSERT (cy == n2);
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+ if (dn > 2)
+ {
+ mp_limb_t cy, cy1;
+ cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+ cy1 = n0 < cy;
+ n0 = (n0 - cy) & GMP_NUMB_MASK;
+ cy = n1 < cy1;
+ n1 = (n1 - cy1) & GMP_NUMB_MASK;
+ np[-2] = n0;
+
+ if (UNLIKELY (cy != 0))
+ {
+ n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+ qh -= (q == 0);
+ q = (q - 1) & GMP_NUMB_MASK;
+ }
+ }
+ else
+ np[-2] = n0;
+
+ np[-1] = n1;
+ }
+ qp[0] = q;
+ }
+ else
+ {
+ /* Do a 2qn / qn division */
+ if (qn == 2)
+ qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
+ else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+ else
+ qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+ else
+ mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+ cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+ if (qh != 0)
+ cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp, qp, qn, 1);
+ cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+ }
+ }
+ }
+
+ qn = nn - dn - qn;
+ do
+ {
+ qp -= dn;
+ np -= dn;
+ mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
+ qn -= dn;
+ }
+ while (qn > 0);
+ }
+ else
+ {
+ qp -= qn; /* point at low limb of next quotient block */
+ np -= qn; /* point in the middle of partial remainder */
+
+ if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+ else
+ qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+ else
+ mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+ cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+ if (qh != 0)
+ cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp, qp, qn, 1);
+ cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+ }
+ }
+ }
+
+ TMP_FREE;
+ return qh;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/dcpi1_divappr_q.c b/vendor/gmp-6.3.0/mpn/generic/dcpi1_divappr_q.c
new file mode 100644
index 0000000..0abe04e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/dcpi1_divappr_q.c
@@ -0,0 +1,256 @@
+/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
+ quotient. The quotient returned is either correct, or one too large.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static mp_limb_t
+mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+ gmp_pi1_t *dinv, mp_ptr tp)
+{
+ mp_size_t lo, hi;
+ mp_limb_t cy, qh, ql;
+
+ lo = n >> 1; /* floor(n/2) */
+ hi = n - lo; /* ceil(n/2) */
+
+ if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
+ else
+ qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
+
+ mpn_mul (tp, qp + lo, hi, dp, lo);
+
+ cy = mpn_sub_n (np + lo, np + lo, tp, n);
+ if (qh != 0)
+ cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+ cy -= mpn_add_n (np + lo, np + lo, dp, n);
+ }
+
+ if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
+ ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
+ else
+ ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
+
+ if (UNLIKELY (ql != 0))
+ {
+ mp_size_t i;
+ for (i = 0; i < lo; i++)
+ qp[i] = GMP_NUMB_MASK;
+ }
+
+ return qh;
+}
+
+mp_limb_t
+mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
+{
+ mp_size_t qn;
+ mp_limb_t qh, cy, qsave;
+ mp_ptr tp;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ ASSERT (dn >= 6);
+ ASSERT (nn > dn);
+ ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+ qn = nn - dn;
+ qp += qn;
+ np += nn;
+ dp += dn;
+
+ if (qn >= dn)
+ {
+ qn++; /* pretend we'll need an extra limb */
+ /* Reduce qn mod dn without division, optimizing small operations. */
+ do
+ qn -= dn;
+ while (qn > dn);
+
+ qp -= qn; /* point at low limb of next quotient block */
+ np -= qn; /* point in the middle of partial remainder */
+
+ tp = TMP_SALLOC_LIMBS (dn);
+
+ /* Perform the typically smaller block first. */
+ if (qn == 1)
+ {
+ mp_limb_t q, n2, n1, n0, d1, d0;
+
+ /* Handle qh up front, for simplicity. */
+ qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+ if (qh)
+ ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+ /* A single iteration of schoolbook: One 3/2 division,
+ followed by the bignum update and adjustment. */
+ n2 = np[0];
+ n1 = np[-1];
+ n0 = np[-2];
+ d1 = dp[-1];
+ d0 = dp[-2];
+
+ ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+ if (UNLIKELY (n2 == d1) && n1 == d0)
+ {
+ q = GMP_NUMB_MASK;
+ cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+ ASSERT (cy == n2);
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+ if (dn > 2)
+ {
+ mp_limb_t cy, cy1;
+ cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+ cy1 = n0 < cy;
+ n0 = (n0 - cy) & GMP_NUMB_MASK;
+ cy = n1 < cy1;
+ n1 = (n1 - cy1) & GMP_NUMB_MASK;
+ np[-2] = n0;
+
+ if (UNLIKELY (cy != 0))
+ {
+ n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+ qh -= (q == 0);
+ q = (q - 1) & GMP_NUMB_MASK;
+ }
+ }
+ else
+ np[-2] = n0;
+
+ np[-1] = n1;
+ }
+ qp[0] = q;
+ }
+ else
+ {
+ if (qn == 2)
+ qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
+ else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+ else
+ qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+ else
+ mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+ cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+ if (qh != 0)
+ cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp, qp, qn, 1);
+ cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+ }
+ }
+ }
+ qn = nn - dn - qn + 1;
+ while (qn > dn)
+ {
+ qp -= dn;
+ np -= dn;
+ mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
+ qn -= dn;
+ }
+
+ /* Since we pretended we'd need an extra quotient limb before, we now
+ have made sure the code above left just dn-1=qn quotient limbs to
+ develop. Develop that plus a guard limb. */
+ qn--;
+ qp -= qn;
+ np -= dn;
+ qsave = qp[qn];
+ mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
+ MPN_COPY_INCR (qp, qp + 1, qn);
+ qp[qn] = qsave;
+ }
+ else /* (qn < dn) */
+ {
+ mp_ptr q2p;
+#if 0 /* not possible since we demand nn > dn */
+ if (qn == 0)
+ {
+ qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
+ if (qh)
+ mpn_sub_n (np - dn, np - dn, dp - dn, dn);
+ TMP_FREE;
+ return qh;
+ }
+#endif
+
+ qp -= qn; /* point at low limb of next quotient block */
+ np -= qn; /* point in the middle of partial remainder */
+
+ q2p = TMP_SALLOC_LIMBS (qn + 1);
+      /* Should we check DC_DIVAPPR_Q_THRESHOLD here at all, or rely on
+         callers not to be silly? */
+ if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
+ {
+ qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
+ dp - (qn + 1), qn + 1, dinv->inv32);
+ }
+ else
+ {
+ /* It is tempting to use qp for recursive scratch and put quotient in
+ tp, but the recursive scratch needs one limb too many. */
+ tp = TMP_SALLOC_LIMBS (qn + 1);
+ qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
+ }
+ MPN_COPY (qp, q2p + 1, qn);
+ }
+
+ TMP_FREE;
+ return qh;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/div_q.c b/vendor/gmp-6.3.0/mpn/generic/div_q.c
new file mode 100644
index 0000000..18c4ecf
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/div_q.c
@@ -0,0 +1,313 @@
+/* mpn_div_q -- division for arbitrary size operands.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Compute Q = N/D with truncation.
+ N = {np,nn}
+ D = {dp,dn}
+ Q = {qp,nn-dn+1}
+ T = {scratch,nn+1} is scratch space
+ N and D are both untouched by the computation.
+ N and T may overlap; pass the same space if N is irrelevant after the call,
+ but note that tp needs an extra limb.
+
+ Operand requirements:
+ N >= D > 0
+ dp[dn-1] != 0
+ No overlap between the N, D, and Q areas.
+
+ This division function does not clobber its input operands, since it is
+ intended to support average-O(qn) division, and for that to be effective, it
+ cannot put requirements on callers to copy a O(nn) operand.
+
+ If a caller does not care about the value of {np,nn+1} after calling this
+ function, it should pass np also for the scratch argument. This function
+ will then save some time and space by avoiding allocation and copying.
+ (FIXME: Is this a good design? We only really save any copying for
+ already-normalised divisors, which should be rare. It also prevents us from
+ reasonably asking for all scratch space we need.)
+
+ We write nn-dn+1 limbs for the quotient, but return void. Why not return
+ the most significant quotient limb? Look at the 4 main code blocks below
+ (consisting of an outer if-else where each arm contains an if-else). It is
+ tricky for the first code block, since the mpn_*_div_q calls will typically
+ generate all nn-dn+1 and return 0 or 1. I don't see how to fix that unless
+   generate all nn-dn+1 limbs and return 0 or 1.  I don't see how to fix that unless
+ mpn_*_div_q, or put the quotient in a temporary area. Since this is a
+ critical division case (the SB sub-case in particular) copying is not a good
+ idea.
+
+ It might make sense to split the if-else parts of the (qn + FUDGE
+ >= dn) blocks into separate functions, since we could promise quite
+ different things to callers in these two cases. The 'then' case
+ benefits from np=scratch, and it could perhaps even tolerate qp=np,
+ saving some headache for many callers.
+
+ FIXME: Scratch allocation leaves a lot to be desired. E.g., for the MU size
+ operands, we do not reuse the huge scratch for adjustments. This can be a
+ serious waste of memory for the largest operands.
+*/
+
+/* FUDGE determines when to try getting an approximate quotient from the upper
+ parts of the dividend and divisor, then adjust. N.B. FUDGE must be >= 2
+ for the code to be correct. */
+#define FUDGE 5 /* FIXME: tune this */
+
+#define DC_DIV_Q_THRESHOLD DC_DIVAPPR_Q_THRESHOLD
+#define MU_DIV_Q_THRESHOLD MU_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIV_Q_THRESHOLD MUPI_DIVAPPR_Q_THRESHOLD
+#ifndef MUPI_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIVAPPR_Q_THRESHOLD MUPI_DIV_QR_THRESHOLD
+#endif
+
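+/* Usage sketch (illustrative small values, assumed): with nn = 3 and dn = 2
+   the quotient needs nn - dn + 1 = 2 limbs and the scratch area nn + 1 = 4
+   limbs; np and dp are left untouched.  */
+#if 0
+static void
+div_q_demo (void)
+{
+  mp_limb_t np[3] = { 5, 6, 7 };        /* N = 7*B^2 + 6*B + 5 */
+  mp_limb_t dp[2] = { 3, 1 };           /* D = B + 3 */
+  mp_limb_t qp[2];
+  mp_limb_t scratch[4];
+  mpn_div_q (qp, np, 3, dp, 2, scratch);        /* qp = floor(N/D) */
+}
+#endif
+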
+void
+mpn_div_q (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
+{
+ mp_ptr new_dp, new_np, tp, rp;
+ mp_limb_t cy, dh, qh;
+ mp_size_t new_nn, qn;
+ gmp_pi1_t dinv;
+ int cnt;
+ TMP_DECL;
+ TMP_MARK;
+
+ ASSERT (nn >= dn);
+ ASSERT (dn > 0);
+ ASSERT (dp[dn - 1] != 0);
+ ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
+
+ ASSERT_ALWAYS (FUDGE >= 2);
+
+ dh = dp[dn - 1];
+ if (dn == 1)
+ {
+ mpn_divrem_1 (qp, 0L, np, nn, dh);
+ return;
+ }
+
+ qn = nn - dn + 1; /* Quotient size, high limb might be zero */
+
+ if (qn + FUDGE >= dn)
+ {
+ /* |________________________|
+ |_______| */
+ new_np = scratch;
+
+ if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
+ {
+ count_leading_zeros (cnt, dh);
+
+ cy = mpn_lshift (new_np, np, nn, cnt);
+ new_np[nn] = cy;
+ new_nn = nn + (cy != 0);
+
+ new_dp = TMP_ALLOC_LIMBS (dn);
+ mpn_lshift (new_dp, dp, dn, cnt);
+
+ if (dn == 2)
+ {
+ qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
+ }
+ else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
+ BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
+ {
+ invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+ qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
+ BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+ (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+ + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
+ {
+ invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+ qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
+ }
+ else
+ {
+ mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
+ }
+ if (cy == 0)
+ qp[qn - 1] = qh;
+ else
+ ASSERT (qh == 0);
+ }
+ else /* divisor is already normalised */
+ {
+ if (new_np != np)
+ MPN_COPY (new_np, np, nn);
+
+ if (dn == 2)
+ {
+ qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
+ }
+ else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
+ BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
+ {
+ invert_pi1 (dinv, dh, dp[dn - 2]);
+ qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
+ BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+ (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+ + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
+ {
+ invert_pi1 (dinv, dh, dp[dn - 2]);
+ qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
+ }
+ else
+ {
+ mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
+ }
+ qp[nn - dn] = qh;
+ }
+ }
+ else
+ {
+ /* |________________________|
+ |_________________| */
+ tp = TMP_ALLOC_LIMBS (qn + 1);
+
+ new_np = scratch;
+ new_nn = 2 * qn + 1;
+ if (new_np == np)
+ /* We need {np,nn} to remain untouched until the final adjustment, so
+ we need to allocate separate space for new_np. */
+ new_np = TMP_ALLOC_LIMBS (new_nn + 1);
+
+
+ if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
+ {
+ count_leading_zeros (cnt, dh);
+
+ cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
+ new_np[new_nn] = cy;
+
+ new_nn += (cy != 0);
+
+ new_dp = TMP_ALLOC_LIMBS (qn + 1);
+ mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
+ new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
+
+ if (qn + 1 == 2)
+ {
+ qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+ }
+ else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+ {
+ invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+ qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+ {
+ invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+ qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+ }
+ else
+ {
+ mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+ }
+ if (cy == 0)
+ tp[qn] = qh;
+ else if (UNLIKELY (qh != 0))
+ {
+ /* This happens only when the quotient is close to B^n and
+ mpn_*_divappr_q returned B^n. */
+ mp_size_t i, n;
+ n = new_nn - (qn + 1);
+ for (i = 0; i < n; i++)
+ tp[i] = GMP_NUMB_MAX;
+ qh = 0; /* currently ignored */
+ }
+ }
+ else /* divisor is already normalised */
+ {
+ MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
+
+ new_dp = (mp_ptr) dp + dn - (qn + 1);
+
+ if (qn == 2 - 1)
+ {
+ qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+ }
+ else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+ {
+ invert_pi1 (dinv, dh, new_dp[qn - 1]);
+ qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+ {
+ invert_pi1 (dinv, dh, new_dp[qn - 1]);
+ qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+ }
+ else
+ {
+ mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+ }
+ tp[qn] = qh;
+ }
+
+ MPN_COPY (qp, tp + 1, qn);
+ if (tp[0] <= 4)
+ {
+ mp_size_t rn;
+
+ rp = TMP_ALLOC_LIMBS (dn + qn);
+ mpn_mul (rp, dp, dn, tp + 1, qn);
+ rn = dn + qn;
+ rn -= rp[rn - 1] == 0;
+
+ if (rn > nn || mpn_cmp (np, rp, nn) < 0)
+ MPN_DECR_U (qp, qn, 1);
+ }
+ }
+
+ TMP_FREE;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/div_qr_1.c b/vendor/gmp-6.3.0/mpn/generic/div_qr_1.c
new file mode 100644
index 0000000..8f80d37
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/div_qr_1.c
@@ -0,0 +1,125 @@
+/* mpn_div_qr_1 -- mpn by limb division.
+
+ Contributed to the GNU project by Niels Möller and Torbjörn Granlund
+
+Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef DIV_QR_1_NORM_THRESHOLD
+#define DIV_QR_1_NORM_THRESHOLD 3
+#endif
+#ifndef DIV_QR_1_UNNORM_THRESHOLD
+#define DIV_QR_1_UNNORM_THRESHOLD 3
+#endif
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+/* Divides {up, n} by d. Writes the n-1 low quotient limbs at {qp,
+ * n-1}, and the high quotient limb at *qh. Returns remainder. */
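+/* A minimal usage sketch (illustrative values; internal interface declared
+   in gmp-impl.h), with u holding the least significant limb first:
+
+     mp_limb_t q[3], qh, r;
+     mp_limb_t u[4] = { 5, 0, 0, 7 };
+     r = mpn_div_qr_1 (q, &qh, u, 4, CNST_LIMB(3));
+
+   Afterwards the quotient is qh*B^3 + {q,3} and the remainder is r, where
+   B = 2^GMP_NUMB_BITS. */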
+mp_limb_t
+mpn_div_qr_1 (mp_ptr qp, mp_limb_t *qh, mp_srcptr up, mp_size_t n,
+ mp_limb_t d)
+{
+ unsigned cnt;
+ mp_limb_t uh;
+
+ ASSERT (n > 0);
+ ASSERT (d > 0);
+
+ if (d & GMP_NUMB_HIGHBIT)
+ {
+ /* Normalized case */
+ mp_limb_t dinv, q;
+
+ uh = up[--n];
+
+ q = (uh >= d);
+ *qh = q;
+ uh -= (-q) & d;
+
+ if (BELOW_THRESHOLD (n, DIV_QR_1_NORM_THRESHOLD))
+ {
+ cnt = 0;
+ plain:
+ while (n > 0)
+ {
+ mp_limb_t ul = up[--n];
+ udiv_qrnnd (qp[n], uh, uh, ul, d);
+ }
+ return uh >> cnt;
+ }
+ invert_limb (dinv, d);
+ return mpn_div_qr_1n_pi1 (qp, up, n, uh, d, dinv);
+ }
+ else
+ {
+ /* Unnormalized case */
+ mp_limb_t dinv, ul;
+
+ if (! UDIV_NEEDS_NORMALIZATION
+ && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+ {
+ uh = up[--n];
+ udiv_qrnnd (*qh, uh, CNST_LIMB(0), uh, d);
+ cnt = 0;
+ goto plain;
+ }
+
+ count_leading_zeros (cnt, d);
+ d <<= cnt;
+
+#if HAVE_NATIVE_mpn_div_qr_1u_pi1
+ /* FIXME: Call loop doing on-the-fly normalization */
+#endif
+
+ /* Shift up front, use qp area for shifted copy. A bit messy,
+ since we have only n-1 limbs available, and shift the high
+ limb manually. */
+ uh = up[--n];
+ ul = (uh << cnt) | mpn_lshift (qp, up, n, cnt);
+ uh >>= (GMP_LIMB_BITS - cnt);
+
+ if (UDIV_NEEDS_NORMALIZATION
+ && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+ {
+ udiv_qrnnd (*qh, uh, uh, ul, d);
+ up = qp;
+ goto plain;
+ }
+ invert_limb (dinv, d);
+
+ udiv_qrnnd_preinv (*qh, uh, uh, ul, d, dinv);
+ return mpn_div_qr_1n_pi1 (qp, qp, n, uh, d, dinv) >> cnt;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/div_qr_1n_pi1.c b/vendor/gmp-6.3.0/mpn/generic/div_qr_1n_pi1.c
new file mode 100644
index 0000000..4977131
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/div_qr_1n_pi1.c
@@ -0,0 +1,505 @@
+/* mpn_div_qr_1n_pi1
+
+ Contributed to the GNU project by Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+#ifndef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 2
+#endif
+
+/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
+
+#if defined (__GNUC__) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add %6, %k2\n\t" \
+ "adc %4, %k1\n\t" \
+ "sbb %k0, %k0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add %6, %q2\n\t" \
+ "adc %4, %q1\n\t" \
+ "sbb %q0, %q0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addxcc %r3, %4, %1\n\t" \
+ "subx %%g0, %%g0, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addccc %r7, %8, %%g0\n\t" \
+ "addccc %r3, %4, %1\n\t" \
+ "clr %0\n\t" \
+ "movcs %%xcc, -1, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
+ "rJ" ((al) >> 32), "rI" ((bl) >> 32) \
+ __CLOBBER_CC)
+#if __VIS__ >= 0x300
+#undef add_mssaaaa
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addxccc %r3, %4, %1\n\t" \
+ "clr %0\n\t" \
+ "movcs %%xcc, -1, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#endif
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add%I6c %2, %5, %6\n\t" \
+ "adde %1, %3, %4\n\t" \
+ "subfe %0, %0, %0\n\t" \
+ "nor %0, %0, %0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "r" (a1), "r" (b1), "%r" (a0), "rI" (b0) \
+ __CLOBBER_CC)
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "algr %2, %6\n\t" \
+ "alcgr %1, %4\n\t" \
+ "lghi %0, 0\n\t" \
+ "alcgr %0, %0\n\t" \
+ "lcgr %0, %0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
+#endif
+
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "adds %2, %5, %6\n\t" \
+ "adcs %1, %3, %4\n\t" \
+ "movcc %0, #0\n\t" \
+ "movcs %0, #-1" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "adds %2, %x5, %6\n\t" \
+ "adcs %1, %x3, %x4\n\t" \
+ "csinv %0, xzr, xzr, cc\n\t" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rZ" (ah), "rZ" (bh), "%rZ" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_mssaaaa
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (m) = - (__c1 + (__s1 < __c0)); \
+ } while (0)
+#endif
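+
+/* In every variant above, add_mssaaaa (m, s1, s0, a1, a0, b1, b0) computes
+   the two-limb sum {s1,s0} = {a1,a0} + {b1,b0} (mod B^2) and sets m to 0
+   when the addition does not carry out and to ~0 (all ones) when it does,
+   so m can be used directly as a mask, as in the "& d" adjustments in the
+   functions below. */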
+
+#if DIV_QR_1N_METHOD == 1
+
+/* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}.
+ Requires that uh < d. */
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
+ mp_limb_t d, mp_limb_t dinv)
+{
+ ASSERT (n > 0);
+ ASSERT (uh < d);
+ ASSERT (d & GMP_NUMB_HIGHBIT);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));
+
+ do
+ {
+ mp_limb_t q, ul;
+
+ ul = up[--n];
+ udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv);
+ qp[n] = q;
+ }
+ while (n > 0);
+
+ return uh;
+}
+
+#elif DIV_QR_1N_METHOD == 2
+
+/* The main idea of this algorithm is to write B^2 = d (B + dinv) +
+ B2, where 1 <= B2 < d. Similarly to mpn_mod_1_1p, each iteration
+ can then replace
+
+ u1 B^2 = u1 B2 (mod d)
+
+ which gives a very short critical path for computing the remainder
+ (with some tricks to handle the carry when the next two lower limbs
+ are added in). To also get the quotient, include the corresponding
+ multiple of d in the expression,
+
+ u1 B^2 = u1 B2 + (u1 dinv + u1 B) d
+
+ We get the quotient by accumulating the (u1 dinv + u1 B) terms. The
+ two multiplies, u1 * B2 and u1 * dinv, are independent, and can be
+ executed in parallel.
+ */
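+/* A toy illustration of the identity, with B = 10 standing in for the limb
+   base: for d = 7 we get dinv = floor((B^2-1)/d) - B = 14 - 10 = 4 and
+   B2 = B^2 - d*(B + dinv) = 100 - 98 = 2, so indeed 1 <= B2 < d.  With
+   u1 = 3, u1*B^2 = 300 = (u1*dinv + u1*B)*d + u1*B2 = 42*7 + 6: the limb u1
+   contributes 42 to the quotient and is replaced by the remainder term 6. */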
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
+ mp_limb_t d, mp_limb_t dinv)
+{
+ mp_limb_t B2;
+ mp_limb_t u0, u2;
+ mp_limb_t q0, q1;
+ mp_limb_t p0, p1;
+ mp_limb_t t;
+ mp_size_t j;
+
+ ASSERT (d & GMP_LIMB_HIGHBIT);
+ ASSERT (n > 0);
+ ASSERT (u1 < d);
+
+ if (n == 1)
+ {
+ udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
+ return u1;
+ }
+
+ /* FIXME: Could be precomputed */
+ B2 = -d*dinv;
+
+ umul_ppmm (q1, q0, dinv, u1);
+ umul_ppmm (p1, p0, B2, u1);
+ q1 += u1;
+ ASSERT (q1 >= u1);
+ u0 = up[n-1]; /* Early read, to allow qp == up. */
+ qp[n-1] = q1;
+
+ add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
+
+ /* FIXME: Keep q1 in a variable between iterations, to reduce number
+ of memory accesses. */
+ for (j = n-2; j-- > 0; )
+ {
+ mp_limb_t q2, cy;
+
+ /* Additions for the q update:
+ * +-------+
+ * |u1 * v |
+ * +---+---+
+ * | u1|
+ * +---+---+
+ * | 1 | v | (conditional on u2)
+ * +---+---+
+ * | 1 | (conditional on u0 + u2 B2 carry)
+ * +---+
+ * + | q0|
+ * -+---+---+---+
+ * | q2| q1| q0|
+ * +---+---+---+
+ */
+ umul_ppmm (p1, t, u1, dinv);
+ ADDC_LIMB (cy, u0, u0, u2 & B2);
+ u0 -= (-cy) & d;
+ add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
+ add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
+ q0 = t;
+
+ /* Note that p1 + cy cannot overflow */
+ add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1 + cy);
+
+ umul_ppmm (p1, p0, u1, B2);
+
+ qp[j+1] = q1;
+ MPN_INCR_U (qp+j+2, n-j-2, q2);
+
+ add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
+ }
+
+ q1 = (u2 > 0);
+ u1 -= (-q1) & d;
+
+ t = (u1 >= d);
+ q1 += t;
+ u1 -= (-t) & d;
+
+ udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
+ add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
+
+ MPN_INCR_U (qp+1, n-1, q1);
+
+ qp[0] = q0;
+ return u0;
+}
+
+#elif DIV_QR_1N_METHOD == 3
+
+/* This variant handles carry from the u update earlier. This gives a
+ longer critical path, but reduces the work needed for the
+ quotients. */
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
+ mp_limb_t d, mp_limb_t dinv)
+{
+ mp_limb_t B2;
+ mp_limb_t cy, u0;
+ mp_limb_t q0, q1;
+ mp_limb_t p0, p1;
+ mp_limb_t t;
+ mp_size_t j;
+
+ ASSERT (d & GMP_LIMB_HIGHBIT);
+ ASSERT (n > 0);
+ ASSERT (u1 < d);
+
+ if (n == 1)
+ {
+ udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
+ return u1;
+ }
+
+ /* FIXME: Could be precomputed */
+ B2 = -d*dinv;
+
+ umul_ppmm (q1, q0, dinv, u1);
+ umul_ppmm (p1, p0, B2, u1);
+ q1 += u1;
+ ASSERT (q1 >= u1);
+ u0 = up[n-1]; /* Early read, to allow qp == up. */
+
+ add_mssaaaa (cy, u1, u0, u0, up[n-2], p1, p0);
+ u1 -= cy & d;
+ q1 -= cy;
+ qp[n-1] = q1;
+
+ /* FIXME: Keep q1 in a variable between iterations, to reduce number
+ of memory accesses. */
+ for (j = n-2; j-- > 0; )
+ {
+ mp_limb_t q2, cy;
+ mp_limb_t t1, t0;
+
+ /* Additions for the q update:
+ * +-------+
+ * |u1 * v |
+ * +---+---+
+ * | u1|
+ * +---+
+ * | 1 | (conditional on {u1, u0} carry)
+ * +---+
+ * + | q0|
+ * -+---+---+---+
+ * | q2| q1| q0|
+ * +---+---+---+
+ *
+ * Additions for the u update:
+ * +-------+
+ * |u1 * B2|
+ * +---+---+
+ * + |u0 |u-1|
+ * +---+---+
+ * - | d | (conditional on carry)
+ * ---+---+---+
+ * |u1 | u0|
+ * +---+---+
+ *
+ */
+ umul_ppmm (p1, p0, u1, B2);
+ ADDC_LIMB (q2, q1, u1, q0);
+ umul_ppmm (t1, t0, u1, dinv);
+ add_mssaaaa (cy, u1, u0, u0, up[j], p1, p0);
+ u1 -= cy & d;
+
+ /* t1 <= B-2, so cy can be added in without overflow. */
+ add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), t1 - cy);
+ q0 = t0;
+
+ /* Final q update */
+ qp[j+1] = q1;
+ MPN_INCR_U (qp+j+2, n-j-2, q2);
+ }
+
+ q1 = (u1 >= d);
+ u1 -= (-q1) & d;
+
+ udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
+ add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
+
+ MPN_INCR_U (qp+1, n-1, q1);
+
+ qp[0] = q0;
+ return u0;
+}
+
+#elif DIV_QR_1N_METHOD == 4
+
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
+ mp_limb_t d, mp_limb_t dinv)
+{
+ mp_limb_t B2;
+ mp_limb_t u2, u0;
+ mp_limb_t q0, q1;
+ mp_limb_t p0, p1;
+ mp_limb_t B2d0, B2d1;
+ mp_limb_t t;
+ mp_size_t j;
+
+ ASSERT (d & GMP_LIMB_HIGHBIT);
+ ASSERT (n > 0);
+ ASSERT (u1 < d);
+
+ if (n == 1)
+ {
+ udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
+ return u1;
+ }
+
+ /* FIXME: Could be precomputed */
+ B2 = -d*dinv;
+ /* B2 * (B-d) */
+ umul_ppmm (B2d1, B2d0, B2, -d);
+
+ umul_ppmm (q1, q0, dinv, u1);
+ umul_ppmm (p1, p0, B2, u1);
+ q1 += u1;
+ ASSERT (q1 >= u1);
+
+ add_mssaaaa (u2, u1, u0, up[n-1], up[n-2], p1, p0);
+
+ /* After read of up[n-1], to allow qp == up. */
+ qp[n-1] = q1 - u2;
+
+ /* FIXME: Keep q1 in a variable between iterations, to reduce number
+ of memory accesses. */
+ for (j = n-2; j-- > 0; )
+ {
+ mp_limb_t q2, cy;
+ mp_limb_t t1, t0;
+
+ /* Additions for the q update. *After* u1 -= u2 & d adjustment.
+ * +-------+
+ * |u1 * v |
+ * +---+---+
+ * | u1|
+ * +---+
+ * | 1 | (conditional on {u1, u0} carry)
+ * +---+
+ * + | q0|
+ * -+---+---+---+
+ * | q2| q1| q0|
+ * +---+---+---+
+ *
+ * Additions for the u update. *Before* u1 -= u2 & d adjustment.
+ * +-------+
+ * |u1 * B2|
+ * +---+---+
+ * |u0 |u-1|
+ * +---+---+
+ + + |B2(B-d)| (conditional on u2)
+ * -+---+---+---+
+ * |u2 |u1 | u0|
+ * +---+---+---+
+ *
+ */
+ /* Multiply with unadjusted u1, to shorten critical path. */
+ umul_ppmm (p1, p0, u1, B2);
+ u1 -= (d & u2);
+ ADDC_LIMB (q2, q1, u1, q0);
+ umul_ppmm (t1, t0, u1, dinv);
+
+ add_mssaaaa (cy, u1, u0, u0, up[j], u2 & B2d1, u2 & B2d0);
+ add_mssaaaa (u2, u1, u0, u1, u0, p1, p0);
+ u2 += cy;
+ ASSERT(-u2 <= 1);
+
+ /* t1 <= B-2, so u2 can be added in without overflow. */
+ add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), t1 - u2);
+ q0 = t0;
+
+ /* Final q update */
+ qp[j+1] = q1;
+ MPN_INCR_U (qp+j+2, n-j-2, q2);
+ }
+ u1 -= u2 & d;
+
+ q1 = (u1 >= d);
+ u1 -= (-q1) & d;
+
+ udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
+ add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
+
+ MPN_INCR_U (qp+1, n-1, q1);
+
+ qp[0] = q0;
+ return u0;
+}
+#else
+#error Unknown DIV_QR_1N_METHOD
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/div_qr_1n_pi2.c b/vendor/gmp-6.3.0/mpn/generic/div_qr_1n_pi2.c
new file mode 100644
index 0000000..daae68f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/div_qr_1n_pi2.c
@@ -0,0 +1,203 @@
+/* mpn_div_qr_1n_pi2.
+
+ THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2013, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* ISSUES:
+
+ * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
+
+ * Are there any problems with generating n quotient limbs in the q area? It
+ surely simplifies things.
+
+ * Not yet adequately tested.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+ an additional sum operand.
+*/
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((USItype)(s2)), \
+ "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((UDItype)(s2)), \
+ "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %x3, xzr"\
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) \
+ __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0) \
+ __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (s2) += __c1 + (__s1 < __c0); \
+ } while (0)
+#endif
+
+struct precomp_div_1_pi2
+{
+ mp_limb_t dip[2];
+ mp_limb_t d;
+ int norm_cnt;
+};
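+/* dip[] holds the two precomputed inverse limbs consumed as di1/di0 in the
+   loop below, d is the divisor as given, and norm_cnt is the normalizing
+   shift count (unused in this normalized variant, but used by
+   mpn_div_qr_1u_pi2). */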
+
+mp_limb_t
+mpn_div_qr_1n_pi2 (mp_ptr qp,
+ mp_srcptr up, mp_size_t un,
+ struct precomp_div_1_pi2 *pd)
+{
+ mp_limb_t most_significant_q_limb;
+ mp_size_t i;
+ mp_limb_t r, u2, u1, u0;
+ mp_limb_t d0, di1, di0;
+ mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
+ mp_limb_t cnd;
+
+ ASSERT (un >= 2);
+ ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
+ ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
+ ASSERT_MPN (up, un);
+
+#define q3 q3a
+#define q2 q2b
+#define q1 q1b
+
+ up += un - 3;
+ r = up[2];
+ d0 = pd->d;
+
+ most_significant_q_limb = (r >= d0);
+ r -= d0 & -most_significant_q_limb;
+
+ qp += un - 3;
+ qp[2] = most_significant_q_limb;
+
+ di1 = pd->dip[1];
+ di0 = pd->dip[0];
+
+ for (i = un - 3; i >= 0; i -= 2)
+ {
+ u2 = r;
+ u1 = up[1];
+ u0 = up[0];
+
+ /* Dividend in {r,u1,u0} */
+
+ umul_ppmm (q1d,q0d, u1, di0);
+ umul_ppmm (q2b,q1b, u1, di1);
+ q2b++; /* cannot spill */
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+ umul_ppmm (q2c,q1c, u2, di0);
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+ umul_ppmm (q3a,q2a, u2, di1);
+
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+ q3 += r;
+
+ r = u0 - q2 * d0;
+
+ cnd = (r >= q1);
+ r += d0 & -cnd;
+ sub_ddmmss (q3,q2, q3,q2, 0,cnd);
+
+ if (UNLIKELY (r >= d0))
+ {
+ r -= d0;
+ add_ssaaaa (q3,q2, q3,q2, 0,1);
+ }
+
+ qp[0] = q2;
+ qp[1] = q3;
+
+ up -= 2;
+ qp -= 2;
+ }
+
+ if ((un & 1) == 0)
+ {
+ u2 = r;
+ u1 = up[1];
+
+ udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
+ qp[1] = q3;
+ }
+
+ return r;
+
+#undef q3
+#undef q2
+#undef q1
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/div_qr_1u_pi2.c b/vendor/gmp-6.3.0/mpn/generic/div_qr_1u_pi2.c
new file mode 100644
index 0000000..ea38e3c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/div_qr_1u_pi2.c
@@ -0,0 +1,236 @@
+/* mpn_div_qr_1u_pi2.
+
+ THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2013, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* ISSUES:
+
+ * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
+
+ * Are there any problems with generating n quotient limbs in the q area? It
+ surely simplifies things.
+
+ * Not yet adequately tested.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+ an additional sum operand.
+*/
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((USItype)(s2)), \
+ "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((UDItype)(s2)), \
+ "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %x3, xzr"\
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) \
+ __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0) \
+ __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (s2) += __c1 + (__s1 < __c0); \
+ } while (0)
+#endif
+
+struct precomp_div_1_pi2
+{
+ mp_limb_t dip[2];
+ mp_limb_t d;
+ int norm_cnt;
+};
+
+mp_limb_t
+mpn_div_qr_1u_pi2 (mp_ptr qp,
+ mp_srcptr up, mp_size_t un,
+ struct precomp_div_1_pi2 *pd)
+{
+ mp_size_t i;
+ mp_limb_t r, u2, u1, u0;
+ mp_limb_t d0, di1, di0;
+ mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
+ mp_limb_t cnd;
+ int cnt;
+
+ ASSERT (un >= 2);
+ ASSERT ((pd->d & GMP_NUMB_HIGHBIT) == 0);
+ ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
+ ASSERT_MPN (up, un);
+
+#define q3 q3a
+#define q2 q2b
+#define q1 q1b
+
+ up += un - 3;
+ cnt = pd->norm_cnt;
+ r = up[2] >> (GMP_NUMB_BITS - cnt);
+ d0 = pd->d << cnt;
+
+ qp += un - 2;
+
+ di1 = pd->dip[1];
+ di0 = pd->dip[0];
+
+ for (i = un - 3; i >= 0; i -= 2)
+ {
+ u2 = r;
+ u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
+ u0 = (up[1] << cnt) | (up[0] >> (GMP_NUMB_BITS - cnt));
+
+ /* Dividend in {r,u1,u0} */
+
+ umul_ppmm (q1d,q0d, u1, di0);
+ umul_ppmm (q2b,q1b, u1, di1);
+ q2b++; /* cannot spill */
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+ umul_ppmm (q2c,q1c, u2, di0);
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+ umul_ppmm (q3a,q2a, u2, di1);
+
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+ q3 += r;
+
+ r = u0 - q2 * d0;
+
+ cnd = (r >= q1);
+ r += d0 & -cnd;
+ sub_ddmmss (q3,q2, q3,q2, 0,cnd);
+
+ if (UNLIKELY (r >= d0))
+ {
+ r -= d0;
+ add_ssaaaa (q3,q2, q3,q2, 0,1);
+ }
+
+ qp[0] = q2;
+ qp[1] = q3;
+
+ up -= 2;
+ qp -= 2;
+ }
+
+ if ((un & 1) != 0)
+ {
+ u2 = r;
+ u1 = (up[2] << cnt);
+
+ udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
+ qp[1] = q3;
+ }
+ else
+ {
+ u2 = r;
+ u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
+ u0 = (up[1] << cnt);
+
+ /* Dividend in {r,u1,u0} */
+
+ umul_ppmm (q1d,q0d, u1, di0);
+ umul_ppmm (q2b,q1b, u1, di1);
+ q2b++; /* cannot spill */
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+ umul_ppmm (q2c,q1c, u2, di0);
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+ umul_ppmm (q3a,q2a, u2, di1);
+
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+ q3 += r;
+
+ r = u0 - q2 * d0;
+
+ cnd = (r >= q1);
+ r += d0 & -cnd;
+ sub_ddmmss (q3,q2, q3,q2, 0,cnd);
+
+ if (UNLIKELY (r >= d0))
+ {
+ r -= d0;
+ add_ssaaaa (q3,q2, q3,q2, 0,1);
+ }
+
+ qp[0] = q2;
+ qp[1] = q3;
+ }
+
+ return r >> cnt;
+
+#undef q3
+#undef q2
+#undef q1
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/div_qr_2.c b/vendor/gmp-6.3.0/mpn/generic/div_qr_2.c
new file mode 100644
index 0000000..c3c8f57
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/div_qr_2.c
@@ -0,0 +1,314 @@
+/* mpn_div_qr_2 -- Divide natural numbers, producing both remainder and
+ quotient. The divisor is two limbs.
+
+ Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002, 2011, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef DIV_QR_2_PI2_THRESHOLD
+/* Disabled unless explicitly tuned. */
+#define DIV_QR_2_PI2_THRESHOLD MP_LIMB_T_MAX
+#endif
+
+#ifndef SANITY_CHECK
+#define SANITY_CHECK 0
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+ an additional sum operand.
+ * add_csaac accepts two addends and a carry in, and generates a sum and a
+ carry out. A little like a "full adder".
+*/
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((USItype)(s2)), \
+ "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((UDItype)(s2)), \
+ "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %x3, xzr"\
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) \
+ __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0) \
+ __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (s2) += __c1 + (__s1 < __c0); \
+ } while (0)
+#endif
+
+/* Typically used with r1, r0 same as n3, n2. Other types of overlap
+ between inputs and outputs are not supported. */
+#define udiv_qr_4by2(q1,q0, r1,r0, n3,n2,n1,n0, d1,d0, di1,di0) \
+ do { \
+ mp_limb_t _q3, _q2a, _q2, _q1, _q2c, _q1c, _q1d, _q0; \
+ mp_limb_t _t1, _t0; \
+ mp_limb_t _mask; \
+ \
+ /* [q3,q2,q1,q0] = [n3,n2]*[di1,di0] + [n3,n2,n1,n0] + [0,1,0,0] */ \
+ umul_ppmm (_q2,_q1, n2, di1); \
+ umul_ppmm (_q3,_q2a, n3, di1); \
+ ++_q2; /* _q2 cannot overflow */ \
+ add_ssaaaa (_q3,_q2, _q3,_q2, n3,_q2a); \
+ umul_ppmm (_q2c,_q1c, n3, di0); \
+ add_sssaaaa (_q3,_q2,_q1, _q2,_q1, n2,_q1c); \
+ umul_ppmm (_q1d,_q0, n2, di0); \
+ add_sssaaaa (_q2c,_q1,_q0, _q1,_q0, n1,n0); /* _q2c cannot overflow */ \
+ add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2c,_q1d); \
+ \
+ umul_ppmm (_t1,_t0, _q2, d0); \
+ _t1 += _q2 * d1 + _q3 * d0; \
+ \
+ sub_ddmmss (r1, r0, n1, n0, _t1, _t0); \
+ \
+ _mask = -(mp_limb_t) ((r1 >= _q1) & ((r1 > _q1) | (r0 >= _q0))); /* (r1,r0) >= (q1,q0) */ \
+ add_ssaaaa (r1, r0, r1, r0, d1 & _mask, d0 & _mask); \
+ sub_ddmmss (_q3, _q2, _q3, _q2, CNST_LIMB(0), -_mask); \
+ \
+ if (UNLIKELY (r1 >= d1)) \
+ { \
+ if (r1 > d1 || r0 >= d0) \
+ { \
+ sub_ddmmss (r1, r0, r1, r0, d1, d0); \
+ add_ssaaaa (_q3, _q2, _q3, _q2, CNST_LIMB(0), CNST_LIMB(1));\
+ } \
+ } \
+ (q1) = _q3; \
+ (q0) = _q2; \
+ } while (0)
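+
+/* udiv_qr_4by2 divides the four-limb number <n3,n2,n1,n0> by the two-limb
+   divisor <d1,d0>, using the precomputed two-limb inverse <di1,di0>, giving
+   a two-limb quotient <q1,q0> and remainder <r1,r0>.  The caller below keeps
+   the incoming high part <n3,n2> reduced below <d1,d0>, so the quotient
+   always fits in two limbs. */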
+
+static void
+invert_4by2 (mp_ptr di, mp_limb_t d1, mp_limb_t d0)
+{
+ mp_limb_t v1, v0, p1, t1, t0, p0, mask;
+ invert_limb (v1, d1);
+ p1 = d1 * v1;
+ /* <1, v1> * d1 = <B-1, p1> */
+ p1 += d0;
+ if (p1 < d0)
+ {
+ v1--;
+ mask = -(mp_limb_t) (p1 >= d1);
+ p1 -= d1;
+ v1 += mask;
+ p1 -= mask & d1;
+ }
+ /* <1, v1> * d1 + d0 = <B-1, p1> */
+ umul_ppmm (t1, p0, d0, v1);
+ p1 += t1;
+ if (p1 < t1)
+ {
+ if (UNLIKELY (p1 >= d1))
+ {
+ if (p1 > d1 || p0 >= d0)
+ {
+ sub_ddmmss (p1, p0, p1, p0, d1, d0);
+ v1--;
+ }
+ }
+ sub_ddmmss (p1, p0, p1, p0, d1, d0);
+ v1--;
+ }
+ /* Now v1 is the 3/2 inverse, <1, v1> * <d1, d0> = <B-1, p1, p0>,
+ * with <p1, p0> + <d1, d0> >= B^2.
+ *
+ * The 4/2 inverse is (B^4 - 1) / <d1, d0> = <1, v1, v0>. The
+ * partial remainder after <1, v1> is
+ *
+ * B^4 - 1 - B <1, v1> <d1, d0> = <B-1, B-1, B-1, B-1> - <B-1, p1, p0, 0>
+ * = <~p1, ~p0, B-1>
+ */
+ udiv_qr_3by2 (v0, t1, t0, ~p1, ~p0, MP_LIMB_T_MAX, d1, d0, v1);
+ di[0] = v0;
+ di[1] = v1;
+
+#if SANITY_CHECK
+ {
+ mp_limb_t tp[4];
+ mp_limb_t dp[2];
+ dp[0] = d0;
+ dp[1] = d1;
+ mpn_mul_n (tp, dp, di, 2);
+ ASSERT_ALWAYS (mpn_add_n (tp+2, tp+2, dp, 2) == 0);
+ ASSERT_ALWAYS (tp[2] == MP_LIMB_T_MAX);
+ ASSERT_ALWAYS (tp[3] == MP_LIMB_T_MAX);
+ ASSERT_ALWAYS (mpn_add_n (tp, tp, dp, 2) == 1);
+ }
+#endif
+}
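+
+/* A toy-base check of what invert_4by2 produces, with B = 10 standing in for
+   the limb base: for <d1,d0> = <7,3>, i.e. d = 73, the 4/2 inverse is
+   floor((B^4 - 1)/d) = floor(9999/73) = 136 = <1,3,6>, so di[1] = 3 and
+   di[0] = 6; the leading 1 is implicit, cf. the [n3,n2] term added into the
+   quotient in udiv_qr_4by2 above. */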
+
+static mp_limb_t
+mpn_div_qr_2n_pi2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+ mp_limb_t d1, mp_limb_t d0, mp_limb_t di1, mp_limb_t di0)
+{
+ mp_limb_t qh;
+ mp_size_t i;
+ mp_limb_t r1, r0;
+
+ ASSERT (nn >= 2);
+ ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+ r1 = np[nn-1];
+ r0 = np[nn-2];
+
+ qh = 0;
+ if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+ {
+#if GMP_NAIL_BITS == 0
+ sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+ r0 = r0 - d0;
+ r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+ r0 &= GMP_NUMB_MASK;
+#endif
+ qh = 1;
+ }
+
+ for (i = nn - 2; i >= 2; i -= 2)
+ {
+ mp_limb_t n1, n0, q1, q0;
+ n1 = np[i-1];
+ n0 = np[i-2];
+ udiv_qr_4by2 (q1, q0, r1, r0, r1, r0, n1, n0, d1, d0, di1, di0);
+ qp[i-1] = q1;
+ qp[i-2] = q0;
+ }
+
+ if (i > 0)
+ {
+ mp_limb_t q;
+ udiv_qr_3by2 (q, r1, r0, r1, r0, np[0], d1, d0, di1);
+ qp[0] = q;
+ }
+ rp[1] = r1;
+ rp[0] = r0;
+
+ return qh;
+}
+
+
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least
+ significant quotient limbs at qp and the two-limb remainder at rp.
+ Return the most significant limb of the quotient.
+
+ Preconditions:
+ 1. qp must either not overlap with the other operands at all, or
+ qp >= np + 2 must hold true. (This means that it's possible to put
+ the quotient in the high part of {np,nn}.)
+ 2. nn >= 2. */
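+/* A minimal usage sketch (illustrative values; internal interface declared
+   in gmp-impl.h), with n holding the least significant limb first:
+
+     mp_limb_t n[4] = { 1, 2, 3, 4 };
+     mp_limb_t d[2] = { 5, 6 };
+     mp_limb_t q[2], r[2], qh;
+     qh = mpn_div_qr_2 (q, r, n, 4, d);
+
+   Afterwards the quotient is qh*B^2 + {q,2} and the remainder is {r,2}. */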
+
+mp_limb_t
+mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp)
+{
+ mp_limb_t d1;
+ mp_limb_t d0;
+ gmp_pi1_t dinv;
+
+ ASSERT (nn >= 2);
+ ASSERT (! MPN_OVERLAP_P (qp, nn-2, np, nn) || qp >= np + 2);
+ ASSERT_MPN (np, nn);
+ ASSERT_MPN (dp, 2);
+
+ d1 = dp[1]; d0 = dp[0];
+
+ ASSERT (d1 > 0);
+
+ if (UNLIKELY (d1 & GMP_NUMB_HIGHBIT))
+ {
+ if (BELOW_THRESHOLD (nn, DIV_QR_2_PI2_THRESHOLD))
+ {
+ gmp_pi1_t dinv;
+ invert_pi1 (dinv, d1, d0);
+ return mpn_div_qr_2n_pi1 (qp, rp, np, nn, d1, d0, dinv.inv32);
+ }
+ else
+ {
+ mp_limb_t di[2];
+ invert_4by2 (di, d1, d0);
+ return mpn_div_qr_2n_pi2 (qp, rp, np, nn, d1, d0, di[1], di[0]);
+ }
+ }
+ else
+ {
+ int shift;
+ count_leading_zeros (shift, d1);
+ d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
+ d0 <<= shift;
+ invert_pi1 (dinv, d1, d0);
+ return mpn_div_qr_2u_pi1 (qp, rp, np, nn, d1, d0, shift, dinv.inv32);
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/div_qr_2n_pi1.c b/vendor/gmp-6.3.0/mpn/generic/div_qr_2n_pi1.c
new file mode 100644
index 0000000..131a811
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/div_qr_2n_pi1.c
@@ -0,0 +1,84 @@
+/* mpn_div_qr_2n_pi1
+
+ Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for normalized divisor */
+mp_limb_t
+mpn_div_qr_2n_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+ mp_limb_t d1, mp_limb_t d0, mp_limb_t di)
+{
+ mp_limb_t qh;
+ mp_size_t i;
+ mp_limb_t r1, r0;
+
+ ASSERT (nn >= 2);
+ ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+ np += nn - 2;
+ r1 = np[1];
+ r0 = np[0];
+
+ qh = 0;
+ if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+ {
+#if GMP_NAIL_BITS == 0
+ sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+ r0 = r0 - d0;
+ r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+ r0 &= GMP_NUMB_MASK;
+#endif
+ qh = 1;
+ }
+
+ for (i = nn - 2 - 1; i >= 0; i--)
+ {
+ mp_limb_t n0, q;
+ n0 = np[-1];
+ udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
+ np--;
+ qp[i] = q;
+ }
+
+ rp[1] = r1;
+ rp[0] = r0;
+
+ return qh;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/div_qr_2u_pi1.c b/vendor/gmp-6.3.0/mpn/generic/div_qr_2u_pi1.c
new file mode 100644
index 0000000..70e617b
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/div_qr_2u_pi1.c
@@ -0,0 +1,76 @@
+/* mpn_div_qr_2u_pi1
+
+ Contributed to the GNU project by Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for unnormalized divisor. Caller must pass shifted d1 and
+ d0, while {np,nn} is shifted on the fly. */
+mp_limb_t
+mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+ mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
+{
+ mp_limb_t qh;
+ mp_limb_t r2, r1, r0;
+ mp_size_t i;
+
+ ASSERT (nn >= 2);
+ ASSERT (d1 & GMP_NUMB_HIGHBIT);
+ ASSERT (shift > 0);
+
+ r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
+ r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
+ r0 = np[nn-2] << shift;
+
+ udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
+
+ for (i = nn - 2 - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ r0 = np[i];
+ r1 |= r0 >> (GMP_LIMB_BITS - shift);
+ r0 <<= shift;
+ udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
+ qp[i] = q;
+ }
+
+ rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
+ rp[1] = r2 >> shift;
+
+ return qh;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/dive_1.c b/vendor/gmp-6.3.0/mpn/generic/dive_1.c
new file mode 100644
index 0000000..056f5b9
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/dive_1.c
@@ -0,0 +1,146 @@
+/* mpn_divexact_1 -- mpn by limb exact division.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003, 2005, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+
+/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
+ q will only be correct if d divides a exactly.
+
+ A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
+ give zero on all CPUs (for instance it doesn't on the x86s). This
+ separate loop might run faster too, helping odd divisors.
+
+ Possibilities:
+
+ mpn_divexact_1c could be created, accepting and returning c. This would
+ let a long calculation be done piece by piece. Currently there's no
+ particular need for that, and not returning c means that a final umul can
+ be skipped.
+
+ Another use for returning c would be letting the caller know whether the
+ division was in fact exact. It would work just to return the carry bit
+ "c=(l>s)" and let the caller do a final umul if interested.
+
+ When the divisor is even, the factors of two could be handled with a
+ separate mpn_rshift, instead of shifting on the fly. That might be
+ faster on some CPUs and would mean just the shift==0 style loop would be
+ needed.
+
+ If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
+ shift==0 loop is unnecessary, and could be eliminated if there's no great
+ speed difference.
+
+ It's not clear whether "/" is the best way to handle size==1. Alpha gcc
+ 2.95 for instance has a poor "/" and might prefer the modular method.
+ Perhaps a tuned parameter should control this.
+
+ If src[size-1] < divisor then dst[size-1] will be zero, and one divide
+ step could be skipped. A test at last step for s<divisor (or ls in the
+ even case) might be a good way to do that. But if this code is often
+ used with small divisors then it might not be worth bothering */
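+/* A toy-base walk-through of the shift==0 loop below, with B = 10 standing
+   in for the limb base: dividing 91 by 7 exactly, with inverse = 3 since
+   3*7 == 1 (mod 10).  Limb 0: q = 1*3 mod 10 = 3, and the carry becomes
+   high(7*3) = 2.  Limb 1: l = 9 - 2 = 7, q = 7*3 mod 10 = 1.  The quotient
+   limbs are 3 then 1, i.e. 13 = 91/7. */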
+
+void
+mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+ mp_size_t i;
+ mp_limb_t c, h, l, ls, s, s_next, inverse, dummy;
+ unsigned shift;
+
+ ASSERT (size >= 1);
+ ASSERT (divisor != 0);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
+ ASSERT_MPN (src, size);
+ ASSERT_LIMB (divisor);
+
+ if ((divisor & 1) == 0)
+ {
+ count_trailing_zeros (shift, divisor);
+ divisor >>= shift;
+ }
+ else
+ shift = 0;
+
+ binvert_limb (inverse, divisor);
+ divisor <<= GMP_NAIL_BITS;
+
+ if (shift != 0)
+ {
+ c = 0;
+
+ s = src[0];
+
+ for (i = 1; i < size; i++)
+ {
+ s_next = src[i];
+ ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
+ s = s_next;
+
+ SUBC_LIMB (c, l, ls, c);
+
+ l = (l * inverse) & GMP_NUMB_MASK;
+ dst[i - 1] = l;
+
+ umul_ppmm (h, dummy, l, divisor);
+ c += h;
+ }
+
+ ls = s >> shift;
+ l = ls - c;
+ l = (l * inverse) & GMP_NUMB_MASK;
+ dst[size - 1] = l;
+ }
+ else
+ {
+ s = src[0];
+
+ l = (s * inverse) & GMP_NUMB_MASK;
+ dst[0] = l;
+ c = 0;
+
+ for (i = 1; i < size; i++)
+ {
+ umul_ppmm (h, dummy, l, divisor);
+ c += h;
+
+ s = src[i];
+ SUBC_LIMB (c, l, s, c);
+
+ l = (l * inverse) & GMP_NUMB_MASK;
+ dst[i] = l;
+ }
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/diveby3.c b/vendor/gmp-6.3.0/mpn/generic/diveby3.c
new file mode 100644
index 0000000..7dee0bc
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/diveby3.c
@@ -0,0 +1,173 @@
+/* mpn_divexact_by3c -- mpn exact division by 3.
+
+Copyright 2000-2003, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#if DIVEXACT_BY3_METHOD == 0
+
+mp_limb_t
+mpn_divexact_by3c (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_limb_t c)
+{
+ mp_limb_t r;
+ r = mpn_bdiv_dbm1c (rp, up, un, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * c);
+
+ /* Possible bdiv_dbm1 return values are C * (GMP_NUMB_MASK / 3), 0 <= C < 3.
+ We want to return C. We compute the remainder mod 4 and notice that the
+ inverse of (2^(2k)-1)/3 mod 4 is 1. */
+ return r & 3;
+}
+
+#endif
+
+#if DIVEXACT_BY3_METHOD == 1
+
+/* The algorithm here is basically the same as mpn_divexact_1, as described
+ in the manual. Namely at each step q = (src[i]-c)*inverse, and new c =
+ borrow(src[i]-c) + high(divisor*q). But because the divisor is just 3,
+ high(divisor*q) can be determined with two comparisons instead of a
+ multiply.
+
+ The "c += ..."s add the high limb of 3*l to c. That high limb will be 0,
+ 1 or 2. Doing two separate "+="s seems to give better code on gcc (as of
+ 2.95.2 at least).
+
+ It will be noted that the new c is formed by adding three values each 0
+ or 1. But the total is only 0, 1 or 2. When the subtraction src[i]-c
+ causes a borrow, that leaves a limb value of either 0xFF...FF or
+ 0xFF...FE. The multiply by MODLIMB_INVERSE_3 gives 0x55...55 or
+ 0xAA...AA respectively, and in those cases high(3*q) is only 0 or 1
+ respectively, hence a total of no more than 2.
+
+ Alternatives:
+
+ This implementation has each multiply on the dependent chain, due to
+ "l=s-c". See below for alternative code which avoids that. */
+
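+/* A toy-base check of the two-comparison trick, with B = 10 standing in for
+   the limb base, so the analogous thresholds are ceil(B/3) = 4 and
+   ceil(2*B/3) = 7: for q = 8, 3*q = 24 and high(3*q) = 2 = (8>=4) + (8>=7);
+   for q = 5, 3*q = 15 and high(3*q) = 1 = (5>=4) + (5>=7). */
+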
+mp_limb_t
+mpn_divexact_by3c (mp_ptr restrict rp, mp_srcptr restrict up, mp_size_t un, mp_limb_t c)
+{
+ mp_limb_t l, q, s;
+ mp_size_t i;
+
+ ASSERT (un >= 1);
+ ASSERT (c == 0 || c == 1 || c == 2);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));
+
+ i = 0;
+ do
+ {
+ s = up[i];
+ SUBC_LIMB (c, l, s, c);
+
+ q = (l * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;
+ rp[i] = q;
+
+ c += (q >= GMP_NUMB_CEIL_MAX_DIV3);
+ c += (q >= GMP_NUMB_CEIL_2MAX_DIV3);
+ }
+ while (++i < un);
+
+ ASSERT (c == 0 || c == 1 || c == 2);
+ return c;
+}
+
+
+#endif
+
+#if DIVEXACT_BY3_METHOD == 2
+
+/* The following alternative code re-arranges the quotient calculation from
+ (src[i]-c)*inverse to instead
+
+ q = src[i]*inverse - c*inverse
+
+ thereby allowing src[i]*inverse to be scheduled back as far as desired,
+ making full use of multiplier throughput and leaving just some carry
+ handing on the dependent chain.
+
+ The carry handling consists of determining the c for the next iteration.
+ This is the same as described above, namely look for any borrow from
+ src[i]-c, and at the high of 3*q.
+
+ high(3*q) is done with two comparisons as above (in c2 and c3). The
+ borrow from src[i]-c is incorporated into those by noting that if there's
+ a carry then then we have src[i]-c == 0xFF..FF or 0xFF..FE, in turn
+ giving q = 0x55..55 or 0xAA..AA. Adding 1 to either of those q values is
+ enough to make high(3*q) come out 1 bigger, as required.
+
+ l = -c*inverse is calculated at the same time as c, since for most chips
+ it can be more conveniently derived from separate c1/c2/c3 values than
+ from a combined c equal to 0, 1 or 2.
+
+ The net effect is that with good pipelining this loop should be able to
+ run at perhaps 4 cycles/limb, depending on available execute resources
+ etc.
+
+ Usage:
+
+ This code is not used by default, since we really can't rely on the
+ compiler generating a good software pipeline, nor on such an approach
+ even being worthwhile on all CPUs.
+
+ Itanium is one chip where this algorithm helps though, see
+ mpn/ia64/diveby3.asm. */
+
+mp_limb_t
+mpn_divexact_by3c (mp_ptr restrict rp, mp_srcptr restrict up, mp_size_t un, mp_limb_t cy)
+{
+ mp_limb_t s, sm, cl, q, qx, c2, c3;
+ mp_size_t i;
+
+ ASSERT (un >= 1);
+ ASSERT (cy == 0 || cy == 1 || cy == 2);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));
+
+ cl = cy == 0 ? 0 : cy == 1 ? -MODLIMB_INVERSE_3 : -2*MODLIMB_INVERSE_3;
+
+ for (i = 0; i < un; i++)
+ {
+ s = up[i];
+ sm = (s * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;
+
+ q = (cl + sm) & GMP_NUMB_MASK;
+ rp[i] = q;
+ qx = q + (s < cy);
+
+ c2 = qx >= GMP_NUMB_CEIL_MAX_DIV3;
+ c3 = qx >= GMP_NUMB_CEIL_2MAX_DIV3 ;
+
+ cy = c2 + c3;
+ cl = (-c2 & -MODLIMB_INVERSE_3) + (-c3 & -MODLIMB_INVERSE_3);
+ }
+
+ return cy;
+}
+
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/divexact.c b/vendor/gmp-6.3.0/mpn/generic/divexact.c
new file mode 100644
index 0000000..ec417df
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/divexact.c
@@ -0,0 +1,296 @@
+/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
+ the result in Q = {qp,nn-dn+1} expecting no remainder. Overlap allowed
+ between Q and N; all other overlap disallowed.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if 1
+void
+mpn_divexact (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+{
+ unsigned shift;
+ mp_size_t qn;
+ mp_ptr tp;
+ TMP_DECL;
+
+ ASSERT (dn > 0);
+ ASSERT (nn >= dn);
+ ASSERT (dp[dn-1] > 0);
+
+ while (dp[0] == 0)
+ {
+ ASSERT (np[0] == 0);
+ dp++;
+ np++;
+ dn--;
+ nn--;
+ }
+
+ if (dn == 1)
+ {
+ MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
+ return;
+ }
+
+ TMP_MARK;
+
+ qn = nn + 1 - dn;
+ count_trailing_zeros (shift, dp[0]);
+
+ if (shift > 0)
+ {
+ mp_ptr wp;
+ mp_size_t ss;
+ ss = (dn > qn) ? qn + 1 : dn;
+
+ tp = TMP_ALLOC_LIMBS (ss);
+ mpn_rshift (tp, dp, ss, shift);
+ dp = tp;
+
+ /* Since we have excluded dn == 1, we have nn > qn, and we need
+ to shift one limb beyond qn. */
+ wp = TMP_ALLOC_LIMBS (qn + 1);
+ mpn_rshift (wp, np, qn + 1, shift);
+ np = wp;
+ }
+
+ if (dn > qn)
+ dn = qn;
+
+ tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
+ mpn_bdiv_q (qp, np, qn, dp, dn, tp);
+ TMP_FREE;
+
+ /* Since bdiv_q computes -N/D (mod B^{qn}), we must negate now. */
+ mpn_neg (qp, qp, qn);
+}
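+
+/* Illustrative note (not GMP code): a one-limb instance of the final
+   negation, assuming 64-bit limbs.  For N = 6, D = 3 and qn = 1, mpn_bdiv_q
+   yields -N/D = -2 (mod 2^64) = 0xFFFFFFFFFFFFFFFE, and the mpn_neg above
+   turns that into the true quotient 2.  */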
+
+#else
+
+/* We use the Jebelean's bidirectional exact division algorithm. This is
+ somewhat naively implemented, with equal quotient parts done by 2-adic
+ division and truncating division. Since 2-adic division is faster, it
+ should be used for a larger chunk.
+
+ This code is horrendously ugly, in all sorts of ways.
+
+ * It was hacked without much care or thought, but with a testing program.
+ * It handles scratch space frivolously, and furthermore the itch function
+ is broken.
+ * Doesn't provide any measures to deal with mu_divappr_q's +3 error. We
+ have yet to provoke an error due to this, though.
+ * Algorithm selection leaves a lot to be desired. In particular, the choice
+ between DC and MU isn't a point, but we treat it like one.
+ * It makes the msb part 1 or 2 limbs larger than the lsb part, in spite of
+ that the latter is faster. We should at least reverse this, but perhaps
+ we should make the lsb part considerably larger. (How do we tune this?)
+*/
+
+mp_size_t
+mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
+{
+ return nn + dn; /* FIXME this is not right */
+}
+
+void
+mpn_divexact (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr scratch)
+{
+ mp_size_t qn;
+ mp_size_t nn0, qn0;
+ mp_size_t nn1, qn1;
+ mp_ptr tp;
+ mp_limb_t qml;
+ mp_limb_t qh;
+ int cnt;
+ mp_ptr xdp;
+ mp_limb_t di;
+ mp_limb_t cy;
+ gmp_pi1_t dinv;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ qn = nn - dn + 1;
+
+ /* For small divisors, and small quotients, don't use Jebelean's algorithm. */
+ if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
+ {
+ tp = scratch;
+ MPN_COPY (tp, np, qn);
+ binvert_limb (di, dp[0]); di = -di;
+ dn = MIN (dn, qn);
+ mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+ TMP_FREE;
+ return;
+ }
+
+ qn0 = ((nn - dn) >> 1) + 1; /* low quotient size */
+
+ /* If quotient is much larger than the divisor, the bidirectional algorithm
+ does not work as currently implemented. Fall back to plain bdiv. */
+ if (qn0 > dn)
+ {
+ if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
+ {
+ tp = scratch;
+ MPN_COPY (tp, np, qn);
+ binvert_limb (di, dp[0]); di = -di;
+ dn = MIN (dn, qn);
+ mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+ }
+ else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+ {
+ tp = scratch;
+ MPN_COPY (tp, np, qn);
+ binvert_limb (di, dp[0]); di = -di;
+ mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+ }
+ else
+ {
+ mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
+ }
+ TMP_FREE;
+ return;
+ }
+
+ nn0 = qn0 + qn0;
+
+ nn1 = nn0 - 1 + ((nn-dn) & 1);
+ qn1 = qn0;
+ if (LIKELY (qn0 != dn))
+ {
+ nn1 = nn1 + 1;
+ qn1 = qn1 + 1;
+ if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
+ {
+ /* If the leading divisor limb == 1, i.e. has just one bit, we have
+ to include an extra limb in order to get the needed overlap. */
+ /* FIXME: Now with the mu_divappr_q function, we should really need
+ more overlap. That indicates one of two things: (1) The test code
+ is not good. (2) We actually overlap too much by default. */
+ nn1 = nn1 + 1;
+ qn1 = qn1 + 1;
+ }
+ }
+
+ tp = TMP_ALLOC_LIMBS (nn1 + 1);
+
+ count_leading_zeros (cnt, dp[dn - 1]);
+
+ /* Normalize divisor, store into tmp area. */
+ if (cnt != 0)
+ {
+ xdp = TMP_ALLOC_LIMBS (qn1);
+ mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
+ }
+ else
+ {
+ xdp = (mp_ptr) dp + dn - qn1;
+ }
+
+ /* Shift dividend according to the divisor normalization. */
+ /* FIXME: We compute too much here for XX_divappr_q, but these functions'
+ interfaces want a pointer to the imaginative least significant limb, not
+ to the least significant *used* limb. Of course, we could leave nn1-qn1
+ rubbish limbs in the low part, to save some time. */
+ if (cnt != 0)
+ {
+ cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
+ if (cy != 0)
+ {
+ tp[nn1] = cy;
+ nn1++;
+ }
+ }
+ else
+ {
+ /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
+ mpn_sub_n right before is executed. */
+ MPN_COPY (tp, np + nn - nn1, nn1);
+ }
+
+ invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
+ if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
+ {
+ qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
+ {
+ qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
+ }
+ else
+ {
+ /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0. Work around it with a
+ conditional subtraction here. */
+ qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
+ if (qh)
+ mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
+ mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
+ qp[qn0 - 1 + nn1 - qn1] = qh;
+ }
+ qml = qp[qn0 - 1];
+
+ binvert_limb (di, dp[0]); di = -di;
+
+ if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
+ {
+ MPN_COPY (tp, np, qn0);
+ mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+ }
+ else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
+ {
+ MPN_COPY (tp, np, qn0);
+ mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+ }
+ else
+ {
+ mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
+ }
+
+ if (qml < qp[qn0 - 1])
+ mpn_decr_u (qp + qn0, 1);
+
+ TMP_FREE;
+}
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/divis.c b/vendor/gmp-6.3.0/mpn/generic/divis.c
new file mode 100644
index 0000000..f989ddb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/divis.c
@@ -0,0 +1,194 @@
+/* mpn_divisible_p -- mpn by mpn divisibility test
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2014, 2017, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Determine whether A={ap,an} is divisible by D={dp,dn}. Must have both
+ operands normalized, meaning high limbs non-zero, except that an==0 is
+ allowed.
+
+ There usually won't be many low zero bits on D, but the checks for this
+ are fast and might pick up a few operand combinations, in particular they
+ might reduce D to fit the single-limb mod_1/modexact_1 code.
+
+ Future:
+
+ Getting the remainder limb by limb would make an early exit possible on
+ finding a non-zero. This would probably have to be bdivmod style so
+ there's no addback, but it would need a multi-precision inverse and so
+ might be slower than the plain method (on small sizes at least).
+
+ When D must be normalized (shifted to low bit set), it's possible to
+ suppress the bit-shifting of A down, as long as it's already been checked
+ that A has at least as many trailing zero bits as D. */
+
+int
+mpn_divisible_p (mp_srcptr ap, mp_size_t an,
+ mp_srcptr dp, mp_size_t dn)
+{
+ mp_limb_t alow, dlow, dmask;
+ mp_ptr qp, rp, tp;
+ mp_limb_t di;
+ unsigned twos;
+ int c;
+ TMP_DECL;
+
+ ASSERT (an >= 0);
+ ASSERT (an == 0 || ap[an-1] != 0);
+ ASSERT (dn >= 1);
+ ASSERT (dp[dn-1] != 0);
+ ASSERT_MPN (ap, an);
+ ASSERT_MPN (dp, dn);
+
+ /* When a<d only a==0 is divisible.
+ Notice this test covers all cases of an==0. */
+ if (an < dn)
+ return (an == 0);
+
+ /* Strip low zero limbs from d, requiring a==0 on those. */
+ for (;;)
+ {
+ alow = *ap;
+ dlow = *dp;
+
+ if (dlow != 0)
+ break;
+
+ if (alow != 0)
+ return 0; /* a has fewer low zero limbs than d, so not divisible */
+
+ /* a!=0 and d!=0 so won't get to n==0 */
+ an--; ASSERT (an >= 1);
+ dn--; ASSERT (dn >= 1);
+ ap++;
+ dp++;
+ }
+
+ /* a must have at least as many low zero bits as d */
+ dmask = LOW_ZEROS_MASK (dlow);
+ if ((alow & dmask) != 0)
+ return 0;
+
+ if (dn == 1)
+ {
+ if (ABOVE_THRESHOLD (an, BMOD_1_TO_MOD_1_THRESHOLD))
+ return mpn_mod_1 (ap, an, dlow) == 0;
+
+ count_trailing_zeros (twos, dlow);
+ dlow >>= twos;
+ return mpn_modexact_1_odd (ap, an, dlow) == 0;
+ }
+
+ count_trailing_zeros (twos, dlow);
+ if (dn == 2)
+ {
+ mp_limb_t dsecond = dp[1];
+ if (dsecond <= dmask)
+ {
+ dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
+ ASSERT_LIMB (dlow);
+ return MPN_MOD_OR_MODEXACT_1_ODD (ap, an, dlow) == 0;
+ }
+ }
+
+ /* Should we compute Q = A * D^(-1) mod B^k,
+ R = A - Q * D mod B^k
+ here, for some small values of k? Then check if R = 0 (mod B^k). */
+
+ /* We could also compute A' = A mod T and D' = D mod P, for some
+ P = 3 * 5 * 7 * 11 ..., and then check if any prime factor from P
+ dividing D' also divides A'. */
+
+ TMP_MARK;
+
+ TMP_ALLOC_LIMBS_2 (rp, an + 1,
+ qp, an - dn + 1); /* FIXME: Could we avoid this? */
+
+ if (twos != 0)
+ {
+ tp = TMP_ALLOC_LIMBS (dn);
+ ASSERT_NOCARRY (mpn_rshift (tp, dp, dn, twos));
+ dp = tp;
+
+ ASSERT_NOCARRY (mpn_rshift (rp, ap, an, twos));
+ }
+ else
+ {
+ MPN_COPY (rp, ap, an);
+ }
+ if (rp[an - 1] >= dp[dn - 1])
+ {
+ rp[an] = 0;
+ an++;
+ }
+ else if (an == dn)
+ {
+ TMP_FREE;
+ return 0;
+ }
+
+ ASSERT (an > dn); /* requirement of functions below */
+
+ if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
+ BELOW_THRESHOLD (an - dn, DC_BDIV_QR_THRESHOLD))
+ {
+ binvert_limb (di, dp[0]);
+ mpn_sbpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
+ rp += an - dn;
+ }
+ else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+ {
+ binvert_limb (di, dp[0]);
+ mpn_dcpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
+ rp += an - dn;
+ }
+ else
+ {
+ tp = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (an, dn));
+ mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
+ }
+
+ /* In general, bdiv may return either R = 0 or R = D when D divides
+ A. But R = 0 can happen only when A = 0, which we already have
+ excluded. Furthermore, R == D (mod B^{dn}) implies no carry, so
+ we don't need to check the carry returned from bdiv. */
+
+ MPN_CMP (c, rp, dp, dn);
+
+ TMP_FREE;
+ return c == 0;
+}
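+
+/* Illustrative sketch (not GMP code): the quick rejection used above ("a must
+   have at least as many low zero bits as d") in isolation, on 64-bit limbs
+   with no nails.  low_zeros_mask is a throwaway stand-in for LOW_ZEROS_MASK:
+   it has ones exactly in the trailing-zero positions of its argument.
+   Never compiled into the library.  */
+#if 0
+#include <stdint.h>
+static uint64_t
+low_zeros_mask (uint64_t x)     /* x must be non-zero */
+{
+  return (x & -x) - 1;          /* ones strictly below the lowest set bit */
+}
+static int
+trailing_zeros_ok (uint64_t alow, uint64_t dlow)
+{
+  /* If any of these bits are set, a has fewer trailing zero bits than d,
+     so d cannot divide a.  */
+  return (alow & low_zeros_mask (dlow)) == 0;
+}
+#endif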
diff --git a/vendor/gmp-6.3.0/mpn/generic/divrem.c b/vendor/gmp-6.3.0/mpn/generic/divrem.c
new file mode 100644
index 0000000..1da84a8
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/divrem.c
@@ -0,0 +1,103 @@
+/* mpn_divrem -- Divide natural numbers, producing both remainder and
+ quotient. This is now just a middle layer calling mpn_tdiv_qr.
+
+Copyright 1993-1997, 1999-2002, 2005, 2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_divrem (mp_ptr qp, mp_size_t qxn,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+{
+ ASSERT (qxn >= 0);
+ ASSERT (nn >= dn);
+ ASSERT (dn >= 1);
+ ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+ ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));
+ ASSERT_MPN (np, nn);
+ ASSERT_MPN (dp, dn);
+
+ if (dn == 1)
+ {
+ mp_limb_t ret;
+ mp_ptr q2p;
+ mp_size_t qn;
+ TMP_DECL;
+
+ TMP_MARK;
+ q2p = TMP_ALLOC_LIMBS (nn + qxn);
+
+ np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
+ qn = nn + qxn - 1;
+ MPN_COPY (qp, q2p, qn);
+ ret = q2p[qn];
+
+ TMP_FREE;
+ return ret;
+ }
+ else if (dn == 2)
+ {
+ return mpn_divrem_2 (qp, qxn, np, nn, dp);
+ }
+ else
+ {
+ mp_ptr q2p;
+ mp_limb_t qhl;
+ mp_size_t qn;
+ TMP_DECL;
+
+ TMP_MARK;
+ if (UNLIKELY (qxn != 0))
+ {
+ mp_ptr n2p;
+ TMP_ALLOC_LIMBS_2 (n2p, nn + qxn,
+ q2p, nn - dn + qxn + 1);
+ MPN_ZERO (n2p, qxn);
+ MPN_COPY (n2p + qxn, np, nn);
+ mpn_tdiv_qr (q2p, np, 0L, n2p, nn + qxn, dp, dn);
+ qn = nn - dn + qxn;
+ MPN_COPY (qp, q2p, qn);
+ qhl = q2p[qn];
+ }
+ else
+ {
+ q2p = TMP_ALLOC_LIMBS (nn - dn + 1);
+ mpn_tdiv_qr (q2p, np, 0L, np, nn, dp, dn);
+ qn = nn - dn;
+ MPN_COPY (qp, q2p, qn);
+ qhl = q2p[qn];
+ }
+ TMP_FREE;
+ return qhl;
+ }
+}
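+
+/* Illustrative note (not GMP code): the qxn convention in small numbers.
+   The qxn extra limbs come from zero-extending N below its least significant
+   limb, so the full quotient is floor (N * B^qxn / D), with the low qxn
+   limbs acting as radix-B fraction digits of N/D.  For 64-bit limbs,
+   N = {1}, D = {2^63} (normalized) and qxn = 1, the single fraction limb is
+   floor (2^64 / 2^63) = 2, the first base-2^64 digit of N/D = 2^-63.  */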
diff --git a/vendor/gmp-6.3.0/mpn/generic/divrem_1.c b/vendor/gmp-6.3.0/mpn/generic/divrem_1.c
new file mode 100644
index 0000000..c13aa79
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/divrem_1.c
@@ -0,0 +1,254 @@
+/* mpn_divrem_1 -- mpn by limb division.
+
+Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+ meaning the quotient size where that should happen, the quotient size
+ being how many udiv divisions will be done.
+
+ The default is to use preinv always, CPUs where this doesn't suit have
+ tuned thresholds. Note in particular that preinv should certainly be
+ used if that's the only division available (USE_PREINV_ALWAYS). */
+
+#ifndef DIVREM_1_NORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD 0
+#endif
+#ifndef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_UNNORM_THRESHOLD 0
+#endif
+
+
+
+/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
+ and UNNORM thresholds are 0 and only the inversion code is included.
+
+ If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
+ will be MP_SIZE_T_MAX and only the plain division code is included.
+
+ Otherwise mul-by-inverse is better than plain division above some
+ threshold, and best results are obtained by having code for both present.
+
+ The main reason for separating the norm and unnorm cases is that not all
+ CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
+ code used on an already normalized divisor.
+
+ If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
+ non-shifting code for both the norm and unnorm cases, though with
+ different criteria for skipping a division, and with different thresholds
+ of course. And in fact if inversion is never viable, then that simple
+ non-shifting division would be all that's left.
+
+ The NORM and UNNORM thresholds might not differ much, but if there's
+ going to be separate code for norm and unnorm then it makes sense to have
+ separate thresholds. One thing that's possible is that the
+ mul-by-inverse might be better only for normalized divisors, due to that
+ case not needing variable bit shifts.
+
+ Notice that the thresholds are tested after the decision to possibly skip
+ one divide step, so they're based on the actual number of divisions done.
+
+ For the unnorm case, it would be possible to call mpn_lshift to adjust
+ the dividend all in one go (into the quotient space say), rather than
+ limb-by-limb in the loop. This might help if mpn_lshift is a lot faster
+ than what the compiler can generate for EXTRACT. But this is left to CPU
+ specific implementations to consider, especially since EXTRACT isn't on
+ the dependent chain. */
+
+mp_limb_t
+mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
+ mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+ mp_size_t n;
+ mp_size_t i;
+ mp_limb_t n1, n0;
+ mp_limb_t r = 0;
+
+ ASSERT (qxn >= 0);
+ ASSERT (un >= 0);
+ ASSERT (d != 0);
+ /* FIXME: What's the correct overlap rule when qxn!=0? */
+ ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
+
+ n = un + qxn;
+ if (n == 0)
+ return 0;
+
+ d <<= GMP_NAIL_BITS;
+
+ qp += (n - 1); /* Make qp point at most significant quotient limb */
+
+ if ((d & GMP_LIMB_HIGHBIT) != 0)
+ {
+ if (un != 0)
+ {
+ /* High quotient limb is 0 or 1, skip a divide step. */
+ mp_limb_t q;
+ r = up[un - 1] << GMP_NAIL_BITS;
+ q = (r >= d);
+ *qp-- = q;
+ r -= (d & -q);
+ r >>= GMP_NAIL_BITS;
+ n--;
+ un--;
+ }
+
+ if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
+ {
+ plain:
+ for (i = un - 1; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ udiv_qrnnd (*qp, r, r, n0, d);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ }
+ for (i = qxn - 1; i >= 0; i--)
+ {
+ udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ }
+ return r;
+ }
+ else
+ {
+ /* Multiply-by-inverse, divisor already normalized. */
+ mp_limb_t dinv;
+ invert_limb (dinv, d);
+
+ for (i = un - 1; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ }
+ for (i = qxn - 1; i >= 0; i--)
+ {
+ udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ }
+ return r;
+ }
+ }
+ else
+ {
+ /* Most significant bit of divisor == 0. */
+ int cnt;
+
+ /* Skip a division if high < divisor (high quotient 0). Testing here
+ before normalizing will still skip as often as possible. */
+ if (un != 0)
+ {
+ n1 = up[un - 1] << GMP_NAIL_BITS;
+ if (n1 < d)
+ {
+ r = n1 >> GMP_NAIL_BITS;
+ *qp-- = 0;
+ n--;
+ if (n == 0)
+ return r;
+ un--;
+ }
+ }
+
+ if (! UDIV_NEEDS_NORMALIZATION
+ && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+ goto plain;
+
+ count_leading_zeros (cnt, d);
+ d <<= cnt;
+ r <<= cnt;
+
+ if (UDIV_NEEDS_NORMALIZATION
+ && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+ {
+ mp_limb_t nshift;
+ if (un != 0)
+ {
+ n1 = up[un - 1] << GMP_NAIL_BITS;
+ r |= (n1 >> (GMP_LIMB_BITS - cnt));
+ for (i = un - 2; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd (*qp, r, r, nshift, d);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ n1 = n0;
+ }
+ udiv_qrnnd (*qp, r, r, n1 << cnt, d);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ }
+ for (i = qxn - 1; i >= 0; i--)
+ {
+ udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ }
+ return r >> cnt;
+ }
+ else
+ {
+ mp_limb_t dinv, nshift;
+ invert_limb (dinv, d);
+ if (un != 0)
+ {
+ n1 = up[un - 1] << GMP_NAIL_BITS;
+ r |= (n1 >> (GMP_LIMB_BITS - cnt));
+ for (i = un - 2; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ n1 = n0;
+ }
+ udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ }
+ for (i = qxn - 1; i >= 0; i--)
+ {
+ udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+ r >>= GMP_NAIL_BITS;
+ qp--;
+ }
+ return r >> cnt;
+ }
+ }
+}
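+
+/* Illustrative sketch (not GMP code): the semantics of the loops above as a
+   plain schoolbook reference on 64-bit limbs, with no nails, no fraction
+   limbs (qxn = 0), and the compiler's 128-bit type (a GCC/Clang extension)
+   standing in for udiv_qrnnd/udiv_qrnnd_preinv.  Quotient limbs are produced
+   most significant first; the final remainder is returned.  Never compiled
+   into the library.  */
+#if 0
+#include <stdint.h>
+static uint64_t
+divrem_1_reference (uint64_t *qp, const uint64_t *up, long un, uint64_t d)
+{
+  unsigned __int128 n;
+  uint64_t r = 0;
+  long i;
+  for (i = un - 1; i >= 0; i--)
+    {
+      n = ((unsigned __int128) r << 64) | up[i];  /* two-limb dividend r:up[i] */
+      qp[i] = (uint64_t) (n / d);
+      r = (uint64_t) (n % d);
+    }
+  return r;
+}
+#endif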
diff --git a/vendor/gmp-6.3.0/mpn/generic/divrem_2.c b/vendor/gmp-6.3.0/mpn/generic/divrem_2.c
new file mode 100644
index 0000000..217f2f6
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/divrem_2.c
@@ -0,0 +1,118 @@
+/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
+ quotient. The divisor is two limbs.
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least significant
+   quotient limbs at qp and the 2-limb remainder at np.  If qxn is non-zero,
+   generate that many fraction limbs and append them after the other quotient
+ limbs. Return the most significant limb of the quotient, this is always 0
+ or 1.
+
+ Preconditions:
+ 1. The most significant bit of the divisor must be set.
+ 2. qp must either not overlap with the input operands at all, or
+ qp >= np + 2 must hold true. (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.)
+ 3. nn >= 2, even if qxn is non-zero. */
+
+mp_limb_t
+mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp)
+{
+ mp_limb_t most_significant_q_limb;
+ mp_size_t i;
+ mp_limb_t r1, r0, d1, d0;
+ gmp_pi1_t di;
+
+ ASSERT (nn >= 2);
+ ASSERT (qxn >= 0);
+ ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
+ ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp >= np+2);
+ ASSERT_MPN (np, nn);
+ ASSERT_MPN (dp, 2);
+
+ np += nn - 2;
+ d1 = dp[1];
+ d0 = dp[0];
+ r1 = np[1];
+ r0 = np[0];
+
+ most_significant_q_limb = 0;
+ if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+ {
+#if GMP_NAIL_BITS == 0
+ sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+ r0 = r0 - d0;
+ r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+ r0 &= GMP_NUMB_MASK;
+#endif
+ most_significant_q_limb = 1;
+ }
+
+ invert_pi1 (di, d1, d0);
+
+ qp += qxn;
+
+ for (i = nn - 2 - 1; i >= 0; i--)
+ {
+ mp_limb_t n0, q;
+ n0 = np[-1];
+ udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di.inv32);
+ np--;
+ qp[i] = q;
+ }
+
+ if (UNLIKELY (qxn != 0))
+ {
+ qp -= qxn;
+ for (i = qxn - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ udiv_qr_3by2 (q, r1, r0, r1, r0, CNST_LIMB(0), d1, d0, di.inv32);
+ qp[i] = q;
+ }
+ }
+
+ np[1] = r1;
+ np[0] = r0;
+
+ return most_significant_q_limb;
+}
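+
+/* Illustrative note (not GMP code): why the returned limb can only be 0 or 1.
+   With the divisor normalized (dp[1] >= B/2, B the limb base), the quotient
+   floor (N/D) is below B^nn / (B^2/2) = 2*B^(nn-2), so it needs at most one
+   bit beyond the nn-2 limbs written at qp; that bit is exactly the initial
+   conditional subtraction before the main loop.  */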
diff --git a/vendor/gmp-6.3.0/mpn/generic/dump.c b/vendor/gmp-6.3.0/mpn/generic/dump.c
new file mode 100644
index 0000000..9a4ddf4
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/dump.c
@@ -0,0 +1,99 @@
+/* THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS NOT SAFE TO
+ CALL THIS FUNCTION DIRECTLY. IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+ FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1996, 2000-2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS % 4 == 0
+void
+mpn_dump (mp_srcptr ptr, mp_size_t n)
+{
+ MPN_NORMALIZE (ptr, n);
+
+ if (n == 0)
+ printf ("0\n");
+ else
+ {
+ n--;
+#if _LONG_LONG_LIMB
+ if ((ptr[n] >> GMP_LIMB_BITS / 2) != 0)
+ {
+ printf ("%lX", (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));
+ printf ("%0*lX", (GMP_LIMB_BITS / 2 / 4), (unsigned long) ptr[n]);
+ }
+ else
+#endif
+ printf ("%lX", (unsigned long) ptr[n]);
+
+ while (n)
+ {
+ n--;
+#if _LONG_LONG_LIMB
+ printf ("%0*lX", (GMP_NUMB_BITS - GMP_LIMB_BITS / 2) / 4,
+ (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));
+ printf ("%0*lX", GMP_LIMB_BITS / 2 / 4, (unsigned long) ptr[n]);
+#else
+ printf ("%0*lX", GMP_NUMB_BITS / 4, (unsigned long) ptr[n]);
+#endif
+ }
+ printf ("\n");
+ }
+}
+
+#else
+
+static void
+mpn_recdump (mp_ptr p, mp_size_t n)
+{
+  mp_limb_t lo;
+  /* Destructively print {p,n} in hex, most significant nibble first;
+     stop once the remaining value is zero.  */
+  if (! mpn_zero_p (p, n))
+    {
+      lo = p[0] & 0xf;
+      mpn_rshift (p, p, n, 4);
+      mpn_recdump (p, n);
+      printf ("%lX", (unsigned long) lo);
+    }
+}
+
+void
+mpn_dump (mp_srcptr p, mp_size_t n)
+{
+  mp_ptr tp;
+  TMP_DECL;
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n);
+  MPN_COPY (tp, p, n);
+  /* Print from the copy, since mpn_recdump shifts its operand in place.  */
+  if (n == 0 || mpn_zero_p (tp, n))
+    printf ("0");
+  else
+    mpn_recdump (tp, n);
+  printf ("\n");
+  TMP_FREE;
+}
+
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/fib2_ui.c b/vendor/gmp-6.3.0/mpn/generic/fib2_ui.c
new file mode 100644
index 0000000..0b81571
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/fib2_ui.c
@@ -0,0 +1,174 @@
+/* mpn_fib2_ui -- calculate Fibonacci numbers.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* Store F[n] at fp and F[n-1] at f1p. fp and f1p should have room for
+ MPN_FIB2_SIZE(n) limbs.
+
+ The return value is the actual number of limbs stored, this will be at
+ least 1. fp[size-1] will be non-zero, except when n==0, in which case
+ fp[0] is 0 and f1p[0] is 1. f1p[size-1] can be zero, since F[n-1]<F[n]
+ (for n>0).
+
+ Notes: F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+
+ In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
+ low limb.
+
+ In F[2k+1] with k odd, -2 is applied to F[k-1]^2 just by ORing into the
+ low limb.
+*/
+
+mp_size_t
+mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
+{
+ mp_size_t size;
+ unsigned long nfirst, mask;
+
+ TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
+
+ ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
+
+ /* Take a starting pair from the table. */
+ mask = 1;
+ for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
+ mask <<= 1;
+ TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
+
+ f1p[0] = FIB_TABLE ((int) nfirst - 1);
+ fp[0] = FIB_TABLE (nfirst);
+ size = 1;
+
+ /* Skip to the end if the table lookup gives the final answer. */
+ if (mask != 1)
+ {
+ mp_size_t alloc;
+ mp_ptr xp;
+ TMP_DECL;
+
+ TMP_MARK;
+ alloc = MPN_FIB2_SIZE (n);
+ xp = TMP_ALLOC_LIMBS (alloc);
+
+ do
+ {
+ /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
+ n&mask upwards.
+
+ The next bit of n is n&(mask>>1) and we'll double to the pair
+ fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
+ that bit is 0 or 1 respectively. */
+
+ TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
+ n >> refmpn_count_trailing_zeros(mask),
+ mask, size, alloc);
+ mpn_trace ("fp ", fp, size);
+ mpn_trace ("f1p", f1p, size));
+
+ /* fp normalized, f1p at most one high zero */
+ ASSERT (fp[size-1] != 0);
+ ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
+
+ /* f1p[size-1] might be zero, but this occurs rarely, so it's not
+ worth bothering checking for it */
+ ASSERT (alloc >= 2*size);
+ mpn_sqr (xp, fp, size);
+ mpn_sqr (fp, f1p, size);
+ size *= 2;
+
+ /* Shrink if possible. Since fp was normalized there'll be at
+ most one high zero on xp (and if there is then there's one on
+         fp too).  */
+ ASSERT (xp[size-1] != 0 || fp[size-1] == 0);
+ size -= (xp[size-1] == 0);
+ ASSERT (xp[size-1] != 0); /* only one xp high zero */
+
+ /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
+ f1p[size] = mpn_add_n (f1p, xp, fp, size);
+
+ /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+ n&mask is the low bit of our implied k. */
+
+ ASSERT ((fp[0] & 2) == 0);
+ /* fp is F[k-1]^2 == 0 or 1 mod 4, like all squares. */
+ fp[0] |= (n & mask ? 2 : 0); /* possible -2 */
+#if HAVE_NATIVE_mpn_rsblsh2_n
+ fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
+ MPN_INCR_U(fp, size + 1, (n & mask ? 0 : 2)); /* possible +2 */
+#else
+ {
+ mp_limb_t c;
+
+ c = mpn_lshift (xp, xp, size, 2);
+ xp[0] |= (n & mask ? 0 : 2); /* possible +2 */
+ c -= mpn_sub_n (fp, xp, fp, size);
+ fp[size] = c;
+ }
+#endif
+ ASSERT (alloc >= size+1);
+ size += (fp[size] != 0);
+
+ /* now n&mask is the new bit of n being considered */
+ mask >>= 1;
+
+ /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
+ F[2k+1] and F[2k-1]. */
+ if (n & mask)
+ ASSERT_NOCARRY (mpn_sub_n (f1p, fp, f1p, size));
+ else {
+ ASSERT_NOCARRY (mpn_sub_n ( fp, fp, f1p, size));
+
+ /* Can have a high zero after replacing F[2k+1] with F[2k].
+ f1p will have a high zero if fp does. */
+ ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
+ size -= (fp[size-1] == 0);
+ }
+ }
+ while (mask != 1);
+
+ TMP_FREE;
+ }
+
+ TRACE (printf ("done size=%ld\n", size);
+ mpn_trace ("fp ", fp, size);
+ mpn_trace ("f1p", f1p, size));
+
+ return size;
+}
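+
+/* Illustrative sketch (not GMP code): the doubling identities used above,
+   checked directly on machine words for small k, with no limb arrays and no
+   modular reduction.  fib() is a throwaway helper, not a GMP function; all
+   quantities fit in 64 bits for k < 40.  Never compiled into the library.  */
+#if 0
+#include <assert.h>
+#include <stdint.h>
+static uint64_t
+fib (unsigned i)                /* returns F[i], iteratively */
+{
+  uint64_t a = 0, b = 1, t;     /* a = F[0], b = F[1] */
+  while (i--)
+    {
+      t = a + b;
+      a = b;
+      b = t;
+    }
+  return a;
+}
+static void
+check_doubling (void)
+{
+  unsigned k;
+  for (k = 2; k < 40; k++)
+    {
+      uint64_t two_signed = (k & 1) ? -(uint64_t) 2 : 2;  /* 2*(-1)^k mod 2^64 */
+      assert (fib (2*k - 1) == fib (k)*fib (k) + fib (k-1)*fib (k-1));
+      assert (fib (2*k + 1)
+              == 4*fib (k)*fib (k) - fib (k-1)*fib (k-1) + two_signed);
+      assert (fib (2*k) == fib (2*k + 1) - fib (2*k - 1));
+    }
+}
+#endif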
diff --git a/vendor/gmp-6.3.0/mpn/generic/fib2m.c b/vendor/gmp-6.3.0/mpn/generic/fib2m.c
new file mode 100644
index 0000000..89d2b86
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/fib2m.c
@@ -0,0 +1,252 @@
+/* mpn_fib2m -- calculate Fibonacci numbers, modulo m.
+
+Contributed to the GNU project by Marco Bodrato, based on the previous
+fib2_ui.c file.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Stores |{ap,n}-{bp,n}| in {rp,n},
+ returns the sign of {ap,n}-{bp,n}. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+ mp_limb_t x, y;
+ while (--n >= 0)
+ {
+ x = ap[n];
+ y = bp[n];
+ if (x != y)
+ {
+ ++n;
+ if (x > y)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (rp, ap, bp, n));
+ return 1;
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_n (rp, bp, ap, n));
+ return -1;
+ }
+ }
+ rp[n] = 0;
+ }
+ return 0;
+}
+
+/* Store F[n] at fp and F[n-1] at f1p. Both are computed modulo m.
+ fp and f1p should have room for mn*2+1 limbs.
+
+ The sign of one or both the values may be flipped (n-F, instead of F),
+ the return value is 0 (zero) if the signs are coherent (both positive
+ or both negative) and 1 (one) otherwise.
+
+ Notes:
+
+ In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
+ low limb.
+
+ In F[2k+1] with k odd, -2 is applied to F[k-1]^2 just by ORing into the
+ low limb.
+
+ TODO: Should {tp, 2 * mn} be passed as a scratch pointer?
+ Should the call to mpn_fib2_ui() obtain (up to) 2*mn limbs?
+*/
+
+int
+mpn_fib2m (mp_ptr fp, mp_ptr f1p, mp_srcptr np, mp_size_t nn, mp_srcptr mp, mp_size_t mn)
+{
+ unsigned long nfirst;
+ mp_limb_t nh;
+ mp_bitcnt_t nbi;
+ mp_size_t sn, fn;
+ int fcnt, ncnt;
+
+ ASSERT (! MPN_OVERLAP_P (fp, MAX(2*mn+1,5), f1p, MAX(2*mn+1,5)));
+ ASSERT (nn > 0 && np[nn - 1] != 0);
+
+  /* Estimate the maximal n such that fibonacci(n) fits in mn limbs: F[n] has
+     about n*log2(phi) ~= n*16/23 bits, so n is roughly 23/16 times the bit
+     count of m; 23/16 slightly underestimates, which is the safe direction. */
+#if GMP_NUMB_BITS % 16 == 0
+ if (UNLIKELY (ULONG_MAX / (23 * (GMP_NUMB_BITS / 16)) <= mn))
+ nfirst = ULONG_MAX;
+ else
+ nfirst = mn * (23 * (GMP_NUMB_BITS / 16));
+#else
+ {
+ mp_bitcnt_t mbi;
+ mbi = (mp_bitcnt_t) mn * GMP_NUMB_BITS;
+
+ if (UNLIKELY (ULONG_MAX / 23 < mbi))
+ {
+ if (UNLIKELY (ULONG_MAX / 23 * 16 <= mbi))
+ nfirst = ULONG_MAX;
+ else
+ nfirst = mbi / 16 * 23;
+ }
+ else
+ nfirst = mbi * 23 / 16;
+ }
+#endif
+
+ sn = nn - 1;
+ nh = np[sn];
+ count_leading_zeros (ncnt, nh);
+ count_leading_zeros (fcnt, nfirst);
+
+ if (fcnt >= ncnt)
+ {
+ ncnt = fcnt - ncnt;
+ nh >>= ncnt;
+ }
+ else if (sn > 0)
+ {
+ ncnt -= fcnt;
+ nh <<= ncnt;
+ ncnt = GMP_NUMB_BITS - ncnt;
+ --sn;
+ nh |= np[sn] >> ncnt;
+ }
+ else
+ ncnt = 0;
+
+ nbi = sn * GMP_NUMB_BITS + ncnt;
+ if (nh > nfirst)
+ {
+ nh >>= 1;
+ ++nbi;
+ }
+
+ ASSERT (nh <= nfirst);
+ /* Take a starting pair from mpn_fib2_ui. */
+ fn = mpn_fib2_ui (fp, f1p, nh);
+ MPN_ZERO (fp + fn, mn - fn);
+ MPN_ZERO (f1p + fn, mn - fn);
+
+ if (nbi == 0)
+ {
+ if (fn == mn)
+ {
+ mp_limb_t qp[2];
+ mpn_tdiv_qr (qp, fp, 0, fp, fn, mp, mn);
+ mpn_tdiv_qr (qp, f1p, 0, f1p, fn, mp, mn);
+ }
+
+ return 0;
+ }
+ else
+ {
+ mp_ptr tp;
+ unsigned pb = nh & 1;
+ int neg;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ tp = TMP_ALLOC_LIMBS (2 * mn + (mn < 2));
+
+ do
+ {
+ mp_ptr rp;
+ /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
+ nbi upwards.
+
+ Based on the next bit of n, we'll double to the pair
+ fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
+ that bit is 0 or 1 respectively. */
+
+ mpn_sqr (tp, fp, mn);
+ mpn_sqr (fp, f1p, mn);
+
+ /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
+ f1p[2 * mn] = mpn_add_n (f1p, tp, fp, 2 * mn);
+
+ /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+ pb is the low bit of our implied k. */
+
+ /* fp is F[k-1]^2 == 0 or 1 mod 4, like all squares. */
+ ASSERT ((fp[0] & 2) == 0);
+ ASSERT (pb == (pb & 1));
+ ASSERT ((fp[0] + (pb ? 2 : 0)) == (fp[0] | (pb << 1)));
+ fp[0] |= pb << 1; /* possible -2 */
+#if HAVE_NATIVE_mpn_rsblsh2_n
+ fp[2 * mn] = 1 + mpn_rsblsh2_n (fp, fp, tp, 2 * mn);
+ MPN_INCR_U(fp, 2 * mn + 1, (1 ^ pb) << 1); /* possible +2 */
+ fp[2 * mn] = (fp[2 * mn] - 1) & GMP_NUMB_MAX;
+#else
+ {
+ mp_limb_t c;
+
+ c = mpn_lshift (tp, tp, 2 * mn, 2);
+ tp[0] |= (1 ^ pb) << 1; /* possible +2 */
+ c -= mpn_sub_n (fp, tp, fp, 2 * mn);
+ fp[2 * mn] = c & GMP_NUMB_MAX;
+ }
+#endif
+ neg = fp[2 * mn] == GMP_NUMB_MAX;
+
+ /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2 */
+ /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k */
+
+ /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
+ F[2k+1] and F[2k-1]. */
+ --nbi;
+ pb = (np [nbi / GMP_NUMB_BITS] >> (nbi % GMP_NUMB_BITS)) & 1;
+ rp = pb ? f1p : fp;
+ if (neg)
+ {
+ /* Calculate -(F[2k+1] - F[2k-1]) */
+ rp[2 * mn] = f1p[2 * mn] + 1 - mpn_sub_n (rp, f1p, fp, 2 * mn);
+ neg = ! pb;
+ if (pb) /* fp not overwritten, negate it. */
+ fp [2 * mn] = 1 ^ mpn_neg (fp, fp, 2 * mn);
+ }
+ else
+ {
+ neg = abs_sub_n (rp, fp, f1p, 2 * mn + 1) < 0;
+ }
+
+ mpn_tdiv_qr (tp, fp, 0, fp, 2 * mn + 1, mp, mn);
+ mpn_tdiv_qr (tp, f1p, 0, f1p, 2 * mn + 1, mp, mn);
+ }
+ while (nbi != 0);
+
+ TMP_FREE;
+
+ return neg;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/gcd.c b/vendor/gmp-6.3.0/mpn/generic/gcd.c
new file mode 100644
index 0000000..3f92cbf
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gcd.c
@@ -0,0 +1,266 @@
+/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Uses the HGCD operation described in
+
+ N. Möller, On Schönhage's algorithm and subquadratic integer gcd
+ computation, Math. Comp. 77 (2008), 589-607.
+
+ to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
+ then uses Lehmer's algorithm.
+*/
+
+/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
+ * 2)/3, which gives a balanced multiplication in
+ * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
+ * performance. The matrix-vector multiplication is then
+ * 4:1-unbalanced, with matrix elements of size n/6, and vector
+ * elements of size p = 2n/3. */
+
+/* From analysis of the theoretical running time, it appears that when
+ * multiplication takes time O(n^alpha), p should be chosen so that
+ * the ratio of the time for the mpn_hgcd call, and the time for the
+ * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
+ * 1). */
+#ifdef TUNE_GCD_P
+#define P_TABLE_SIZE 10000
+mp_size_t p_table[P_TABLE_SIZE];
+#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
+#else
+#define CHOOSE_P(n) (2*(n) / 3)
+#endif
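+
+/* Illustrative note (not GMP code): with the default CHOOSE_P, n = 600 limbs
+   gives p = 400, so mpn_hgcd is run on the top n - p = 200 limbs and returns
+   matrix elements of roughly (n - p)/2 = 100 = n/6 limbs, which
+   mpn_hgcd_matrix_adjust then multiplies by p = 400-limb vectors; that is
+   the 4:1 imbalance mentioned above.  */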
+
+struct gcd_ctx
+{
+ mp_ptr gp;
+ mp_size_t gn;
+};
+
+static void
+gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+ mp_srcptr qp, mp_size_t qn, int d)
+{
+ struct gcd_ctx *ctx = (struct gcd_ctx *) p;
+ MPN_COPY (ctx->gp, gp, gn);
+ ctx->gn = gn;
+}
+
+mp_size_t
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
+{
+ mp_size_t talloc;
+ mp_size_t scratch;
+ mp_size_t matrix_scratch;
+
+ struct gcd_ctx ctx;
+ mp_ptr tp;
+ TMP_DECL;
+
+ ASSERT (usize >= n);
+ ASSERT (n > 0);
+ ASSERT (vp[n-1] > 0);
+
+ /* FIXME: Check for small sizes first, before setting up temporary
+ storage etc. */
+ talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+ /* For initial division */
+ scratch = usize - n + 1;
+ if (scratch > talloc)
+ talloc = scratch;
+
+#if TUNE_GCD_P
+ if (CHOOSE_P (n) > 0)
+#else
+ if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+ {
+ mp_size_t hgcd_scratch;
+ mp_size_t update_scratch;
+ mp_size_t p = CHOOSE_P (n);
+ mp_size_t scratch;
+#if TUNE_GCD_P
+ /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
+ is increasing */
+ matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
+ hgcd_scratch = mpn_hgcd_itch (n);
+ update_scratch = 2*(n - 1);
+#else
+ matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+ hgcd_scratch = mpn_hgcd_itch (n - p);
+ update_scratch = p + n - 1;
+#endif
+ scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+ if (scratch > talloc)
+ talloc = scratch;
+ }
+
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS(talloc);
+
+ if (usize > n)
+ {
+ mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
+
+ if (mpn_zero_p (up, n))
+ {
+ MPN_COPY (gp, vp, n);
+ ctx.gn = n;
+ goto done;
+ }
+ }
+
+ ctx.gp = gp;
+
+#if TUNE_GCD_P
+ while (CHOOSE_P (n) > 0)
+#else
+ while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+ {
+ struct hgcd_matrix M;
+ mp_size_t p = CHOOSE_P (n);
+ mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+ mp_size_t nn;
+ mpn_hgcd_matrix_init (&M, n - p, tp);
+ nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
+ if (nn > 0)
+ {
+ ASSERT (M.n <= (n - p - 1)/2);
+ ASSERT (M.n + p <= (p + n - 1) / 2);
+ /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+ n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
+ }
+ else
+ {
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
+ if (n == 0)
+ goto done;
+ }
+ }
+
+ while (n > 2)
+ {
+ struct hgcd_matrix1 M;
+ mp_limb_t uh, ul, vh, vl;
+ mp_limb_t mask;
+
+ mask = up[n-1] | vp[n-1];
+ ASSERT (mask > 0);
+
+ if (mask & GMP_NUMB_HIGHBIT)
+ {
+ uh = up[n-1]; ul = up[n-2];
+ vh = vp[n-1]; vl = vp[n-2];
+ }
+ else
+ {
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
+ ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
+ vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
+ vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
+ }
+
+ /* Try an mpn_hgcd2 step */
+ if (mpn_hgcd2 (uh, ul, vh, vl, &M))
+ {
+ n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
+ MP_PTR_SWAP (up, tp);
+ }
+ else
+ {
+ /* mpn_hgcd2 has failed. Then either one of a or b is very
+ small, or the difference is very small. Perform one
+ subtraction followed by one division. */
+
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
+ if (n == 0)
+ goto done;
+ }
+ }
+
+ ASSERT(up[n-1] | vp[n-1]);
+
+ /* Due to the calling convention for mpn_gcd, at most one can be even. */
+ if ((up[0] & 1) == 0)
+ MP_PTR_SWAP (up, vp);
+ ASSERT ((up[0] & 1) != 0);
+
+ {
+ mp_limb_t u0, u1, v0, v1;
+ mp_double_limb_t g;
+
+ u0 = up[0];
+ v0 = vp[0];
+
+ if (n == 1)
+ {
+ int cnt;
+ count_trailing_zeros (cnt, v0);
+ *gp = mpn_gcd_11 (u0, v0 >> cnt);
+ ctx.gn = 1;
+ goto done;
+ }
+
+ v1 = vp[1];
+ if (UNLIKELY (v0 == 0))
+ {
+ v0 = v1;
+ v1 = 0;
+ /* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could
+ when this situation occurs internally. */
+ }
+ if ((v0 & 1) == 0)
+ {
+ int cnt;
+ count_trailing_zeros (cnt, v0);
+ v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt);
+ v1 >>= cnt;
+ }
+
+ u1 = up[1];
+ g = mpn_gcd_22 (u1, u0, v1, v0);
+ gp[0] = g.d0;
+ gp[1] = g.d1;
+ ctx.gn = 1 + (g.d1 > 0);
+ }
+done:
+ TMP_FREE;
+ return ctx.gn;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/gcd_1.c b/vendor/gmp-6.3.0/mpn/generic/gcd_1.c
new file mode 100644
index 0000000..22b1422
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gcd_1.c
@@ -0,0 +1,103 @@
+/* mpn_gcd_1 -- mpn and limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Does not work for U == 0 or V == 0. It would be tough to make it work for
+ V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
+
+ The threshold for doing u%v when size==1 will vary by CPU according to
+ the speed of a division and the code generated for the main loop. Any
+ tuning for this is left to a CPU specific implementation. */
+
+mp_limb_t
+mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
+{
+ mp_limb_t ulimb;
+ unsigned long zero_bits, u_low_zero_bits;
+ int c;
+
+ ASSERT (size >= 1);
+ ASSERT (vlimb != 0);
+ ASSERT_MPN_NONZERO_P (up, size);
+
+ ulimb = up[0];
+
+ /* Need vlimb odd for modexact, want it odd to get common zeros. */
+ count_trailing_zeros (zero_bits, vlimb);
+ vlimb >>= zero_bits;
+
+ if (size > 1)
+ {
+ /* Must get common zeros before the mod reduction. If ulimb==0 then
+ vlimb already gives the common zeros. */
+ if (ulimb != 0)
+ {
+ count_trailing_zeros (u_low_zero_bits, ulimb);
+ zero_bits = MIN (zero_bits, u_low_zero_bits);
+ }
+
+ ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);
+ if (ulimb == 0)
+ goto done;
+
+ count_trailing_zeros (c, ulimb);
+ ulimb >>= c;
+ }
+ else
+ {
+ /* size==1, so up[0]!=0 */
+ count_trailing_zeros (u_low_zero_bits, ulimb);
+ ulimb >>= u_low_zero_bits;
+ zero_bits = MIN (zero_bits, u_low_zero_bits);
+
+ /* make u bigger */
+ if (vlimb > ulimb)
+ MP_LIMB_T_SWAP (ulimb, vlimb);
+
+ /* if u is much bigger than v, reduce using a division rather than
+ chipping away at it bit-by-bit */
+ if ((ulimb >> 16) > vlimb)
+ {
+ ulimb %= vlimb;
+ if (ulimb == 0)
+ goto done;
+
+ count_trailing_zeros (c, ulimb);
+ ulimb >>= c;
+ }
+ }
+
+ vlimb = mpn_gcd_11 (ulimb, vlimb);
+
+ done:
+ return vlimb << zero_bits;
+}
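+
+/* Illustrative sketch (not GMP code): this internal routine is normally
+   reached through documented interfaces such as mpz_gcd_ui.  A minimal use
+   of that public function, assuming only gmp.h; the operands are arbitrary
+   and their gcd happens to be 6.  Never compiled into the library.  */
+#if 0
+#include <gmp.h>
+static unsigned long
+gcd_1_usage (void)
+{
+  mpz_t a;
+  unsigned long g;
+  mpz_init_set_str (a, "123456789012345678901234567890", 10);
+  g = mpz_gcd_ui (NULL, a, 48UL);  /* gcd of a multi-limb operand and a limb-sized one */
+  mpz_clear (a);
+  return g;
+}
+#endif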
diff --git a/vendor/gmp-6.3.0/mpn/generic/gcd_11.c b/vendor/gmp-6.3.0/mpn/generic/gcd_11.c
new file mode 100644
index 0000000..214e45c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gcd_11.c
@@ -0,0 +1,74 @@
+/* mpn_gcd_11 -- limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+{
+ ASSERT (u & v & 1);
+
+ /* In this loop, we represent the odd numbers ulimb and vlimb
+ without the redundant least significant one bit. This reduction
+ in size by one bit ensures that the high bit of t, below, is set
+ if and only if vlimb > ulimb. */
+
+ u >>= 1;
+ v >>= 1;
+
+ while (u != v)
+ {
+ mp_limb_t t;
+ mp_limb_t vgtu;
+ int c;
+
+ t = u - v;
+ vgtu = LIMB_HIGHBIT_TO_MASK (t);
+
+ /* v <-- min (u, v) */
+ v += (vgtu & t);
+
+ /* u <-- |u - v| */
+ u = (t ^ vgtu) - vgtu;
+
+ count_trailing_zeros (c, t);
+ /* We have c <= GMP_LIMB_BITS - 2 here, so that
+
+ ulimb >>= (c + 1);
+
+ would be safe. But unlike the addition c + 1, a separate
+ shift by 1 is independent of c, and can be executed in
+ parallel with count_trailing_zeros. */
+ u = (u >> 1) >> c;
+ }
+ return (u << 1) + 1;
+}
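+
+/* Illustrative sketch (not GMP code): the branch-free min / absolute
+   difference step from the loop above, in isolation on 64-bit words.
+   highbit_to_mask is a throwaway stand-in for LIMB_HIGHBIT_TO_MASK; the
+   trick requires u, v < 2^63, which the loop guarantees by dropping the
+   redundant low one bit.  Never compiled into the library.  */
+#if 0
+#include <stdint.h>
+static uint64_t
+highbit_to_mask (uint64_t t)
+{
+  return - (uint64_t) (t >> 63);        /* all ones iff t has its high bit set */
+}
+static void
+min_and_absdiff (uint64_t u, uint64_t v, uint64_t *minp, uint64_t *absp)
+{
+  uint64_t t = u - v;
+  uint64_t vgtu = highbit_to_mask (t);  /* ~0 iff v > u */
+  *minp = v + (vgtu & t);               /* v + (u - v) = u when v > u, else v */
+  *absp = (t ^ vgtu) - vgtu;            /* |u - v|: conditional two's-complement negation */
+}
+#endif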
diff --git a/vendor/gmp-6.3.0/mpn/generic/gcd_22.c b/vendor/gmp-6.3.0/mpn/generic/gcd_22.c
new file mode 100644
index 0000000..d97f096
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gcd_22.c
@@ -0,0 +1,131 @@
+/* mpn_gcd_22 -- double limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nails not supported.
+#endif
+
+mp_double_limb_t
+mpn_gcd_22 (mp_limb_t u1, mp_limb_t u0, mp_limb_t v1, mp_limb_t v0)
+{
+ mp_double_limb_t g;
+ ASSERT (u0 & v0 & 1);
+
+ /* Implicit least significant bit */
+ u0 = (u0 >> 1) | (u1 << (GMP_LIMB_BITS - 1));
+ u1 >>= 1;
+
+ v0 = (v0 >> 1) | (v1 << (GMP_LIMB_BITS - 1));
+ v1 >>= 1;
+
+ while (u1 || v1) /* u1 == 0 can happen at most twice per call */
+ {
+ mp_limb_t vgtu, t1, t0;
+ sub_ddmmss (t1, t0, u1, u0, v1, v0);
+ vgtu = LIMB_HIGHBIT_TO_MASK(t1);
+
+ if (UNLIKELY (t0 == 0))
+ {
+ if (t1 == 0)
+ {
+ g.d1 = (u1 << 1) | (u0 >> (GMP_LIMB_BITS - 1));
+ g.d0 = (u0 << 1) | 1;
+ return g;
+ }
+ int c;
+ count_trailing_zeros (c, t1);
+
+ /* v1 = min (u1, v1) */
+ v1 += (vgtu & t1);
+ /* u0 = |u1 - v1| */
+ u0 = (t1 ^ vgtu) - vgtu;
+ ASSERT (c < GMP_LIMB_BITS - 1);
+ u0 >>= c + 1;
+ u1 = 0;
+ }
+ else
+ {
+ int c;
+ count_trailing_zeros (c, t0);
+ c++;
+ /* V <-- min (U, V).
+
+ Assembly version should use cmov. Another alternative,
+ avoiding carry propagation, would be
+
+         v0 += vgtu & t0; v1 += vgtu & (u1 - v1);
+ */
+ add_ssaaaa (v1, v0, v1, v0, vgtu & t1, vgtu & t0);
+ /* U <-- |U - V|
+ No carry handling needed in this conditional negation,
+ since t0 != 0. */
+ u0 = (t0 ^ vgtu) - vgtu;
+ u1 = t1 ^ vgtu;
+ if (UNLIKELY (c == GMP_LIMB_BITS))
+ {
+ u0 = u1;
+ u1 = 0;
+ }
+ else
+ {
+ u0 = (u0 >> c) | (u1 << (GMP_LIMB_BITS - c));
+ u1 >>= c;
+ }
+ }
+ }
+ while ((v0 | u0) & GMP_LIMB_HIGHBIT)
+ { /* At most two iterations */
+ mp_limb_t vgtu, t0;
+ int c;
+ sub_ddmmss (vgtu, t0, 0, u0, 0, v0);
+ if (UNLIKELY (t0 == 0))
+ {
+ g.d1 = u0 >> (GMP_LIMB_BITS - 1);
+ g.d0 = (u0 << 1) | 1;
+ return g;
+ }
+
+ /* v <-- min (u, v) */
+ v0 += (vgtu & t0);
+
+ /* u <-- |u - v| */
+ u0 = (t0 ^ vgtu) - vgtu;
+
+ count_trailing_zeros (c, t0);
+ u0 = (u0 >> 1) >> c;
+ }
+
+ g.d0 = mpn_gcd_11 ((u0 << 1) + 1, (v0 << 1) + 1);
+ g.d1 = 0;
+ return g;
+}
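+
+/* Illustrative note (not GMP code): the two shifts at the top apply the same
+   "implicit least significant one bit" representation as in mpn_gcd_11, but
+   to a two-limb value.  With 64-bit limbs and U = u1*2^64 + u0 odd, the pair
+   (u1 >> 1, (u0 >> 1) | (u1 << 63)) holds (U - 1)/2, and U = 2*((U-1)/2) + 1
+   is reconstructed when g is assembled at the end.  */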
diff --git a/vendor/gmp-6.3.0/mpn/generic/gcd_subdiv_step.c b/vendor/gmp-6.3.0/mpn/generic/gcd_subdiv_step.c
new file mode 100644
index 0000000..9c3b88d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gcd_subdiv_step.c
@@ -0,0 +1,204 @@
+/* gcd_subdiv_step.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include <stdlib.h> /* for NULL */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
+ b is small, or the difference is small. Perform one subtraction
+ followed by one division. The normal case is to compute the reduced
+ a and b, and return the new size.
+
+ If s == 0 (used for gcd and gcdext), returns zero if the gcd is
+ found.
+
+ If s > 0, don't reduce to size <= s, and return zero if no
+ reduction is possible (if either a, b or |a-b| is of size <= s). */
+
+/* The hook function is called as
+
+ hook(ctx, gp, gn, qp, qn, d)
+
+ in the following cases:
+
+ + If A = B at the start, G is the gcd, Q is NULL, d = -1.
+
+ + If one input is zero at the start, G is the gcd, Q is NULL,
+ d = 0 if A = G and d = 1 if B = G.
+
+ Otherwise, if d = 0 we have just subtracted a multiple of A from B,
+ and if d = 1 we have subtracted a multiple of B from A.
+
+ + If A = B after subtraction, G is the gcd, Q is NULL.
+
+ + If we get a zero remainder after division, G is the gcd, Q is the
+ quotient.
+
+ + Otherwise, G is NULL, Q is the quotient (often 1).
+
+ */
+
+mp_size_t
+mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
+ gcd_subdiv_step_hook *hook, void *ctx,
+ mp_ptr tp)
+{
+ static const mp_limb_t one = CNST_LIMB(1);
+ mp_size_t an, bn, qn;
+
+ int swapped;
+
+ ASSERT (n > 0);
+ ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+
+ an = bn = n;
+ MPN_NORMALIZE (ap, an);
+ MPN_NORMALIZE (bp, bn);
+
+ swapped = 0;
+
+ /* Arrange so that a < b, subtract b -= a, and maintain
+ normalization. */
+ if (an == bn)
+ {
+ int c;
+ MPN_CMP (c, ap, bp, an);
+ if (UNLIKELY (c == 0))
+ {
+ /* For gcdext, return the smallest of the two cofactors, so
+ pass d = -1. */
+ if (s == 0)
+ hook (ctx, ap, an, NULL, 0, -1);
+ return 0;
+ }
+ else if (c > 0)
+ {
+ MP_PTR_SWAP (ap, bp);
+ swapped ^= 1;
+ }
+ }
+ else
+ {
+ if (an > bn)
+ {
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ swapped ^= 1;
+ }
+ }
+ if (an <= s)
+ {
+ if (s == 0)
+ hook (ctx, bp, bn, NULL, 0, swapped ^ 1);
+ return 0;
+ }
+
+ ASSERT_NOCARRY (mpn_sub (bp, bp, bn, ap, an));
+ MPN_NORMALIZE (bp, bn);
+ ASSERT (bn > 0);
+
+ if (bn <= s)
+ {
+ /* Undo subtraction. */
+ mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+ if (cy > 0)
+ bp[an] = cy;
+ return 0;
+ }
+
+ /* Arrange so that a < b */
+ if (an == bn)
+ {
+ int c;
+ MPN_CMP (c, ap, bp, an);
+ if (UNLIKELY (c == 0))
+ {
+ if (s > 0)
+ /* Just record subtraction and return */
+ hook (ctx, NULL, 0, &one, 1, swapped);
+ else
+ /* Found gcd. */
+ hook (ctx, bp, bn, NULL, 0, swapped);
+ return 0;
+ }
+
+ hook (ctx, NULL, 0, &one, 1, swapped);
+
+ if (c > 0)
+ {
+ MP_PTR_SWAP (ap, bp);
+ swapped ^= 1;
+ }
+ }
+ else
+ {
+ hook (ctx, NULL, 0, &one, 1, swapped);
+
+ if (an > bn)
+ {
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ swapped ^= 1;
+ }
+ }
+
+ mpn_tdiv_qr (tp, bp, 0, bp, bn, ap, an);
+ qn = bn - an + 1;
+ bn = an;
+ MPN_NORMALIZE (bp, bn);
+
+ if (UNLIKELY (bn <= s))
+ {
+ if (s == 0)
+ {
+ hook (ctx, ap, an, tp, qn, swapped);
+ return 0;
+ }
+
+ /* Quotient is one too large, so decrement it and add back A. */
+ if (bn > 0)
+ {
+ mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+ if (cy)
+ bp[an++] = cy;
+ }
+ else
+ MPN_COPY (bp, ap, an);
+
+ MPN_DECR_U (tp, qn, 1);
+ }
+
+ hook (ctx, NULL, 0, tp, qn, swapped);
+ return an;
+}
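+
+/* Editorial sketch, not part of the upstream GMP file: the subtract-then-
+   divide step performed by mpn_gcd_subdiv_step above, on single words.  One
+   subtraction is always done, and a division only when the operands are
+   still far apart (here with s = 0 and no hook).  Returns the gcd once it is
+   found, otherwise 0 after reducing the pair in place.  Hypothetical helper
+   name, plain C. */
+static unsigned long
+gcd_subdiv_step_1_sketch (unsigned long *ap, unsigned long *bp)
+{
+  unsigned long a = *ap, b = *bp, t;
+  if (a > b)                      /* arrange a <= b */
+    { t = a; a = b; b = t; }
+  if (a == 0 || a == b)
+    return b;                     /* gcd found */
+  b -= a;                         /* one subtraction */
+  if (b >= a)
+    b %= a;                       /* followed by one division */
+  *ap = a;
+  *bp = b;                        /* caller keeps iterating on (a, b) */
+  return 0;
+}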
diff --git a/vendor/gmp-6.3.0/mpn/generic/gcdext.c b/vendor/gmp-6.3.0/mpn/generic/gcdext.c
new file mode 100644
index 0000000..5501480
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gcdext.c
@@ -0,0 +1,557 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and
+ the size is returned (if inputs are non-normalized, result may be
+ non-normalized too). Temporary space needed is M->n + n.
+ */
+static mp_size_t
+hgcd_mul_matrix_vector (struct hgcd_matrix *M,
+ mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+{
+ mp_limb_t ah, bh;
+
+ /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+ t = u00 * a
+ r = u10 * b
+ r += t;
+
+ t = u11 * b
+ b = u01 * a
+ b += t;
+ */
+
+ if (M->n >= n)
+ {
+ mpn_mul (tp, M->p[0][0], M->n, ap, n);
+ mpn_mul (rp, M->p[1][0], M->n, bp, n);
+ }
+ else
+ {
+ mpn_mul (tp, ap, n, M->p[0][0], M->n);
+ mpn_mul (rp, bp, n, M->p[1][0], M->n);
+ }
+
+ ah = mpn_add_n (rp, rp, tp, n + M->n);
+
+ if (M->n >= n)
+ {
+ mpn_mul (tp, M->p[1][1], M->n, bp, n);
+ mpn_mul (bp, M->p[0][1], M->n, ap, n);
+ }
+ else
+ {
+ mpn_mul (tp, bp, n, M->p[1][1], M->n);
+ mpn_mul (bp, ap, n, M->p[0][1], M->n);
+ }
+ bh = mpn_add_n (bp, bp, tp, n + M->n);
+
+ n += M->n;
+ if ( (ah | bh) > 0)
+ {
+ rp[n] = ah;
+ bp[n] = bh;
+ n++;
+ }
+ else
+ {
+ /* Normalize */
+ while ( (rp[n-1] | bp[n-1]) == 0)
+ n--;
+ }
+
+ return n;
+}
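+
+/* Editorial sketch, not part of the upstream GMP file: the (r;b) <-- (a;b) M
+   update above on single words, following the same schedule (each output is
+   the sum of two products, accumulated through a temporary).  m00..m11 stand
+   in for M->p[0][0]..M->p[1][1]; the helper name is hypothetical. */
+static void
+mul_matrix_vector_1_sketch (unsigned long *rp, unsigned long *bp,
+			    unsigned long a,
+			    unsigned long m00, unsigned long m01,
+			    unsigned long m10, unsigned long m11)
+{
+  unsigned long b = *bp, t;
+  t = m00 * a;                    /* t = u00 a     */
+  *rp = m10 * b + t;              /* r = u10 b + t */
+  t = m11 * b;                    /* t = u11 b     */
+  *bp = m01 * a + t;              /* b = u01 a + t */
+}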
+
+#define COMPUTE_V_ITCH(n) (2*(n))
+
+/* Computes |v| = |(g - u a)| / b, where u may be positive or
+ negative, and v is of the opposite sign. max(a, b) is of size n, u and
+ v at most size n, and v must have space for n+1 limbs. */
+static mp_size_t
+compute_v (mp_ptr vp,
+ mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+ mp_srcptr gp, mp_size_t gn,
+ mp_srcptr up, mp_size_t usize,
+ mp_ptr tp)
+{
+ mp_size_t size;
+ mp_size_t an;
+ mp_size_t bn;
+ mp_size_t vn;
+
+ ASSERT (n > 0);
+ ASSERT (gn > 0);
+ ASSERT (usize != 0);
+
+ size = ABS (usize);
+ ASSERT (size <= n);
+ ASSERT (up[size-1] > 0);
+
+ an = n;
+ MPN_NORMALIZE (ap, an);
+ ASSERT (gn <= an);
+
+ if (an >= size)
+ mpn_mul (tp, ap, an, up, size);
+ else
+ mpn_mul (tp, up, size, ap, an);
+
+ size += an;
+
+ if (usize > 0)
+ {
+ /* |v| = -v = (u a - g) / b */
+
+ ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
+ MPN_NORMALIZE (tp, size);
+ if (size == 0)
+ return 0;
+ }
+ else
+ { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
+ (g + |u| a) always fits in (|usize| + an) limbs. */
+
+ ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
+ size -= (tp[size - 1] == 0);
+ }
+
+ /* Now divide t / b. There must be no remainder */
+ bn = n;
+ MPN_NORMALIZE (bp, bn);
+ ASSERT (size >= bn);
+
+ vn = size + 1 - bn;
+ ASSERT (vn <= n + 1);
+
+ mpn_divexact (vp, tp, size, bp, bn);
+ vn -= (vp[vn-1] == 0);
+
+ return vn;
+}
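+
+/* Editorial note, not part of the upstream GMP file: the identity behind
+   compute_v, checked on single words.  With a = 12, b = 8, g = 4 and u = 1,
+   v = (g - u a) / b = (4 - 12) / 8 = -1, and indeed 1*12 + (-1)*8 = 4.
+   A minimal check in plain C (hypothetical helper): */
+static int
+compute_v_identity_sketch (long a, long b, long g, long u)
+{
+  long v = (g - u * a) / b;       /* division is exact, as in compute_v */
+  return u * a + v * b == g;      /* nonzero when the identity holds */
+}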
+
+/* Temporary storage:
+
+ Initial division: Quotient of at most an - n + 1 <= an limbs.
+
+ Storage for u0 and u1: 2(n+1).
+
+ Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)
+
+ Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.
+
+ When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1) for the cofactors.
+
+   When hgcd fails: 2n + 1 for mpn_gcd_subdiv_step, which is less.
+
+   For the lehmer call after the loop, let T denote
+ GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
+ u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
+ for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
+ sufficient for both operations.
+
+*/
+
+/* Optimal choice of p seems difficult. In each iteration the division
+ * of work between hgcd and the updates of u0 and u1 depends on the
+ * current size of the u. It may be desirable to use a different
+ * choice of p in each iteration. Also the input size seems to matter;
+ * choosing p = n / 3 in the first iteration seems to improve
+ * performance slightly for input size just above the threshold, but
+ * degrade performance for larger inputs. */
+#define CHOOSE_P_1(n) ((n) / 2)
+#define CHOOSE_P_2(n) ((n) / 3)
+
+mp_size_t
+mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
+ mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
+{
+ mp_size_t talloc;
+ mp_size_t scratch;
+ mp_size_t matrix_scratch;
+ mp_size_t ualloc = n + 1;
+
+ struct gcdext_ctx ctx;
+ mp_size_t un;
+ mp_ptr u0;
+ mp_ptr u1;
+
+ mp_ptr tp;
+
+ TMP_DECL;
+
+ ASSERT (an >= n);
+ ASSERT (n > 0);
+ ASSERT (bp[n-1] > 0);
+
+ TMP_MARK;
+
+ /* FIXME: Check for small sizes first, before setting up temporary
+ storage etc. */
+ talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);
+
+ /* For initial division */
+ scratch = an - n + 1;
+ if (scratch > talloc)
+ talloc = scratch;
+
+ if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+ {
+ /* For hgcd loop. */
+ mp_size_t hgcd_scratch;
+ mp_size_t update_scratch;
+ mp_size_t p1 = CHOOSE_P_1 (n);
+ mp_size_t p2 = CHOOSE_P_2 (n);
+ mp_size_t min_p = MIN(p1, p2);
+ mp_size_t max_p = MAX(p1, p2);
+ matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
+ hgcd_scratch = mpn_hgcd_itch (n - min_p);
+ update_scratch = max_p + n - 1;
+
+ scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+ if (scratch > talloc)
+ talloc = scratch;
+
+ /* Final mpn_gcdext_lehmer_n call. Need space for u and for
+ copies of a and b. */
+ scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
+ + 3*GCDEXT_DC_THRESHOLD;
+
+ if (scratch > talloc)
+ talloc = scratch;
+
+ /* Cofactors u0 and u1 */
+ talloc += 2*(n+1);
+ }
+
+ tp = TMP_ALLOC_LIMBS(talloc);
+
+ if (an > n)
+ {
+ mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);
+
+ if (mpn_zero_p (ap, n))
+ {
+ MPN_COPY (gp, bp, n);
+ *usizep = 0;
+ TMP_FREE;
+ return n;
+ }
+ }
+
+ if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+ {
+ mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);
+
+ TMP_FREE;
+ return gn;
+ }
+
+ MPN_ZERO (tp, 2*ualloc);
+ u0 = tp; tp += ualloc;
+ u1 = tp; tp += ualloc;
+
+ ctx.gp = gp;
+ ctx.up = up;
+ ctx.usize = usizep;
+
+ {
+ /* For the first hgcd call, there are no u updates, and it makes
+ some sense to use a different choice for p. */
+
+ /* FIXME: We could trim use of temporary storage, since u0 and u1
+ are not used yet. For the hgcd call, we could swap in the u0
+ and u1 pointers for the relevant matrix elements. */
+
+ struct hgcd_matrix M;
+ mp_size_t p = CHOOSE_P_1 (n);
+ mp_size_t nn;
+
+ mpn_hgcd_matrix_init (&M, n - p, tp);
+ nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
+ if (nn > 0)
+ {
+ ASSERT (M.n <= (n - p - 1)/2);
+ ASSERT (M.n + p <= (p + n - 1) / 2);
+
+ /* Temporary storage 2 (p + M->n) <= p + n - 1 */
+ n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+
+ MPN_COPY (u0, M.p[1][0], M.n);
+ MPN_COPY (u1, M.p[1][1], M.n);
+ un = M.n;
+ while ( (u0[un-1] | u1[un-1] ) == 0)
+ un--;
+ }
+ else
+ {
+ /* mpn_hgcd has failed. Then either one of a or b is very
+ small, or the difference is very small. Perform one
+ subtraction followed by one division. */
+ u1[0] = 1;
+
+ ctx.u0 = u0;
+ ctx.u1 = u1;
+ ctx.tp = tp + n; /* ualloc */
+ ctx.un = 1;
+
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+ if (n == 0)
+ {
+ TMP_FREE;
+ return ctx.gn;
+ }
+
+ un = ctx.un;
+ ASSERT (un < ualloc);
+ }
+ }
+
+ while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+ {
+ struct hgcd_matrix M;
+ mp_size_t p = CHOOSE_P_2 (n);
+ mp_size_t nn;
+
+ mpn_hgcd_matrix_init (&M, n - p, tp);
+ nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
+ if (nn > 0)
+ {
+ mp_ptr t0;
+
+ t0 = tp + matrix_scratch;
+ ASSERT (M.n <= (n - p - 1)/2);
+ ASSERT (M.n + p <= (p + n - 1) / 2);
+
+ /* Temporary storage 2 (p + M->n) <= p + n - 1 */
+ n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);
+
+ /* By the same analysis as for mpn_hgcd_matrix_mul */
+ ASSERT (M.n + un <= ualloc);
+
+ /* FIXME: This copying could be avoided by some swapping of
+ * pointers. May need more temporary storage, though. */
+ MPN_COPY (t0, u0, un);
+
+ /* Temporary storage ualloc */
+ un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);
+
+ ASSERT (un < ualloc);
+ ASSERT ( (u0[un-1] | u1[un-1]) > 0);
+ }
+ else
+ {
+ /* mpn_hgcd has failed. Then either one of a or b is very
+ small, or the difference is very small. Perform one
+ subtraction followed by one division. */
+ ctx.u0 = u0;
+ ctx.u1 = u1;
+ ctx.tp = tp + n; /* ualloc */
+ ctx.un = un;
+
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+ if (n == 0)
+ {
+ TMP_FREE;
+ return ctx.gn;
+ }
+
+ un = ctx.un;
+ ASSERT (un < ualloc);
+ }
+ }
+ /* We have A = ... a + ... b
+ B = u0 a + u1 b
+
+ a = u1 A + ... B
+ b = -u0 A + ... B
+
+ with bounds
+
+ |u0|, |u1| <= B / min(a, b)
+
+ We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
+ in which case the only reduction done so far is a = A - k B for
+ some k.
+
+ Compute g = u a + v b = (u u1 - v u0) A + (...) B
+ Here, u, v are bounded by
+
+ |u| <= b,
+ |v| <= a
+ */
+
+ ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+ if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
+ {
+ /* Must return the smallest cofactor, +u1 or -u0 */
+ int c;
+
+ MPN_COPY (gp, ap, n);
+
+ MPN_CMP (c, u0, u1, un);
+ /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
+ this case we choose the cofactor + 1, corresponding to G = A
+ - k B, rather than -1, corresponding to G = - A + (k+1) B. */
+ ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+ if (c < 0)
+ {
+ MPN_NORMALIZE (u0, un);
+ MPN_COPY (up, u0, un);
+ *usizep = -un;
+ }
+ else
+ {
+ MPN_NORMALIZE_NOT_ZERO (u1, un);
+ MPN_COPY (up, u1, un);
+ *usizep = un;
+ }
+
+ TMP_FREE;
+ return n;
+ }
+ else if (UNLIKELY (u0[0] == 0) && un == 1)
+ {
+ mp_size_t gn;
+ ASSERT (u1[0] == 1);
+
+ /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
+ gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);
+
+ TMP_FREE;
+ return gn;
+ }
+ else
+ {
+ mp_size_t u0n;
+ mp_size_t u1n;
+ mp_size_t lehmer_un;
+ mp_size_t lehmer_vn;
+ mp_size_t gn;
+
+ mp_ptr lehmer_up;
+ mp_ptr lehmer_vp;
+ int negate;
+
+ lehmer_up = tp; tp += n;
+
+ /* Call mpn_gcdext_lehmer_n with copies of a and b. */
+ MPN_COPY (tp, ap, n);
+ MPN_COPY (tp + n, bp, n);
+ gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);
+
+ u0n = un;
+ MPN_NORMALIZE (u0, u0n);
+ ASSERT (u0n > 0);
+
+ if (lehmer_un == 0)
+ {
+ /* u == 0 ==> v = g / b == 1 ==> g = - u0 A + (...) B */
+ MPN_COPY (up, u0, u0n);
+ *usizep = -u0n;
+
+ TMP_FREE;
+ return gn;
+ }
+
+ lehmer_vp = tp;
+ /* Compute v = (g - u a) / b */
+ lehmer_vn = compute_v (lehmer_vp,
+ ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);
+
+ if (lehmer_un > 0)
+ negate = 0;
+ else
+ {
+ lehmer_un = -lehmer_un;
+ negate = 1;
+ }
+
+ u1n = un;
+ MPN_NORMALIZE (u1, u1n);
+ ASSERT (u1n > 0);
+
+ ASSERT (lehmer_un + u1n <= ualloc);
+ ASSERT (lehmer_vn + u0n <= ualloc);
+
+ /* We may still have v == 0 */
+
+      /* Compute u u1 */
+ if (lehmer_un <= u1n)
+ /* Should be the common case */
+ mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
+ else
+ mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);
+
+ un = u1n + lehmer_un;
+ un -= (up[un - 1] == 0);
+
+ if (lehmer_vn > 0)
+ {
+ mp_limb_t cy;
+
+ /* Overwrites old u1 value */
+ if (lehmer_vn <= u0n)
+ /* Should be the common case */
+ mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
+ else
+ mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);
+
+ u1n = u0n + lehmer_vn;
+ u1n -= (u1[u1n - 1] == 0);
+
+ if (u1n <= un)
+ {
+ cy = mpn_add (up, up, un, u1, u1n);
+ }
+ else
+ {
+ cy = mpn_add (up, u1, u1n, up, un);
+ un = u1n;
+ }
+ up[un] = cy;
+ un += (cy != 0);
+
+ ASSERT (un < ualloc);
+ }
+ *usizep = negate ? -un : un;
+
+ TMP_FREE;
+ return gn;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/gcdext_1.c b/vendor/gmp-6.3.0/mpn/generic/gcdext_1.c
new file mode 100644
index 0000000..b221a92
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gcdext_1.c
@@ -0,0 +1,275 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef GCDEXT_1_USE_BINARY
+#define GCDEXT_1_USE_BINARY 0
+#endif
+
+#ifndef GCDEXT_1_BINARY_METHOD
+#define GCDEXT_1_BINARY_METHOD 2
+#endif
+
+#if GCDEXT_1_USE_BINARY
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
+ mp_limb_t u, mp_limb_t v)
+{
+ /* Maintain
+
+ U = t1 u + t0 v
+ V = s1 u + s0 v
+
+ where U, V are the inputs (without any shared power of two),
+ and the matrix has determinant ± 2^{shift}.
+ */
+ mp_limb_t s0 = 1;
+ mp_limb_t t0 = 0;
+ mp_limb_t s1 = 0;
+ mp_limb_t t1 = 1;
+ mp_limb_t ug;
+ mp_limb_t vg;
+ mp_limb_t ugh;
+ mp_limb_t vgh;
+ unsigned zero_bits;
+ unsigned shift;
+ unsigned i;
+#if GCDEXT_1_BINARY_METHOD == 2
+ mp_limb_t det_sign;
+#endif
+
+ ASSERT (u > 0);
+ ASSERT (v > 0);
+
+ count_trailing_zeros (zero_bits, u | v);
+ u >>= zero_bits;
+ v >>= zero_bits;
+
+ if ((u & 1) == 0)
+ {
+ count_trailing_zeros (shift, u);
+ u >>= shift;
+ t1 <<= shift;
+ }
+ else if ((v & 1) == 0)
+ {
+ count_trailing_zeros (shift, v);
+ v >>= shift;
+ s0 <<= shift;
+ }
+ else
+ shift = 0;
+
+#if GCDEXT_1_BINARY_METHOD == 1
+ while (u != v)
+ {
+ unsigned count;
+ if (u > v)
+ {
+ u -= v;
+
+ count_trailing_zeros (count, u);
+ u >>= count;
+
+ t0 += t1; t1 <<= count;
+ s0 += s1; s1 <<= count;
+ }
+ else
+ {
+ v -= u;
+
+ count_trailing_zeros (count, v);
+ v >>= count;
+
+ t1 += t0; t0 <<= count;
+ s1 += s0; s0 <<= count;
+ }
+ shift += count;
+ }
+#else
+# if GCDEXT_1_BINARY_METHOD == 2
+ u >>= 1;
+ v >>= 1;
+
+ det_sign = 0;
+
+ while (u != v)
+ {
+ unsigned count;
+ mp_limb_t d = u - v;
+ mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
+ mp_limb_t sx;
+ mp_limb_t tx;
+
+ /* When v <= u (vgtu == 0), the updates are:
+
+ (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor)
+ (t1, t0) <-- (t1 << count, t0 + t1)
+
+	 and when v > u, the updates are
+
+ (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count))
+ (t1, t0) <-- (t0 << count, t0 + t1)
+
+ and similarly for s1, s0
+ */
+
+ /* v <-- min (u, v) */
+ v += (vgtu & d);
+
+ /* u <-- |u - v| */
+ u = (d ^ vgtu) - vgtu;
+
+ /* Number of trailing zeros is the same no matter if we look at
+ * d or u, but using d gives more parallelism. */
+ count_trailing_zeros (count, d);
+
+ det_sign ^= vgtu;
+
+ tx = vgtu & (t0 - t1);
+ sx = vgtu & (s0 - s1);
+ t0 += t1;
+ s0 += s1;
+ t1 += tx;
+ s1 += sx;
+
+ count++;
+ u >>= count;
+ t1 <<= count;
+ s1 <<= count;
+ shift += count;
+ }
+ u = (u << 1) + 1;
+# else /* GCDEXT_1_BINARY_METHOD == 2 */
+# error Unknown GCDEXT_1_BINARY_METHOD
+# endif
+#endif
+
+ /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
+ ug = t0 + t1;
+ vg = s0 + s1;
+
+ ugh = ug/2 + (ug & 1);
+ vgh = vg/2 + (vg & 1);
+
+ /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
+ s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
+ for (i = 0; i < shift; i++)
+ {
+ mp_limb_t mask = - ( (s0 | t0) & 1);
+
+ s0 /= 2;
+ t0 /= 2;
+ s0 += mask & vgh;
+ t0 += mask & ugh;
+ }
+
+ ASSERT_ALWAYS (s0 <= vg);
+ ASSERT_ALWAYS (t0 <= ug);
+
+ if (s0 > vg - s0)
+ {
+ s0 -= vg;
+ t0 -= ug;
+ }
+#if GCDEXT_1_BINARY_METHOD == 2
+ /* Conditional negation. */
+ s0 = (s0 ^ det_sign) - det_sign;
+ t0 = (t0 ^ det_sign) - det_sign;
+#endif
+ *sp = s0;
+ *tp = -t0;
+
+ return u << zero_bits;
+}
+
+#else /* !GCDEXT_1_USE_BINARY */
+
+
+/* FIXME: Takes two single-word limbs. It could be extended to a
+ * function that accepts a bignum for the first input, and only
+ * returns the first co-factor. */
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
+ mp_limb_t a, mp_limb_t b)
+{
+ /* Maintain
+
+ a = u0 A + v0 B
+ b = u1 A + v1 B
+
+ where A, B are the original inputs.
+ */
+ mp_limb_signed_t u0 = 1;
+ mp_limb_signed_t v0 = 0;
+ mp_limb_signed_t u1 = 0;
+ mp_limb_signed_t v1 = 1;
+
+ ASSERT (a > 0);
+ ASSERT (b > 0);
+
+ if (a < b)
+ goto divide_by_b;
+
+ for (;;)
+ {
+ mp_limb_t q;
+
+ q = a / b;
+ a -= q * b;
+
+ if (a == 0)
+ {
+ *up = u1;
+ *vp = v1;
+ return b;
+ }
+ u0 -= q * u1;
+ v0 -= q * v1;
+
+ divide_by_b:
+ q = b / a;
+ b -= q * a;
+
+ if (b == 0)
+ {
+ *up = u0;
+ *vp = v0;
+ return a;
+ }
+ u1 -= q * u0;
+ v1 -= q * v0;
+ }
+}
+#endif /* !GCDEXT_1_USE_BINARY */
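+
+/* Editorial sketch, not part of the upstream GMP file: the invariant kept by
+   the Euclidean branch above, written as an ordinary extended Euclid on
+   signed words.  Throughout the loop, a = u0*A + v0*B and b = u1*A + v1*B,
+   which is exactly the relation documented at the top of mpn_gcdext_1.
+   Hypothetical helper name, plain C. */
+static long
+gcdext_1_sketch (long *up, long *vp, long a, long b)
+{
+  long u0 = 1, v0 = 0;            /* a = u0*A + v0*B */
+  long u1 = 0, v1 = 1;            /* b = u1*A + v1*B */
+  while (b != 0)
+    {
+      long q = a / b, t;
+      t = a - q * b;   a = b;  b = t;     /* (a, b) <-- (b, a mod b) */
+      t = u0 - q * u1; u0 = u1; u1 = t;   /* cofactors follow the same step */
+      t = v0 - q * v1; v0 = v1; v1 = t;
+    }
+  *up = u0;
+  *vp = v0;
+  return a;                       /* gcd, with gcd = u0*A + v0*B */
+}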
diff --git a/vendor/gmp-6.3.0/mpn/generic/gcdext_lehmer.c b/vendor/gmp-6.3.0/mpn/generic/gcdext_lehmer.c
new file mode 100644
index 0000000..ea4e86d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gcdext_lehmer.c
@@ -0,0 +1,336 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Here, d is the index of the cofactor to update. FIXME: Could use qn
+ = 0 for the common case q = 1. */
+void
+mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
+ mp_srcptr qp, mp_size_t qn, int d)
+{
+ struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
+ mp_size_t un = ctx->un;
+
+ if (gp)
+ {
+ mp_srcptr up;
+
+ ASSERT (gn > 0);
+ ASSERT (gp[gn-1] > 0);
+
+ MPN_COPY (ctx->gp, gp, gn);
+ ctx->gn = gn;
+
+ if (d < 0)
+ {
+ int c;
+
+ /* Must return the smallest cofactor, +u1 or -u0 */
+ MPN_CMP (c, ctx->u0, ctx->u1, un);
+ ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
+
+ d = c < 0;
+ }
+
+ up = d ? ctx->u0 : ctx->u1;
+
+ MPN_NORMALIZE (up, un);
+ MPN_COPY (ctx->up, up, un);
+
+ *ctx->usize = d ? -un : un;
+ }
+ else
+ {
+ mp_limb_t cy;
+ mp_ptr u0 = ctx->u0;
+ mp_ptr u1 = ctx->u1;
+
+ ASSERT (d >= 0);
+
+ if (d)
+ MP_PTR_SWAP (u0, u1);
+
+ qn -= (qp[qn-1] == 0);
+
+ /* Update u0 += q * u1 */
+ if (qn == 1)
+ {
+ mp_limb_t q = qp[0];
+
+ if (q == 1)
+ /* A common case. */
+ cy = mpn_add_n (u0, u0, u1, un);
+ else
+ cy = mpn_addmul_1 (u0, u1, un, q);
+ }
+ else
+ {
+ mp_size_t u1n;
+ mp_ptr tp;
+
+ u1n = un;
+ MPN_NORMALIZE (u1, u1n);
+
+ if (u1n == 0)
+ return;
+
+ /* Should always have u1n == un here, and u1 >= u0. The
+ reason is that we alternate adding u0 to u1 and u1 to u0
+ (corresponding to subtractions a - b and b - a), and we
+ can get a large quotient only just after a switch, which
+ means that we'll add (a multiple of) the larger u to the
+ smaller. */
+
+ tp = ctx->tp;
+
+ if (qn > u1n)
+ mpn_mul (tp, qp, qn, u1, u1n);
+ else
+ mpn_mul (tp, u1, u1n, qp, qn);
+
+ u1n += qn;
+ u1n -= tp[u1n-1] == 0;
+
+ if (u1n >= un)
+ {
+ cy = mpn_add (u0, tp, u1n, u0, un);
+ un = u1n;
+ }
+ else
+ /* Note: Unlikely case, maybe never happens? */
+ cy = mpn_add (u0, u0, un, tp, u1n);
+
+ }
+ u0[un] = cy;
+ ctx->un = un + (cy > 0);
+ }
+}
+
+/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
+ the matrix-vector multiplication adjusting a, b. If hgcd fails, we
+ need at most n for the quotient and n+1 for the u update (reusing
+ the extra u). In all, 4n + 3. */
+
+mp_size_t
+mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
+ mp_ptr ap, mp_ptr bp, mp_size_t n,
+ mp_ptr tp)
+{
+ mp_size_t ualloc = n + 1;
+
+ /* Keeps track of the second row of the reduction matrix
+ *
+ * M = (v0, v1 ; u0, u1)
+ *
+ * which correspond to the first column of the inverse
+ *
+ * M^{-1} = (u1, -v1; -u0, v0)
+ *
+ * This implies that
+ *
+ * a = u1 A (mod B)
+ * b = -u0 A (mod B)
+ *
+ * where A, B denotes the input values.
+ */
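+
+  /* Editorial worked example, not part of the upstream GMP file: with
+     M = (v0, v1; u0, u1) = (2, 1; 1, 1) the determinant is 2*1 - 1*1 = 1,
+     and the stated inverse (u1, -v1; -u0, v0) = (1, -1; -1, 2) indeed gives
+     (2, 1; 1, 1) (1, -1; -1, 2) = (1, 0; 0, 1), which is the relation the
+     cofactor bookkeeping below relies on. */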
+
+ struct gcdext_ctx ctx;
+ mp_size_t un;
+ mp_ptr u0;
+ mp_ptr u1;
+ mp_ptr u2;
+
+ MPN_ZERO (tp, 3*ualloc);
+ u0 = tp; tp += ualloc;
+ u1 = tp; tp += ualloc;
+ u2 = tp; tp += ualloc;
+
+ u1[0] = 1; un = 1;
+
+ ctx.gp = gp;
+ ctx.up = up;
+ ctx.usize = usize;
+
+ /* FIXME: Handle n == 2 differently, after the loop? */
+ while (n >= 2)
+ {
+ struct hgcd_matrix1 M;
+ mp_limb_t ah, al, bh, bl;
+ mp_limb_t mask;
+
+ mask = ap[n-1] | bp[n-1];
+ ASSERT (mask > 0);
+
+ if (mask & GMP_NUMB_HIGHBIT)
+ {
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else if (n == 2)
+ {
+ /* We use the full inputs without truncation, so we can
+ safely shift left. */
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
+ al = ap[0] << shift;
+ bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
+ bl = bp[0] << shift;
+ }
+ else
+ {
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+ al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+ bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+ bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+ }
+
+      /* Try an mpn_hgcd2 step */
+ if (mpn_hgcd2 (ah, al, bh, bl, &M))
+ {
+ n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+ MP_PTR_SWAP (ap, tp);
+ un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
+ MP_PTR_SWAP (u0, u2);
+ }
+ else
+ {
+ /* mpn_hgcd2 has failed. Then either one of a or b is very
+ small, or the difference is very small. Perform one
+ subtraction followed by one division. */
+ ctx.u0 = u0;
+ ctx.u1 = u1;
+ ctx.tp = u2;
+ ctx.un = un;
+
+ /* Temporary storage n for the quotient and ualloc for the
+ new cofactor. */
+ n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+ if (n == 0)
+ return ctx.gn;
+
+ un = ctx.un;
+ }
+ }
+ ASSERT_ALWAYS (ap[0] > 0);
+ ASSERT_ALWAYS (bp[0] > 0);
+
+ if (ap[0] == bp[0])
+ {
+ int c;
+
+ /* Which cofactor to return now? Candidates are +u1 and -u0,
+ depending on which of a and b was most recently reduced,
+ which we don't keep track of. So compare and get the smallest
+ one. */
+
+ gp[0] = ap[0];
+
+ MPN_CMP (c, u0, u1, un);
+ ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+ if (c < 0)
+ {
+ MPN_NORMALIZE (u0, un);
+ MPN_COPY (up, u0, un);
+ *usize = -un;
+ }
+ else
+ {
+ MPN_NORMALIZE_NOT_ZERO (u1, un);
+ MPN_COPY (up, u1, un);
+ *usize = un;
+ }
+ return 1;
+ }
+ else
+ {
+ mp_limb_t uh, vh;
+ mp_limb_signed_t u;
+ mp_limb_signed_t v;
+ int negate;
+
+ gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
+
+ /* Set up = u u1 - v u0. Keep track of size, un grows by one or
+ two limbs. */
+
+ if (u == 0)
+ {
+ ASSERT (v == 1);
+ MPN_NORMALIZE (u0, un);
+ MPN_COPY (up, u0, un);
+ *usize = -un;
+ return 1;
+ }
+ else if (v == 0)
+ {
+ ASSERT (u == 1);
+ MPN_NORMALIZE (u1, un);
+ MPN_COPY (up, u1, un);
+ *usize = un;
+ return 1;
+ }
+ else if (u > 0)
+ {
+ negate = 0;
+ ASSERT (v < 0);
+ v = -v;
+ }
+ else
+ {
+ negate = 1;
+ ASSERT (v > 0);
+ u = -u;
+ }
+
+ uh = mpn_mul_1 (up, u1, un, u);
+ vh = mpn_addmul_1 (up, u0, un, v);
+
+ if ( (uh | vh) > 0)
+ {
+ uh += vh;
+ up[un++] = uh;
+ if (uh < vh)
+ up[un++] = 1;
+ }
+
+ MPN_NORMALIZE_NOT_ZERO (up, un);
+
+ *usize = negate ? -un : un;
+ return 1;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/get_d.c b/vendor/gmp-6.3.0/mpn/generic/get_d.c
new file mode 100644
index 0000000..8bef128
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/get_d.c
@@ -0,0 +1,438 @@
+/* mpn_get_d -- limbs to double conversion.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2003, 2004, 2007, 2009, 2010, 2012, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h> /* for DBL_MANT_DIG and FLT_RADIX */
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+/* To force use of the generic C code for testing, put
+ "#define _GMP_IEEE_FLOATS 0" at this point. */
+
+
+/* In alpha gcc prior to 3.4, signed DI comparisons involving constants are
+ rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly
+ wrong if that addition overflows.
+
+ The workaround here avoids this bug by ensuring n is not a literal constant.
+ Note that this is alpha specific. The offending transformation is/was in
+ alpha.c alpha_emit_conditional_branch() under "We want to use cmpcc/bcc".
+
+ Bizarrely, this happens also with Cray cc on alphaev5-cray-unicosmk2.0.6.X,
+ and has the same solution. Don't know why or how. */
+
+#if HAVE_HOST_CPU_FAMILY_alpha \
+ && ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4)) \
+ || defined (_CRAY))
+static volatile const long CONST_1024 = 1024;
+static volatile const long CONST_NEG_1023 = -1023;
+static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53;
+#else
+#define CONST_1024 (1024)
+#define CONST_NEG_1023 (-1023)
+#define CONST_NEG_1022_SUB_53 (-1022 - 53)
+#endif
+
+
+/* Return the value {ptr,size}*2^exp, and negative if sign<0. Must have
+ size>=1, and a non-zero high limb ptr[size-1].
+
+ When we know the fp format, the result is truncated towards zero. This is
+ consistent with other gmp conversions, like mpz_set_f or mpz_set_q, and is
+ easy to implement and test.
+
+ When we do not know the format, such truncation seems much harder. One
+ would need to defeat any rounding mode, including round-up.
+
+ It's felt that GMP is not primarily concerned with hardware floats, and
+ really isn't enhanced by getting involved with hardware rounding modes
+ (which could even be some weird unknown style), so something unambiguous and
+ straightforward is best.
+
+
+ The IEEE code below is the usual case, it knows either a 32-bit or 64-bit
+ limb and is done with shifts and masks. The 64-bit case in particular
+ should come out nice and compact.
+
+ The generic code used to work one bit at a time, which was not only slow,
+ but implicitly relied upon denorms for intermediates, since the lowest bits'
+ weight of a perfectly valid fp number underflows in non-denorm. Therefore,
+ the generic code now works limb-per-limb, initially creating a number x such
+ that 1 <= x <= BASE. (BASE is reached only as result of rounding.) Then
+   that 1 <= x <= BASE. (BASE is reached only as a result of rounding.) Then
+ dependency). It is a tap-dance to avoid underflow or overflow, beware!
+
+
+ Traps:
+
+ Hardware traps for overflow to infinity, underflow to zero, or unsupported
+ denorms may or may not be taken. The IEEE code works bitwise and so
+ probably won't trigger them, the generic code works by float operations and
+ so probably will. This difference might be thought less than ideal, but
+   again it's felt straightforward code is better than trying to get intimate
+ with hardware exceptions (of perhaps unknown nature).
+
+
+ Not done:
+
+ mpz_get_d in the past handled size==1 with a cast limb->double. This might
+ still be worthwhile there (for up to the mantissa many bits), but for
+ mpn_get_d here, the cost of applying "exp" to the resulting exponent would
+ probably use up any benefit a cast may have over bit twiddling. Also, if
+ the exponent is pushed into denorm range then bit twiddling is the only
+ option, to ensure the desired truncation is obtained.
+
+
+ Other:
+
+ For reference, note that HPPA 8000, 8200, 8500 and 8600 trap FCNV,UDW,DBL
+ to the kernel for values >= 2^63. This makes it slow, and worse the kernel
+ Linux (what versions?) apparently uses untested code in its trap handling
+ routines, and gets the sign wrong. We don't use such a limb-to-double
+   cast in either the IEEE or the generic code. */
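+
+/* Editorial sketch, not part of the upstream GMP file: the generic strategy
+   described above on toy 8-bit "limbs" (up[0] least significant).  The limbs
+   are first folded into a double kept within a factor of the limb base of 1,
+   then the binary exponent is applied by square-and-multiply; exp is taken
+   relative to a radix point just above up[size-1].  The real code below
+   instead walks from the most significant end and stops once further limbs
+   cannot affect the result.  Hypothetical helper, plain C. */
+static double
+get_d_generic_sketch (const unsigned char *up, int size, long exp)
+{
+  double base = 256.0;            /* toy limb base, 8-bit limbs */
+  double d = 0.0;
+  double w;
+  unsigned long e;
+  int i;
+
+  for (i = 0; i < size; i++)      /* fold limbs, least significant first */
+    d = d / base + up[i];
+  d /= base;                      /* d = value / base^size, in [1/base, 1)
+                                     when up[size-1] != 0 */
+
+  /* Apply 2^exp by square-and-multiply, as the generic code below does. */
+  w = exp >= 0 ? 2.0 : 0.5;
+  e = exp >= 0 ? (unsigned long) exp : - (unsigned long) exp;
+  if (e & 1)
+    d *= w;
+  while (e >>= 1)
+    {
+      w *= w;
+      if (e & 1)
+	d *= w;
+    }
+  return d;
+}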
+
+
+
+#undef FORMAT_RECOGNIZED
+
+double
+mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
+{
+ int lshift, nbits;
+ mp_limb_t x, mhi, mlo;
+
+ ASSERT (size >= 0);
+ ASSERT_MPN (up, size);
+ ASSERT (size == 0 || up[size-1] != 0);
+
+ if (size == 0)
+ return 0.0;
+
+ /* Adjust exp to a radix point just above {up,size}, guarding against
+ overflow. After this exp can of course be reduced to anywhere within
+ the {up,size} region without underflow. */
+ if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size)
+ > ((unsigned long) LONG_MAX - exp)))
+ {
+#if _GMP_IEEE_FLOATS
+ goto ieee_infinity;
+#endif
+
+ /* generic */
+ exp = LONG_MAX;
+ }
+ else
+ {
+ exp += GMP_NUMB_BITS * size;
+ }
+
+#if _GMP_IEEE_FLOATS
+ {
+ union ieee_double_extract u;
+
+ up += size;
+
+#if GMP_LIMB_BITS == 64
+ mlo = up[-1];
+ count_leading_zeros (lshift, mlo);
+
+ exp -= (lshift - GMP_NAIL_BITS) + 1;
+ mlo <<= lshift;
+
+ nbits = GMP_LIMB_BITS - lshift;
+
+ if (nbits < 53 && size > 1)
+ {
+ x = up[-2];
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ mlo |= x;
+ nbits += GMP_NUMB_BITS;
+
+ if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
+ {
+ x = up[-3];
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ mlo |= x;
+ nbits += GMP_NUMB_BITS;
+ }
+ }
+ mhi = mlo >> (32 + 11);
+ mlo = mlo >> 11; /* later implicitly truncated to 32 bits */
+#endif
+#if GMP_LIMB_BITS == 32
+ x = *--up;
+ count_leading_zeros (lshift, x);
+
+ exp -= (lshift - GMP_NAIL_BITS) + 1;
+ x <<= lshift;
+ mhi = x >> 11;
+
+ if (lshift < 11) /* FIXME: never true if NUMB < 20 bits */
+ {
+ /* All 20 bits in mhi */
+ mlo = x << 21;
+ /* >= 1 bit in mlo */
+ nbits = GMP_LIMB_BITS - lshift - 21;
+ }
+ else
+ {
+ if (size > 1)
+ {
+ nbits = GMP_LIMB_BITS - lshift;
+
+ x = *--up, size--;
+ x <<= GMP_NAIL_BITS;
+ mhi |= x >> nbits >> 11;
+
+ mlo = x << (GMP_LIMB_BITS - nbits - 11);
+ nbits = nbits + 11 - GMP_NAIL_BITS;
+ }
+ else
+ {
+ mlo = 0;
+ goto done;
+ }
+ }
+
+ /* Now all needed bits in mhi have been accumulated. Add bits to mlo. */
+
+ if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1)
+ {
+ x = up[-1];
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ mlo |= x;
+ nbits += GMP_NUMB_BITS;
+
+ if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2)
+ {
+ x = up[-2];
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ mlo |= x;
+ nbits += GMP_NUMB_BITS;
+
+ if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3)
+ {
+ x = up[-3];
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ mlo |= x;
+ nbits += GMP_NUMB_BITS;
+ }
+ }
+ }
+
+ done:;
+
+#endif
+ if (UNLIKELY (exp >= CONST_1024))
+ {
+ /* overflow, return infinity */
+ ieee_infinity:
+ mhi = 0;
+ mlo = 0;
+ exp = 1024;
+ }
+ else if (UNLIKELY (exp <= CONST_NEG_1023))
+ {
+ int rshift;
+
+ if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
+ return 0.0; /* denorm underflows to zero */
+
+ rshift = -1022 - exp;
+ ASSERT (rshift > 0 && rshift < 53);
+#if GMP_LIMB_BITS > 53
+ mlo >>= rshift;
+ mhi = mlo >> 32;
+#else
+ if (rshift >= 32)
+ {
+ mlo = mhi;
+ mhi = 0;
+ rshift -= 32;
+ }
+ lshift = GMP_LIMB_BITS - rshift;
+ mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
+ mhi >>= rshift;
+#endif
+ exp = -1023;
+ }
+ u.s.manh = mhi;
+ u.s.manl = mlo;
+ u.s.exp = exp + 1023;
+ u.s.sig = (sign < 0);
+ return u.d;
+ }
+#define FORMAT_RECOGNIZED 1
+#endif
+
+#if HAVE_DOUBLE_VAX_D
+ {
+ union double_extract u;
+
+ up += size;
+
+ mhi = up[-1];
+
+ count_leading_zeros (lshift, mhi);
+ exp -= lshift;
+ mhi <<= lshift;
+
+ mlo = 0;
+ if (size > 1)
+ {
+ mlo = up[-2];
+ if (lshift != 0)
+ mhi += mlo >> (GMP_LIMB_BITS - lshift);
+ mlo <<= lshift;
+
+ if (size > 2 && lshift > 8)
+ {
+ x = up[-3];
+ mlo += x >> (GMP_LIMB_BITS - lshift);
+ }
+ }
+
+ if (UNLIKELY (exp >= 128))
+ {
+ /* overflow, return maximum number */
+ mhi = 0xffffffff;
+ mlo = 0xffffffff;
+ exp = 127;
+ }
+ else if (UNLIKELY (exp < -128))
+ {
+ return 0.0; /* underflows to zero */
+ }
+
+ u.s.man3 = mhi >> 24; /* drop msb, since implicit */
+ u.s.man2 = mhi >> 8;
+ u.s.man1 = (mhi << 8) + (mlo >> 24);
+ u.s.man0 = mlo >> 8;
+ u.s.exp = exp + 128;
+ u.s.sig = sign < 0;
+ return u.d;
+ }
+#define FORMAT_RECOGNIZED 1
+#endif
+
+#if ! FORMAT_RECOGNIZED
+
+#if !defined(GMP_DBL_MANT_BITS)
+#if defined(DBL_MANT_DIG) && FLT_RADIX == 2
+#define GMP_DBL_MANT_BITS DBL_MANT_DIG
+#else
+/* FIXME: Choose a smarter default value. */
+#define GMP_DBL_MANT_BITS (16 * sizeof (double))
+#endif
+#endif
+
+ { /* Non-IEEE or strange limb size, generically convert
+ GMP_DBL_MANT_BITS bits. */
+ mp_limb_t l;
+ int m;
+ mp_size_t i;
+ double d, weight;
+ unsigned long uexp;
+
+ /* First generate an fp number disregarding exp, instead keeping things
+ within the numb base factor from 1, which should prevent overflow and
+ underflow even for the most exponent limited fp formats. */
+ i = size - 1;
+ l = up[i];
+ count_leading_zeros (m, l);
+ m = m + GMP_DBL_MANT_BITS - GMP_LIMB_BITS;
+ if (m < 0)
+ l &= GMP_NUMB_MAX << -m;
+ d = l;
+ for (weight = 1/MP_BASE_AS_DOUBLE; m > 0 && --i >= 0;)
+ {
+ l = up[i];
+ m -= GMP_NUMB_BITS;
+ if (m < 0)
+ l &= GMP_NUMB_MAX << -m;
+ d += l * weight;
+ weight /= MP_BASE_AS_DOUBLE;
+ if (weight == 0)
+ break;
+ }
+
+ /* Now apply exp. */
+ exp -= GMP_NUMB_BITS;
+ if (exp > 0)
+ {
+ weight = 2.0;
+ uexp = exp;
+ }
+ else
+ {
+ weight = 0.5;
+ uexp = NEG_CAST (unsigned long, exp);
+ }
+#if 1
+ /* Square-and-multiply exponentiation. */
+ if (uexp & 1)
+ d *= weight;
+ while (uexp >>= 1)
+ {
+ weight *= weight;
+ if (uexp & 1)
+ d *= weight;
+ }
+#else
+ /* Plain exponentiation. */
+ while (uexp > 0)
+ {
+ d *= weight;
+ uexp--;
+ }
+#endif
+
+ return sign >= 0 ? d : -d;
+ }
+#endif
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/get_str.c b/vendor/gmp-6.3.0/mpn/generic/get_str.c
new file mode 100644
index 0000000..19cc581
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/get_str.c
@@ -0,0 +1,451 @@
+/* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+ FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Conversion of U {up,un} to a string in base b. Internally, we convert to
+ base B = b^m, the largest power of b that fits a limb. Basic algorithms:
+
+ A) Divide U repeatedly by B, generating a quotient and remainder, until the
+ quotient becomes zero. The remainders hold the converted digits. Digits
+ come out from right to left. (Used in mpn_bc_get_str.)
+
+ B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
+ Then develop digits by multiplying the fraction repeatedly by b. Digits
+   come out from left to right. (Currently not used herein, except in
+ code for converting single limbs to individual digits.)
+
+ C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
+ sqrt(U). Then divide U by B^s, generating quotient and remainder.
+ Recursively convert the quotient, then the remainder, using the
+ precomputed powers. Digits come out from left to right. (Used in
+ mpn_dc_get_str.)
+
+ When using algorithm C, algorithm B might be suitable for basecase code,
+ since the required b^g power will be readily accessible.
+
+ Optimization ideas:
+ 1. The recursive function of (C) could use less temporary memory. The powtab
+ allocation could be trimmed with some computation, and the tmp area could
+ be reduced, or perhaps eliminated if up is reused for both quotient and
+ remainder (it is currently used just for remainder).
+ 2. Store the powers of (C) in normalized form, with the normalization count.
+ Quotients will usually need to be left-shifted before each divide, and
+      remainders will either need to be left-shifted or right-shifted.
+ 3. In the code for developing digits from a single limb, we could avoid using
+ a full umul_ppmm except for the first (or first few) digits, provided base
+ is even. Subsequent digits can be developed using plain multiplication.
+ (This saves on register-starved machines (read x86) and on all machines
+ that generate the upper product half using a separate instruction (alpha,
+      powerpc, IA-64) or lack such support altogether (sparc64, hppa64).)
+ 4. Separate mpn_dc_get_str basecase code from code for small conversions. The
+ former code will have the exact right power readily available in the
+ powtab parameter for dividing the current number into a fraction. Convert
+ that using algorithm B.
+ 5. Completely avoid division. Compute the inverses of the powers now in
+ powtab instead of the actual powers.
+ 6. Decrease powtab allocation for even bases. E.g. for base 10 we could save
+ about 30% (1-log(5)/log(10)).
+
+ Basic structure of (C):
+ mpn_get_str:
+ if POW2_P (n)
+ ...
+ else
+ if (un < GET_STR_PRECOMPUTE_THRESHOLD)
+          mpn_bc_get_str (str, base, up, un);
+ else
+ precompute_power_tables
+ mpn_dc_get_str
+
+ mpn_dc_get_str:
+ mpn_tdiv_qr
+ if (qn < GET_STR_DC_THRESHOLD)
+ mpn_bc_get_str
+ else
+ mpn_dc_get_str
+ if (rn < GET_STR_DC_THRESHOLD)
+ mpn_bc_get_str
+ else
+ mpn_dc_get_str
+
+
+ The reason for the two threshold values is the cost of
+ precompute_power_tables. GET_STR_PRECOMPUTE_THRESHOLD will be
+ considerably larger than GET_STR_DC_THRESHOLD. */
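+
+/* Editorial sketch, not part of the upstream GMP file: algorithm (A) above on
+   a single word.  Repeated division by the base yields digits least
+   significant first, so they are written from the end of the buffer
+   backwards; the return value is the digit count.  Digit values, not ASCII.
+   Hypothetical helper, plain C; buf must have room for len entries and len
+   must be large enough for the converted value. */
+static size_t
+get_str_1_sketch (unsigned char *buf, size_t len, unsigned long u, unsigned base)
+{
+  size_t i = len;
+  do
+    {
+      buf[--i] = (unsigned char) (u % base);  /* remainder is the next digit */
+      u /= base;
+    }
+  while (u != 0);
+  return len - i;
+}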
+
+
+/* The x86s and m68020 have a quotient and remainder "div" instruction and
+ gcc recognises an adjacent "/" and "%" can be combined using that.
+ Elsewhere "/" and "%" are either separate instructions, or separate
+ libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
+ A multiply and subtract should be faster than a "%" in those cases. */
+#if HAVE_HOST_CPU_FAMILY_x86 \
+ || HAVE_HOST_CPU_m68020 \
+ || HAVE_HOST_CPU_m68030 \
+ || HAVE_HOST_CPU_m68040 \
+ || HAVE_HOST_CPU_m68060 \
+ || HAVE_HOST_CPU_m68360 /* CPU32 */
+#define udiv_qrnd_unnorm(q,r,n,d) \
+ do { \
+ mp_limb_t __q = (n) / (d); \
+ mp_limb_t __r = (n) % (d); \
+ (q) = __q; \
+ (r) = __r; \
+ } while (0)
+#else
+#define udiv_qrnd_unnorm(q,r,n,d) \
+ do { \
+ mp_limb_t __q = (n) / (d); \
+ mp_limb_t __r = (n) - __q*(d); \
+ (q) = __q; \
+ (r) = __r; \
+ } while (0)
+#endif
+
+
+/* Convert {up,un} to a string in base base, and put the result in str.
+ Generate len characters, possibly padding with zeros to the left. If len is
+ zero, generate as many characters as required. Return a pointer immediately
+ after the last digit of the result string. Complexity is O(un^2); intended
+ for small conversions. */
+static unsigned char *
+mpn_bc_get_str (unsigned char *str, size_t len,
+ mp_ptr up, mp_size_t un, int base)
+{
+ mp_limb_t rl, ul;
+ unsigned char *s;
+ size_t l;
+ /* Allocate memory for largest possible string, given that we only get here
+ for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
+ base is 3. 7/11 is an approximation to 1/log2(3). */
+#if TUNE_PROGRAM_BUILD
+#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
+#else
+#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
+#endif
+ unsigned char buf[BUF_ALLOC];
+#if TUNE_PROGRAM_BUILD
+ mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
+#else
+ mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
+#endif
+
+ if (base == 10)
+ {
+ /* Special case code for base==10 so that the compiler has a chance to
+ optimize things. */
+
+ MPN_COPY (rp + 1, up, un);
+
+ s = buf + BUF_ALLOC;
+ while (un > 1)
+ {
+ int i;
+ mp_limb_t frac, digit;
+ MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
+ MP_BASES_BIG_BASE_10,
+ MP_BASES_BIG_BASE_INVERTED_10,
+ MP_BASES_NORMALIZATION_STEPS_10);
+ un -= rp[un] == 0;
+ frac = (rp[0] + 1) << GMP_NAIL_BITS;
+ s -= MP_BASES_CHARS_PER_LIMB_10;
+#if HAVE_HOST_CPU_FAMILY_x86
+ /* The code below turns out to be a bit slower for x86 using gcc.
+ Use plain code. */
+ i = MP_BASES_CHARS_PER_LIMB_10;
+ do
+ {
+ umul_ppmm (digit, frac, frac, 10);
+ *s++ = digit;
+ }
+ while (--i);
+#else
+ /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
+ After a few umul_ppmm, we will have accumulated enough low zeros
+ to use a plain multiply. */
+ if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
+ {
+ umul_ppmm (digit, frac, frac, 10);
+ *s++ = digit;
+ }
+ if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
+ {
+ umul_ppmm (digit, frac, frac, 10);
+ *s++ = digit;
+ }
+ if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
+ {
+ umul_ppmm (digit, frac, frac, 10);
+ *s++ = digit;
+ }
+ if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
+ {
+ umul_ppmm (digit, frac, frac, 10);
+ *s++ = digit;
+ }
+ i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
+ ? (4-MP_BASES_NORMALIZATION_STEPS_10)
+ : 0));
+ frac = (frac + 0xf) >> 4;
+ do
+ {
+ frac *= 10;
+ digit = frac >> (GMP_LIMB_BITS - 4);
+ *s++ = digit;
+ frac &= (~(mp_limb_t) 0) >> 4;
+ }
+ while (--i);
+#endif
+ s -= MP_BASES_CHARS_PER_LIMB_10;
+ }
+
+ ul = rp[1];
+ while (ul != 0)
+ {
+ udiv_qrnd_unnorm (ul, rl, ul, 10);
+ *--s = rl;
+ }
+ }
+ else /* not base 10 */
+ {
+ unsigned chars_per_limb;
+ mp_limb_t big_base, big_base_inverted;
+ unsigned normalization_steps;
+
+ chars_per_limb = mp_bases[base].chars_per_limb;
+ big_base = mp_bases[base].big_base;
+ big_base_inverted = mp_bases[base].big_base_inverted;
+ count_leading_zeros (normalization_steps, big_base);
+
+ MPN_COPY (rp + 1, up, un);
+
+ s = buf + BUF_ALLOC;
+ while (un > 1)
+ {
+ int i;
+ mp_limb_t frac;
+ MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
+ big_base, big_base_inverted,
+ normalization_steps);
+ un -= rp[un] == 0;
+ frac = (rp[0] + 1) << GMP_NAIL_BITS;
+ s -= chars_per_limb;
+ i = chars_per_limb;
+ do
+ {
+ mp_limb_t digit;
+ umul_ppmm (digit, frac, frac, base);
+ *s++ = digit;
+ }
+ while (--i);
+ s -= chars_per_limb;
+ }
+
+ ul = rp[1];
+ while (ul != 0)
+ {
+ udiv_qrnd_unnorm (ul, rl, ul, base);
+ *--s = rl;
+ }
+ }
+
+ l = buf + BUF_ALLOC - s;
+ while (l < len)
+ {
+ *str++ = 0;
+ len--;
+ }
+ while (l != 0)
+ {
+ *str++ = *s++;
+ l--;
+ }
+ return str;
+}
+
+
+/* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
+ the string in STR. Generate LEN characters, possibly padding with zeros to
+ the left. If LEN is zero, generate as many characters as required.
+ Return a pointer immediately after the last digit of the result string.
+ This uses divide-and-conquer and is intended for large conversions. */
+static unsigned char *
+mpn_dc_get_str (unsigned char *str, size_t len,
+ mp_ptr up, mp_size_t un,
+ const powers_t *powtab, mp_ptr tmp)
+{
+ if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
+ {
+ if (un != 0)
+ str = mpn_bc_get_str (str, len, up, un, powtab->base);
+ else
+ {
+ while (len != 0)
+ {
+ *str++ = 0;
+ len--;
+ }
+ }
+ }
+ else
+ {
+ mp_ptr pwp, qp, rp;
+ mp_size_t pwn, qn;
+ mp_size_t sn;
+
+ pwp = powtab->p;
+ pwn = powtab->n;
+ sn = powtab->shift;
+
+ if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
+ {
+ str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
+ }
+ else
+ {
+ qp = tmp; /* (un - pwn + 1) limbs for qp */
+ rp = up; /* pwn limbs for rp; overwrite up area */
+
+ mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
+ qn = un - sn - pwn; qn += qp[qn] != 0; /* quotient size */
+
+ ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
+
+ if (len != 0)
+ len = len - powtab->digits_in_base;
+
+ str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
+ str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
+ }
+ }
+ return str;
+}
+
+/* There are no leading zeros on the digits generated at str, but that's not
+ currently a documented feature. The current mpz_out_str and mpz_get_str
+ rely on it. */
+
+size_t
+mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
+{
+ mp_ptr powtab_mem;
+ powers_t powtab[GMP_LIMB_BITS];
+ int pi;
+ size_t out_len;
+ mp_ptr tmp;
+ TMP_DECL;
+
+ /* Special case zero, as the code below doesn't handle it. */
+ if (un == 0)
+ {
+ str[0] = 0;
+ return 1;
+ }
+
+ if (POW2_P (base))
+ {
+ /* The base is a power of 2. Convert from most significant end. */
+ mp_limb_t n1, n0;
+ int bits_per_digit = mp_bases[base].big_base;
+ int cnt;
+ int bit_pos;
+ mp_size_t i;
+ unsigned char *s = str;
+ mp_bitcnt_t bits;
+
+ n1 = up[un - 1];
+ count_leading_zeros (cnt, n1);
+
+ /* BIT_POS should be R when input ends in least significant nibble,
+ R + bits_per_digit * n when input ends in nth least significant
+ nibble. */
+
+ bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
+ cnt = bits % bits_per_digit;
+ if (cnt != 0)
+ bits += bits_per_digit - cnt;
+ bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
+
+ /* Fast loop for bit output. */
+ i = un - 1;
+ for (;;)
+ {
+ bit_pos -= bits_per_digit;
+ while (bit_pos >= 0)
+ {
+ *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
+ bit_pos -= bits_per_digit;
+ }
+ i--;
+ if (i < 0)
+ break;
+ n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
+ n1 = up[i];
+ bit_pos += GMP_NUMB_BITS;
+ *s++ = n0 | (n1 >> bit_pos);
+ }
+
+ return s - str;
+ }
+
+ /* General case. The base is not a power of 2. */
+
+ if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
+ return mpn_bc_get_str (str, (size_t) 0, up, un, base) - str;
+
+ TMP_MARK;
+
+ /* Allocate one large block for the powers of big_base. */
+ powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
+
+  /* Compute a table of powers, where the largest power is >= sqrt(U). */
+ size_t ndig;
+ mp_size_t xn;
+ DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
+ xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
+
+ pi = 1 + mpn_compute_powtab (powtab, powtab_mem, xn, base);
+
+ /* Using our precomputed powers, now in powtab[], convert our number. */
+ tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
+ out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
+ TMP_FREE;
+
+ return out_len;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/gmp-mparam.h b/vendor/gmp-6.3.0/mpn/generic/gmp-mparam.h
new file mode 100644
index 0000000..7dc057a
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/gmp-mparam.h
@@ -0,0 +1,33 @@
+/* Generic C gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* Values for GMP_LIMB_BITS etc will be determined by ./configure and put
+ in config.h. */
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd.c b/vendor/gmp-6.3.0/mpn/generic/hgcd.c
new file mode 100644
index 0000000..e3e9c66
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd.c
@@ -0,0 +1,182 @@
+/* hgcd.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Size analysis for hgcd:
+
+ For the recursive calls, we have n1 <= ceil(n / 2). Then the
+ storage need is determined by the storage for the recursive call
+ computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
+ (after this, the storage needed for M1 can be recycled).
+
+ Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
+ = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
+ and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
+ 4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
+
+ For the recursive call, we need S(n1) = S(ceil(n/2)).
+
+ S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
+ <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
+ <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
+ <= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
+*/
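+
+/* Editorial worked example, not part of the upstream GMP file: with a
+   hypothetical HGCD_THRESHOLD of 100 and n = 1000, mpn_hgcd_itch below gives
+   nscaled = 999/99 = 10, whose bit length is k = 4, so the scratch bound is
+   20*ceil(1000/4) + 22*4 + 100 = 5000 + 88 + 100 = 5188 limbs. */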
+
+mp_size_t
+mpn_hgcd_itch (mp_size_t n)
+{
+ unsigned k;
+ int count;
+ mp_size_t nscaled;
+
+ if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
+ return n;
+
+ /* Get the recursion depth. */
+ nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
+ count_leading_zeros (count, nscaled);
+ k = GMP_LIMB_BITS - count;
+
+ return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+ with elements of size at most (n+1)/2 - 1. Returns new size of a,
+ b, or zero if no reduction is possible. */
+
+mp_size_t
+mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
+ struct hgcd_matrix *M, mp_ptr tp)
+{
+ mp_size_t s = n/2 + 1;
+
+ mp_size_t nn;
+ int success = 0;
+
+ if (n <= s)
+ /* Happens when n <= 2, a fairly uninteresting case but exercised
+ by the random inputs of the testsuite. */
+ return 0;
+
+ ASSERT ((ap[n-1] | bp[n-1]) > 0);
+
+ ASSERT ((n+1)/2 - 1 < M->alloc);
+
+ if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
+ {
+ mp_size_t n2 = (3*n)/4 + 1;
+ mp_size_t p = n/2;
+
+ nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+ if (nn)
+ {
+ n = nn;
+ success = 1;
+ }
+
+ /* NOTE: It appears this loop never runs more than once (at
+ least when not recursing to hgcd_appr). */
+ while (n > n2)
+ {
+ /* Needs n + 1 storage */
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ return success ? n : 0;
+
+ n = nn;
+ success = 1;
+ }
+
+ if (n > s + 2)
+ {
+ struct hgcd_matrix M1;
+ mp_size_t scratch;
+
+ p = 2*s - n + 1;
+ scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+ mpn_hgcd_matrix_init(&M1, n - p, tp);
+
+ /* FIXME: Should use hgcd_reduce, but that may require more
+ scratch space, which requires review. */
+
+ nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
+ if (nn > 0)
+ {
+ /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+ ASSERT (M->n + 2 >= M1.n);
+
+ /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+ then either q or q + 1 is a correct quotient, and M1 will
+ start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+ rules out the case that the size of M * M1 is much
+ smaller than the expected M->n + M1->n. */
+
+ ASSERT (M->n + M1.n < M->alloc);
+
+ /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+ = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+ n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+	    /* We need a bound for M->n + M1.n.  Let n be the original
+ input size. Then
+
+ ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+ and it follows that
+
+ M.n + M1.n <= ceil(n/2) + 1
+
+ Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+ amount of needed scratch space. */
+ mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+ success = 1;
+ }
+ }
+ }
+
+ for (;;)
+ {
+ /* Needs s+3 < n */
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ return success ? n : 0;
+
+ n = nn;
+ success = 1;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd2-div.h b/vendor/gmp-6.3.0/mpn/generic/hgcd2-div.h
new file mode 100644
index 0000000..45ba453
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd2-div.h
@@ -0,0 +1,504 @@
+/* hgcd2-div.h
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef HGCD2_DIV1_METHOD
+#define HGCD2_DIV1_METHOD 3
+#endif
+
+#ifndef HGCD2_DIV2_METHOD
+#define HGCD2_DIV2_METHOD 2
+#endif
+
+#if HAVE_NATIVE_mpn_div_11
+
+#define div1 mpn_div_11
+/* Single-limb division optimized for small quotients.
+ Returned value holds d0 = r, d1 = q. */
+mp_double_limb_t div1 (mp_limb_t, mp_limb_t);
+
+#elif HGCD2_DIV1_METHOD == 1
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+ mp_double_limb_t res;
+ res.d1 = n0 / d0;
+ res.d0 = n0 - res.d1 * d0;
+
+ return res;
+}
+
+#elif HGCD2_DIV1_METHOD == 2
+
+static mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+ mp_double_limb_t res;
+ int ncnt, dcnt, cnt;
+ mp_limb_t q;
+ mp_limb_t mask;
+
+ ASSERT (n0 >= d0);
+
+ count_leading_zeros (ncnt, n0);
+ count_leading_zeros (dcnt, d0);
+ cnt = dcnt - ncnt;
+
+ d0 <<= cnt;
+
+ q = -(mp_limb_t) (n0 >= d0);
+ n0 -= d0 & q;
+ d0 >>= 1;
+ q = -q;
+
+ while (--cnt >= 0)
+ {
+ mask = -(mp_limb_t) (n0 >= d0);
+ n0 -= d0 & mask;
+ d0 >>= 1;
+ q = (q << 1) - mask;
+ }
+
+ res.d0 = n0;
+ res.d1 = q;
+ return res;
+}
+
+#elif HGCD2_DIV1_METHOD == 3
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+ mp_double_limb_t res;
+ if (UNLIKELY ((d0 >> (GMP_LIMB_BITS - 3)) != 0)
+ || UNLIKELY (n0 >= (d0 << 3)))
+ {
+ res.d1 = n0 / d0;
+ res.d0 = n0 - res.d1 * d0;
+ }
+ else
+ {
+ mp_limb_t q, mask;
+
+ d0 <<= 2;
+
+ mask = -(mp_limb_t) (n0 >= d0);
+ n0 -= d0 & mask;
+ q = 4 & mask;
+
+ d0 >>= 1;
+ mask = -(mp_limb_t) (n0 >= d0);
+ n0 -= d0 & mask;
+ q += 2 & mask;
+
+ d0 >>= 1;
+ mask = -(mp_limb_t) (n0 >= d0);
+ n0 -= d0 & mask;
+ q -= mask;
+
+ res.d0 = n0;
+ res.d1 = q;
+ }
+ return res;
+}
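+
+/* Worked example of the branch-free path above (illustrative, assuming the
+   limb is wide enough that d0 >> (GMP_LIMB_BITS - 3) is 0): n0 = 29, d0 = 4.
+   Since 29 < (4 << 3) = 32 we take the fast path:
+
+     d0 <<= 2  ->  16;  29 >= 16, so n0 = 13, q = 4
+     d0 >>= 1  ->   8;  13 >=  8, so n0 =  5, q = 6
+     d0 >>= 1  ->   4;   5 >=  4, so n0 =  1, q = 7
+
+   giving q = 7, r = 1, i.e. 29 = 7 * 4 + 1 as expected.  */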
+
+#elif HGCD2_DIV1_METHOD == 4
+
+/* Table quotients. We extract the NBITS most significant bits of the
+ numerator limb, and the corresponding bits from the divisor limb, and use
+ these to form an index into the table. This method is probably only useful
+ for short pipelines with slow multiplication.
+
+ Possible improvements:
+
+ * Perhaps extract the highest NBITS of the divisor instead of the same bits
+ as from the numerator. That would require another count_leading_zeros,
+ and a post-multiply shift of the quotient.
+
+ * Compress tables? Their values are tiny, and there are lots of zero
+ entries (which are never used).
+
+ * Round the table entries more cleverly?
+*/
+
+#ifndef NBITS
+#define NBITS 5
+#endif
+
+#if NBITS == 5
+/* This needs full division about 13.2% of the time. */
+static const unsigned char tab[512] = {
+17, 9, 5,4,3,2,2,2,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18, 9, 6,4,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+19,10, 6,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+20,10, 6,5,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+21,11, 7,5,4,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
+22,11, 7,5,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+23,12, 7,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
+24,12, 8,6,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+25,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
+26,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+27,14, 9,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+28,14, 9,7,5,4,3,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+29,15,10,7,5,4,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
+30,15,10,7,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+31,16,10,7,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+32,16,11,8,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+#elif NBITS == 6
+/* This needs full division about 9.8% of the time. */
+static const unsigned char tab[2048] = {
+33,17,11, 8, 6, 5,4,4,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+34,17,11, 8, 6, 5,4,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+35,18,12, 9, 7, 5,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+36,18,12, 9, 7, 6,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+37,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+38,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+39,20,13,10, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+40,20,14,10, 8, 6,5,5,4,3,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+41,21,14,10, 8, 6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+42,21,14,10, 8, 7,6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+43,22,15,11, 8, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+44,22,15,11, 9, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+45,23,15,11, 9, 7,6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+46,23,16,11, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+47,24,16,12, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+48,24,16,12, 9, 8,6,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+49,25,17,12,10, 8,7,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+50,25,17,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+51,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+52,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+53,27,18,13,10, 9,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
+54,27,19,14,11, 9,7,6,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+55,28,19,14,11, 9,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
+56,28,19,14,11, 9,8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+57,29,20,14,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
+58,29,20,15,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+59,30,20,15,12,10,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+60,30,21,15,12,10,8,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+61,31,21,15,12,10,8,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
+62,31,22,16,12,10,9,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+63,32,22,16,13,10,9,7,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+64,32,22,16,13,10,9,8,7,6,5,5,4,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+#else
+#error No table for provided NBITS
+#endif
+
+/* Doing tabp with a #define makes compiler warnings about pointing outside an
+ object go away. We used to define this as a variable. It is not clear if
+   e.g. (vector[100] - 10) + 10 is well-defined as per the C standard;
+ (vector[100] + 10) - 10 surely is and there is no sequence point so the
+ expressions should be equivalent. To make this safe, we might want to
+ define tabp as a macro with the index as an argument. Depending on the
+ platform, relocs might allow for assembly-time or linker-time resolution to
+ take place. */
+#define tabp (tab - (1 << (NBITS - 1) << NBITS))
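+
+/* Index arithmetic for tabp (illustrative): with NBITS == 5, nbi always has
+   its top bit set, so nbi lies in [16, 31] and dbi in [0, 31].  The raw index
+   (nbi << 5) + dbi therefore lies in [512, 1023]; subtracting
+   (1 << (NBITS - 1)) << NBITS == 512 in the tabp definition maps it onto
+   tab[0 .. 511], so no table entries are wasted on the always-set high
+   numerator bit.  */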
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+ int ncnt;
+ size_t nbi, dbi;
+ mp_limb_t q0;
+ mp_limb_t r0;
+ mp_limb_t mask;
+ mp_double_limb_t res;
+
+ ASSERT (n0 >= d0); /* Actually only msb position is critical. */
+
+ count_leading_zeros (ncnt, n0);
+ nbi = n0 << ncnt >> (GMP_LIMB_BITS - NBITS);
+ dbi = d0 << ncnt >> (GMP_LIMB_BITS - NBITS);
+
+ q0 = tabp[(nbi << NBITS) + dbi];
+ r0 = n0 - q0 * d0;
+ mask = -(mp_limb_t) (r0 >= d0);
+ q0 -= mask;
+ r0 -= d0 & mask;
+
+ if (UNLIKELY (r0 >= d0))
+ {
+ q0 = n0 / d0;
+ r0 = n0 - q0 * d0;
+ }
+
+ res.d1 = q0;
+ res.d0 = r0;
+ return res;
+}
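+
+/* Worked example of the table lookup above (illustrative, assuming
+   GMP_LIMB_BITS == 64 and NBITS == 5): n0 = 1000, d0 = 77.
+
+     count_leading_zeros gives ncnt = 54  (1000 has 10 significant bits)
+     nbi = top 5 bits of 1000 = 0b11111 = 31
+     dbi = 77 shifted by the same ncnt = 77 >> 5 = 0b10 = 2
+     q0  = tabp[(31 << 5) + 2] = tab[482] = 11
+     r0  = 1000 - 11 * 77 = 153;  153 >= 77, so q0 = 12, r0 = 76
+
+   and 76 < 77, so no full division is needed; indeed 1000 = 12 * 77 + 76.  */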
+
+#elif HGCD2_DIV1_METHOD == 5
+
+/* Table inverses of divisors. We don't bother with suppressing the msb from
+ the tables. We index with the NBITS most significant divisor bits,
+ including the always-set highest bit, but use addressing trickery via tabp
+ to suppress it.
+
+ Possible improvements:
+
+ * Do first multiply using 32-bit operations on 64-bit computers. At least
+ on most Arm64 cores, that uses 3 times less resources. It also saves on
+ many x86-64 processors.
+*/
+
+#ifndef NBITS
+#define NBITS 7
+#endif
+
+#if NBITS == 5
+/* This needs full division about 1.63% of the time. */
+static const unsigned char tab[16] = {
+ 63, 59, 55, 52, 50, 47, 45, 43, 41, 39, 38, 36, 35, 34, 33, 32
+};
+#elif NBITS == 6
+/* This needs full division about 0.93% of the time. */
+static const unsigned char tab[32] = {
+127,123,119,116,112,109,106,104,101, 98, 96, 94, 92, 90, 88, 86,
+ 84, 82, 80, 79, 77, 76, 74, 73, 72, 70, 69, 68, 67, 66, 65, 64
+};
+#elif NBITS == 7
+/* This needs full division about 0.49% of the time. */
+static const unsigned char tab[64] = {
+255,251,247,243,239,236,233,229,226,223,220,217,214,211,209,206,
+203,201,198,196,194,191,189,187,185,183,181,179,177,175,173,171,
+169,167,166,164,162,161,159,158,156,155,153,152,150,149,147,146,
+145,143,142,141,140,139,137,136,135,134,133,132,131,130,129,128
+};
+#elif NBITS == 8
+/* This needs full division about 0.26% of the time. */
+static const unsigned short tab[128] = {
+511,507,503,499,495,491,488,484,480,477,473,470,467,463,460,457,
+454,450,447,444,441,438,435,433,430,427,424,421,419,416,413,411,
+408,406,403,401,398,396,393,391,389,386,384,382,380,377,375,373,
+371,369,367,365,363,361,359,357,355,353,351,349,347,345,343,342,
+340,338,336,335,333,331,329,328,326,325,323,321,320,318,317,315,
+314,312,311,309,308,306,305,303,302,301,299,298,296,295,294,292,
+291,290,288,287,286,285,283,282,281,280,279,277,276,275,274,273,
+272,270,269,268,267,266,265,264,263,262,261,260,259,258,257,256
+};
+#else
+#error No table for provided NBITS
+#endif
+
+/* Doing tabp with a #define makes compiler warnings about pointing outside an
+ object go away. We used to define this as a variable. It is not clear if
+   e.g. (vector[100] - 10) + 10 is well-defined as per the C standard;
+ (vector[100] + 10) - 10 surely is and there is no sequence point so the
+ expressions should be equivalent. To make this safe, we might want to
+ define tabp as a macro with the index as an argument. Depending on the
+ platform, relocs might allow for assembly-time or linker-time resolution to
+ take place. */
+#define tabp (tab - (1 << (NBITS - 1)))
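+
+/* Index arithmetic for tabp here (illustrative): dbi is formed from the top
+   NBITS bits of the normalized divisor, so its high bit is always set and
+   dbi lies in [1 << (NBITS - 1), (1 << NBITS) - 1].  Subtracting
+   1 << (NBITS - 1) maps that range onto tab[0 .. (1 << (NBITS - 1)) - 1],
+   which is exactly the number of entries each table above provides.  */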
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+ int ncnt, dcnt;
+ size_t dbi;
+ mp_limb_t inv;
+ mp_limb_t q0;
+ mp_limb_t r0;
+ mp_limb_t mask;
+ mp_double_limb_t res;
+
+ count_leading_zeros (ncnt, n0);
+ count_leading_zeros (dcnt, d0);
+
+ dbi = d0 << dcnt >> (GMP_LIMB_BITS - NBITS);
+ inv = tabp[dbi];
+ q0 = ((n0 << ncnt) >> (NBITS + 1)) * inv >> (GMP_LIMB_BITS - 1 + ncnt - dcnt);
+ r0 = n0 - q0 * d0;
+ mask = -(mp_limb_t) (r0 >= d0);
+ q0 -= mask;
+ r0 -= d0 & mask;
+
+ if (UNLIKELY (r0 >= d0))
+ {
+ q0 = n0 / d0;
+ r0 = n0 - q0 * d0;
+ }
+
+ res.d1 = q0;
+ res.d0 = r0;
+ return res;
+}
+
+#else
+#error Unknown HGCD2_DIV1_METHOD
+#endif
+
+#if HAVE_NATIVE_mpn_div_22
+
+#define div2 mpn_div_22
+/* Two-limb division optimized for small quotients. */
+mp_limb_t div2 (mp_ptr, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+#elif HGCD2_DIV2_METHOD == 1
+
+static mp_limb_t
+div2 (mp_ptr rp,
+ mp_limb_t n1, mp_limb_t n0,
+ mp_limb_t d1, mp_limb_t d0)
+{
+ mp_double_limb_t rq = div1 (n1, d1);
+ if (UNLIKELY (rq.d1 > d1))
+ {
+ mp_limb_t n2, q, t1, t0;
+ int c;
+
+ /* Normalize */
+ count_leading_zeros (c, d1);
+ ASSERT (c > 0);
+
+ n2 = n1 >> (GMP_LIMB_BITS - c);
+ n1 = (n1 << c) | (n0 >> (GMP_LIMB_BITS - c));
+ n0 <<= c;
+ d1 = (d1 << c) | (d0 >> (GMP_LIMB_BITS - c));
+ d0 <<= c;
+
+ udiv_qrnnd (q, n1, n2, n1, d1);
+ umul_ppmm (t1, t0, q, d0);
+ if (t1 > n1 || (t1 == n1 && t0 > n0))
+ {
+ ASSERT (q > 0);
+ q--;
+ sub_ddmmss (t1, t0, t1, t0, d1, d0);
+ }
+ sub_ddmmss (n1, n0, n1, n0, t1, t0);
+
+ /* Undo normalization */
+ rp[0] = (n0 >> c) | (n1 << (GMP_LIMB_BITS - c));
+ rp[1] = n1 >> c;
+
+ return q;
+ }
+ else
+ {
+ mp_limb_t q, t1, t0;
+ n1 = rq.d0;
+ q = rq.d1;
+ umul_ppmm (t1, t0, q, d0);
+ if (UNLIKELY (t1 >= n1) && (t1 > n1 || t0 > n0))
+ {
+ ASSERT (q > 0);
+ q--;
+ sub_ddmmss (t1, t0, t1, t0, d1, d0);
+ }
+ sub_ddmmss (rp[1], rp[0], n1, n0, t1, t0);
+ return q;
+ }
+}
+
+#elif HGCD2_DIV2_METHOD == 2
+
+/* Bit-wise div2. Relies on fast count_leading_zeros. */
+static mp_limb_t
+div2 (mp_ptr rp,
+ mp_limb_t n1, mp_limb_t n0,
+ mp_limb_t d1, mp_limb_t d0)
+{
+ mp_limb_t q = 0;
+ int ncnt;
+ int dcnt;
+
+ count_leading_zeros (ncnt, n1);
+ count_leading_zeros (dcnt, d1);
+ dcnt -= ncnt;
+
+ d1 = (d1 << dcnt) + (d0 >> 1 >> (GMP_LIMB_BITS - 1 - dcnt));
+ d0 <<= dcnt;
+
+ do
+ {
+ mp_limb_t mask;
+ q <<= 1;
+ if (UNLIKELY (n1 == d1))
+ mask = -(n0 >= d0);
+ else
+ mask = -(n1 > d1);
+
+ q -= mask;
+
+ sub_ddmmss (n1, n0, n1, n0, mask & d1, mask & d0);
+
+ d0 = (d1 << (GMP_LIMB_BITS - 1)) | (d0 >> 1);
+ d1 = d1 >> 1;
+ }
+ while (dcnt--);
+
+ rp[0] = n0;
+ rp[1] = n1;
+
+ return q;
+}
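+
+/* Sketch of the loop above (a plain schoolbook binary division): the divisor
+   is first shifted left by dcnt bits so that its high limb has the same bit
+   length as the numerator's high limb, then dcnt + 1 iterations each compare,
+   conditionally subtract, and shift the divisor back right by one bit,
+   producing one quotient bit per iteration, most significant first.  */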
+#else
+#error Unknown HGCD2_DIV2_METHOD
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd2.c b/vendor/gmp-6.3.0/mpn/generic/hgcd2.c
new file mode 100644
index 0000000..43d4d48
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd2.c
@@ -0,0 +1,283 @@
+/* hgcd2.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/generic/hgcd2-div.h"
+
+#if GMP_NAIL_BITS != 0
+#error Nails not implemented
+#endif
+
+/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
+ matrix M. Returns 1 if we make progress, i.e. can perform at least
+ one subtraction. Otherwise returns zero. */
+
+/* FIXME: Possible optimizations:
+
+ The div2 function starts with checking the most significant bit of
+ the numerator. We can maintained normalized operands here, call
+   the numerator.  We could maintain normalized operands here, and call
+ simpler and possibly faster.
+
+ Experiment with table lookups on the most significant bits.
+
+ This function is also a candidate for assembler implementation.
+*/
+int
+mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+ struct hgcd_matrix1 *M)
+{
+ mp_limb_t u00, u01, u10, u11;
+
+ if (ah < 2 || bh < 2)
+ return 0;
+
+ if (ah > bh || (ah == bh && al > bl))
+ {
+ sub_ddmmss (ah, al, ah, al, bh, bl);
+ if (ah < 2)
+ return 0;
+
+ u00 = u01 = u11 = 1;
+ u10 = 0;
+ }
+ else
+ {
+ sub_ddmmss (bh, bl, bh, bl, ah, al);
+ if (bh < 2)
+ return 0;
+
+ u00 = u10 = u11 = 1;
+ u01 = 0;
+ }
+
+ if (ah < bh)
+ goto subtract_a;
+
+ for (;;)
+ {
+ ASSERT (ah >= bh);
+ if (ah == bh)
+ goto done;
+
+ if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+ {
+ ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+ bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+ break;
+ }
+
+ /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+ 1), affecting the second column of M. */
+ ASSERT (ah > bh);
+ sub_ddmmss (ah, al, ah, al, bh, bl);
+
+ if (ah < 2)
+ goto done;
+
+ if (ah <= bh)
+ {
+ /* Use q = 1 */
+ u01 += u00;
+ u11 += u10;
+ }
+ else
+ {
+ mp_limb_t r[2];
+ mp_limb_t q = div2 (r, ah, al, bh, bl);
+ al = r[0]; ah = r[1];
+ if (ah < 2)
+ {
+ /* A is too small, but q is correct. */
+ u01 += q * u00;
+ u11 += q * u10;
+ goto done;
+ }
+ q++;
+ u01 += q * u00;
+ u11 += q * u10;
+ }
+ subtract_a:
+ ASSERT (bh >= ah);
+ if (ah == bh)
+ goto done;
+
+ if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+ {
+ ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+ bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+ goto subtract_a1;
+ }
+
+ /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+ 1), affecting the first column of M. */
+ sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+ if (bh < 2)
+ goto done;
+
+ if (bh <= ah)
+ {
+ /* Use q = 1 */
+ u00 += u01;
+ u10 += u11;
+ }
+ else
+ {
+ mp_limb_t r[2];
+ mp_limb_t q = div2 (r, bh, bl, ah, al);
+ bl = r[0]; bh = r[1];
+ if (bh < 2)
+ {
+ /* B is too small, but q is correct. */
+ u00 += q * u01;
+ u10 += q * u11;
+ goto done;
+ }
+ q++;
+ u00 += q * u01;
+ u10 += q * u11;
+ }
+ }
+
+ /* NOTE: Since we discard the least significant half limb, we don't get a
+ truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
+ /* Single precision loop */
+ for (;;)
+ {
+ ASSERT (ah >= bh);
+
+ ah -= bh;
+ if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+ break;
+
+ if (ah <= bh)
+ {
+ /* Use q = 1 */
+ u01 += u00;
+ u11 += u10;
+ }
+ else
+ {
+ mp_double_limb_t rq = div1 (ah, bh);
+ mp_limb_t q = rq.d1;
+ ah = rq.d0;
+
+ if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+ {
+ /* A is too small, but q is correct. */
+ u01 += q * u00;
+ u11 += q * u10;
+ break;
+ }
+ q++;
+ u01 += q * u00;
+ u11 += q * u10;
+ }
+ subtract_a1:
+ ASSERT (bh >= ah);
+
+ bh -= ah;
+ if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+ break;
+
+ if (bh <= ah)
+ {
+ /* Use q = 1 */
+ u00 += u01;
+ u10 += u11;
+ }
+ else
+ {
+ mp_double_limb_t rq = div1 (bh, ah);
+ mp_limb_t q = rq.d1;
+ bh = rq.d0;
+
+ if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+ {
+ /* B is too small, but q is correct. */
+ u00 += q * u01;
+ u10 += q * u11;
+ break;
+ }
+ q++;
+ u00 += q * u01;
+ u10 += q * u11;
+ }
+ }
+
+ done:
+ M->u[0][0] = u00; M->u[0][1] = u01;
+ M->u[1][0] = u10; M->u[1][1] = u11;
+
+ return 1;
+}
+
+/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
+ * have space for n + 1 limbs.  Uses three buffers to avoid a copy.  */
+mp_size_t
+mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
+ mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+ mp_limb_t ah, bh;
+
+ /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+ r = u00 * a
+ r += u10 * b
+ b *= u11
+ b += u01 * a
+ */
+
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+ ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
+ bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
+#else
+ ah = mpn_mul_1 (rp, ap, n, M->u[0][0]);
+ ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
+
+ bh = mpn_mul_1 (bp, bp, n, M->u[1][1]);
+ bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
+#endif
+ rp[n] = ah;
+ bp[n] = bh;
+
+ n += (ah | bh) > 0;
+ return n;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd2_jacobi.c b/vendor/gmp-6.3.0/mpn/generic/hgcd2_jacobi.c
new file mode 100644
index 0000000..95d4af1
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd2_jacobi.c
@@ -0,0 +1,251 @@
+/* hgcd2_jacobi.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2011, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/generic/hgcd2-div.h"
+
+#if GMP_NAIL_BITS != 0
+#error Nails not implemented
+#endif
+
+int
+mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+ struct hgcd_matrix1 *M, unsigned *bitsp)
+{
+ mp_limb_t u00, u01, u10, u11;
+ unsigned bits = *bitsp;
+
+ if (ah < 2 || bh < 2)
+ return 0;
+
+ if (ah > bh || (ah == bh && al > bl))
+ {
+ sub_ddmmss (ah, al, ah, al, bh, bl);
+ if (ah < 2)
+ return 0;
+
+ u00 = u01 = u11 = 1;
+ u10 = 0;
+ bits = mpn_jacobi_update (bits, 1, 1);
+ }
+ else
+ {
+ sub_ddmmss (bh, bl, bh, bl, ah, al);
+ if (bh < 2)
+ return 0;
+
+ u00 = u10 = u11 = 1;
+ u01 = 0;
+ bits = mpn_jacobi_update (bits, 0, 1);
+ }
+
+ if (ah < bh)
+ goto subtract_a;
+
+ for (;;)
+ {
+ ASSERT (ah >= bh);
+ if (ah == bh)
+ goto done;
+
+ if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+ {
+ ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+ bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+ break;
+ }
+
+ /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+ 1), affecting the second column of M. */
+ ASSERT (ah > bh);
+ sub_ddmmss (ah, al, ah, al, bh, bl);
+
+ if (ah < 2)
+ goto done;
+
+ if (ah <= bh)
+ {
+ /* Use q = 1 */
+ u01 += u00;
+ u11 += u10;
+ bits = mpn_jacobi_update (bits, 1, 1);
+ }
+ else
+ {
+ mp_limb_t r[2];
+ mp_limb_t q = div2 (r, ah, al, bh, bl);
+ al = r[0]; ah = r[1];
+ if (ah < 2)
+ {
+ /* A is too small, but q is correct. */
+ u01 += q * u00;
+ u11 += q * u10;
+ bits = mpn_jacobi_update (bits, 1, q & 3);
+ goto done;
+ }
+ q++;
+ u01 += q * u00;
+ u11 += q * u10;
+ bits = mpn_jacobi_update (bits, 1, q & 3);
+ }
+ subtract_a:
+ ASSERT (bh >= ah);
+ if (ah == bh)
+ goto done;
+
+ if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+ {
+ ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+ bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+ goto subtract_a1;
+ }
+
+ /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+ 1), affecting the first column of M. */
+ sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+ if (bh < 2)
+ goto done;
+
+ if (bh <= ah)
+ {
+ /* Use q = 1 */
+ u00 += u01;
+ u10 += u11;
+ bits = mpn_jacobi_update (bits, 0, 1);
+ }
+ else
+ {
+ mp_limb_t r[2];
+ mp_limb_t q = div2 (r, bh, bl, ah, al);
+ bl = r[0]; bh = r[1];
+ if (bh < 2)
+ {
+ /* B is too small, but q is correct. */
+ u00 += q * u01;
+ u10 += q * u11;
+ bits = mpn_jacobi_update (bits, 0, q & 3);
+ goto done;
+ }
+ q++;
+ u00 += q * u01;
+ u10 += q * u11;
+ bits = mpn_jacobi_update (bits, 0, q & 3);
+ }
+ }
+
+ /* NOTE: Since we discard the least significant half limb, we don't get a
+ truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
+ /* Single precision loop */
+ for (;;)
+ {
+ ASSERT (ah >= bh);
+
+ ah -= bh;
+ if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+ break;
+
+ if (ah <= bh)
+ {
+ /* Use q = 1 */
+ u01 += u00;
+ u11 += u10;
+ bits = mpn_jacobi_update (bits, 1, 1);
+ }
+ else
+ {
+ mp_double_limb_t rq = div1 (ah, bh);
+ mp_limb_t q = rq.d1;
+ ah = rq.d0;
+
+ if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+ {
+ /* A is too small, but q is correct. */
+ u01 += q * u00;
+ u11 += q * u10;
+ bits = mpn_jacobi_update (bits, 1, q & 3);
+ break;
+ }
+ q++;
+ u01 += q * u00;
+ u11 += q * u10;
+ bits = mpn_jacobi_update (bits, 1, q & 3);
+ }
+ subtract_a1:
+ ASSERT (bh >= ah);
+
+ bh -= ah;
+ if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+ break;
+
+ if (bh <= ah)
+ {
+ /* Use q = 1 */
+ u00 += u01;
+ u10 += u11;
+ bits = mpn_jacobi_update (bits, 0, 1);
+ }
+ else
+ {
+ mp_double_limb_t rq = div1 (bh, ah);
+ mp_limb_t q = rq.d1;
+ bh = rq.d0;
+
+ if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+ {
+ /* B is too small, but q is correct. */
+ u00 += q * u01;
+ u10 += q * u11;
+ bits = mpn_jacobi_update (bits, 0, q & 3);
+ break;
+ }
+ q++;
+ u00 += q * u01;
+ u10 += q * u11;
+ bits = mpn_jacobi_update (bits, 0, q & 3);
+ }
+ }
+
+ done:
+ M->u[0][0] = u00; M->u[0][1] = u01;
+ M->u[1][0] = u10; M->u[1][1] = u11;
+ *bitsp = bits;
+
+ return 1;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd_appr.c b/vendor/gmp-6.3.0/mpn/generic/hgcd_appr.c
new file mode 100644
index 0000000..bb01738
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd_appr.c
@@ -0,0 +1,267 @@
+/* hgcd_appr.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
+ HGCD_THRESHOLD at the end? */
+mp_size_t
+mpn_hgcd_appr_itch (mp_size_t n)
+{
+ if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+ return n;
+ else
+ {
+ unsigned k;
+ int count;
+ mp_size_t nscaled;
+
+ /* Get the recursion depth. */
+ nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
+ count_leading_zeros (count, nscaled);
+ k = GMP_LIMB_BITS - count;
+
+ return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+ }
+}
+
+/* Destroys inputs. */
+int
+mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
+ struct hgcd_matrix *M, mp_ptr tp)
+{
+ mp_size_t s;
+ int success = 0;
+
+ ASSERT (n > 0);
+
+ ASSERT ((ap[n-1] | bp[n-1]) != 0);
+
+ if (n <= 2)
+ /* Implies s = n. A fairly uninteresting case but exercised by the
+ random inputs of the testsuite. */
+ return 0;
+
+ ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  /* We aim for reduction to GMP_NUMB_BITS * s bits.  But each time
+     we discard some of the least significant limbs, we must keep one
+     additional bit to account for the truncation error.  We maintain
+     GMP_NUMB_BITS * s - extra_bits as the current target size.  */
+
+ s = n/2 + 1;
+ if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+ {
+ unsigned extra_bits = 0;
+
+ while (n > 2)
+ {
+ mp_size_t nn;
+
+ ASSERT (n > s);
+ ASSERT (n <= 2*s);
+
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ break;
+
+ n = nn;
+ success = 1;
+
+ /* We can truncate and discard the lower p bits whenever nbits <=
+ 2*sbits - p. To account for the truncation error, we must
+ adjust
+
+ sbits <-- sbits + 1 - p,
+
+ rather than just sbits <-- sbits - p. This adjustment makes
+ the produced matrix slightly smaller than it could be. */
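+
+	  /* Illustrative numbers (assuming GMP_NUMB_BITS == 64): with s = 10,
+	     n = 17 and extra_bits = 0 the test below reads
+	     64 * 18 + 0 <= 2 * 64 * 10, i.e. 1152 <= 1280, so we may drop
+	     p = (64 * (20 - 17) - 0) / 64 = 3 low limbs.  Since extra_bits is
+	     zero we first bump s to 11 and set extra_bits to 63 (provided the
+	     guard conditions below do not force a continue), then drop the
+	     3 limbs, ending with n = 14, s = 8; the bit target shrinks by
+	     64 * 3 - 1 bits, matching the sbits <-- sbits + 1 - p rule.  */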
+
+ if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
+ {
+ mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
+
+ if (extra_bits == 0)
+ {
+ /* We cross a limb boundary and bump s. We can't do that
+		       if the result is that it makes min(U, V)
+ smaller than 2^{GMP_NUMB_BITS} s. */
+ if (s + 1 == n
+ || mpn_zero_p (ap + s + 1, n - s - 1)
+ || mpn_zero_p (bp + s + 1, n - s - 1))
+ continue;
+
+ extra_bits = GMP_NUMB_BITS - 1;
+ s++;
+ }
+ else
+ {
+ extra_bits--;
+ }
+
+ /* Drop the p least significant limbs */
+ ap += p; bp += p; n -= p; s -= p;
+ }
+ }
+
+ ASSERT (s > 0);
+
+ if (extra_bits > 0)
+ {
+	  /* We can get here only if we have dropped at least one of the least
+ significant bits, so we can decrement ap and bp. We can then shift
+ left extra bits using mpn_rshift. */
+ /* NOTE: In the unlikely case that n is large, it would be preferable
+ to do an initial subdiv step to reduce the size before shifting,
+ but that would mean duplicating mpn_gcd_subdiv_step with a bit
+ count rather than a limb count. */
+ ap--; bp--;
+ ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
+ bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
+ n += (ap[n] | bp[n]) > 0;
+
+ ASSERT (success);
+
+ while (n > 2)
+ {
+ mp_size_t nn;
+
+ ASSERT (n > s);
+ ASSERT (n <= 2*s);
+
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+ if (!nn)
+ return 1;
+
+ n = nn;
+ }
+ }
+
+ if (n == 2)
+ {
+ struct hgcd_matrix1 M1;
+ ASSERT (s == 1);
+
+ if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
+ {
+ /* Multiply M <- M * M1 */
+ mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+ success = 1;
+ }
+ }
+ return success;
+ }
+ else
+ {
+ mp_size_t n2 = (3*n)/4 + 1;
+ mp_size_t p = n/2;
+ mp_size_t nn;
+
+ nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+ if (nn)
+ {
+ n = nn;
+ /* FIXME: Discard some of the low limbs immediately? */
+ success = 1;
+ }
+
+ while (n > n2)
+ {
+ mp_size_t nn;
+
+ /* Needs n + 1 storage */
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ return success;
+
+ n = nn;
+ success = 1;
+ }
+ if (n > s + 2)
+ {
+ struct hgcd_matrix M1;
+ mp_size_t scratch;
+
+ p = 2*s - n + 1;
+ scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+ mpn_hgcd_matrix_init(&M1, n - p, tp);
+ if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
+ {
+ /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+ ASSERT (M->n + 2 >= M1.n);
+
+ /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+ then either q or q + 1 is a correct quotient, and M1 will
+ start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+ rules out the case that the size of M * M1 is much
+ smaller than the expected M->n + M1->n. */
+
+ ASSERT (M->n + M1.n < M->alloc);
+
+	      /* We need a bound for M->n + M1.n.  Let n be the original
+ input size. Then
+
+ ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+ and it follows that
+
+ M.n + M1.n <= ceil(n/2) + 1
+
+ Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+ amount of needed scratch space. */
+ mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+ return 1;
+ }
+ }
+
+ for(;;)
+ {
+ mp_size_t nn;
+
+ ASSERT (n > s);
+ ASSERT (n <= 2*s);
+
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+ if (!nn)
+ return success;
+
+ n = nn;
+ success = 1;
+ }
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd_jacobi.c b/vendor/gmp-6.3.0/mpn/generic/hgcd_jacobi.c
new file mode 100644
index 0000000..24014ce
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd_jacobi.c
@@ -0,0 +1,243 @@
+/* hgcd_jacobi.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This file is almost a copy of hgcd.c, with some added calls to
+   mpn_jacobi_update.  */
+
+struct hgcd_jacobi_ctx
+{
+ struct hgcd_matrix *M;
+ unsigned *bitsp;
+};
+
+static void
+hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+ mp_srcptr qp, mp_size_t qn, int d)
+{
+ ASSERT (!gp);
+ ASSERT (d >= 0);
+
+ MPN_NORMALIZE (qp, qn);
+ if (qn > 0)
+ {
+ struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
+ /* NOTES: This is a bit ugly. A tp area is passed to
+ gcd_subdiv_step, which stores q at the start of that area. We
+ now use the rest. */
+ mp_ptr tp = (mp_ptr) qp + qn;
+
+ mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
+ *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
+ }
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+ division. Reduces the size by almost one limb or more, but never
+ below the given size s. Return new size for a and b, or 0 if no
+ more steps are possible.
+
+ If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+ limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s + 1 limbs, and for
+ hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+ resulting size of M.
+
+ If N is the input size to the calling hgcd, then s = floor(N/2) +
+ 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
+ < N, so N is sufficient.
+*/
+
+static mp_size_t
+hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+ struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+ struct hgcd_matrix1 M1;
+ mp_limb_t mask;
+ mp_limb_t ah, al, bh, bl;
+
+ ASSERT (n > s);
+
+ mask = ap[n-1] | bp[n-1];
+ ASSERT (mask > 0);
+
+ if (n == s + 1)
+ {
+ if (mask < 4)
+ goto subtract;
+
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else if (mask & GMP_NUMB_HIGHBIT)
+ {
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else
+ {
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+ al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+ bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+ bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+ }
+
+ /* Try an mpn_hgcd2 step */
+ if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
+ {
+ /* Multiply M <- M * M1 */
+ mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+ /* Can't swap inputs, so we need to copy. */
+ MPN_COPY (tp, ap, n);
+ /* Multiply M1^{-1} (a;b) */
+ return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+ }
+
+ subtract:
+ {
+ struct hgcd_jacobi_ctx ctx;
+ ctx.M = M;
+ ctx.bitsp = bitsp;
+
+ return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
+ }
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+ with elements of size at most (n+1)/2 - 1. Returns new size of a,
+ b, or zero if no reduction is possible. */
+
+/* Same scratch requirements as for mpn_hgcd. */
+mp_size_t
+mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
+ struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+ mp_size_t s = n/2 + 1;
+
+ mp_size_t nn;
+ int success = 0;
+
+ if (n <= s)
+ /* Happens when n <= 2, a fairly uninteresting case but exercised
+ by the random inputs of the testsuite. */
+ return 0;
+
+ ASSERT ((ap[n-1] | bp[n-1]) > 0);
+
+ ASSERT ((n+1)/2 - 1 < M->alloc);
+
+ if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
+ {
+ mp_size_t n2 = (3*n)/4 + 1;
+ mp_size_t p = n/2;
+
+ nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
+ if (nn > 0)
+ {
+ /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+ = 2 (n - 1) */
+ n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+ success = 1;
+ }
+ while (n > n2)
+ {
+ /* Needs n + 1 storage */
+ nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+ if (!nn)
+ return success ? n : 0;
+ n = nn;
+ success = 1;
+ }
+
+ if (n > s + 2)
+ {
+ struct hgcd_matrix M1;
+ mp_size_t scratch;
+
+ p = 2*s - n + 1;
+ scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+ mpn_hgcd_matrix_init(&M1, n - p, tp);
+ nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
+ if (nn > 0)
+ {
+ /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+ ASSERT (M->n + 2 >= M1.n);
+
+ /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+ then either q or q + 1 is a correct quotient, and M1 will
+ start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+ rules out the case that the size of M * M1 is much
+ smaller than the expected M->n + M1->n. */
+
+ ASSERT (M->n + M1.n < M->alloc);
+
+ /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+ = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+ n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+	      /* We need a bound for M->n + M1.n.  Let n be the original
+ input size. Then
+
+ ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+ and it follows that
+
+ M.n + M1.n <= ceil(n/2) + 1
+
+ Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+ amount of needed scratch space. */
+ mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+ success = 1;
+ }
+ }
+ }
+
+ for (;;)
+ {
+ /* Needs s+3 < n */
+ nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+ if (!nn)
+ return success ? n : 0;
+
+ n = nn;
+ success = 1;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd_matrix.c b/vendor/gmp-6.3.0/mpn/generic/hgcd_matrix.c
new file mode 100644
index 0000000..54c795d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd_matrix.c
@@ -0,0 +1,265 @@
+/* hgcd_matrix.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* For input of size n, matrix elements are of size at most ceil(n/2)
+ - 1, but we need two limbs extra. */
+void
+mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
+{
+ mp_size_t s = (n+1)/2 + 1;
+ M->alloc = s;
+ M->n = 1;
+ MPN_ZERO (p, 4 * s);
+ M->p[0][0] = p;
+ M->p[0][1] = p + s;
+ M->p[1][0] = p + 2 * s;
+ M->p[1][1] = p + 3 * s;
+
+ M->p[0][0][0] = M->p[1][1][0] = 1;
+}
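+
+/* A usage sketch (illustrative, not a complete program; the caller must
+   arrange scratch space, e.g. via TMP_ALLOC_LIMBS):
+
+     struct hgcd_matrix M;
+     mp_ptr scratch = TMP_ALLOC_LIMBS (MPN_HGCD_MATRIX_INIT_ITCH (n));
+     mpn_hgcd_matrix_init (&M, n, scratch);
+     (M is now the 2x2 identity matrix.)
+
+   Each of the four elements is given s = (n+1)/2 + 1 limbs of the buffer,
+   so the buffer must hold 4*s limbs in total, which is what
+   MPN_HGCD_MATRIX_INIT_ITCH accounts for.  */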
+
+/* Update column COL, adding in Q * column (1-COL). Temporary storage:
+ * qn + n <= M->alloc, where n is the size of the largest element in
+ * column 1 - COL. */
+void
+mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
+ unsigned col, mp_ptr tp)
+{
+ ASSERT (col < 2);
+
+ if (qn == 1)
+ {
+ mp_limb_t q = qp[0];
+ mp_limb_t c0, c1;
+
+ c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
+ c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
+
+ M->p[0][col][M->n] = c0;
+ M->p[1][col][M->n] = c1;
+
+ M->n += (c0 | c1) != 0;
+ }
+ else
+ {
+ unsigned row;
+
+ /* Carries for the unlikely case that we get both high words
+ from the multiplication and carries from the addition. */
+ mp_limb_t c[2];
+ mp_size_t n;
+
+ /* The matrix will not necessarily grow in size by qn, so we
+ need normalization in order not to overflow M. */
+
+ for (n = M->n; n + qn > M->n; n--)
+ {
+ ASSERT (n > 0);
+ if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
+ break;
+ }
+
+ ASSERT (qn + n <= M->alloc);
+
+ for (row = 0; row < 2; row++)
+ {
+ if (qn <= n)
+ mpn_mul (tp, M->p[row][1-col], n, qp, qn);
+ else
+ mpn_mul (tp, qp, qn, M->p[row][1-col], n);
+
+ ASSERT (n + qn >= M->n);
+ c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
+ }
+
+ n += qn;
+
+ if (c[0] | c[1])
+ {
+ M->p[0][col][n] = c[0];
+ M->p[1][col][n] = c[1];
+ n++;
+ }
+ else
+ {
+ n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
+ ASSERT (n >= M->n);
+ }
+ M->n = n;
+ }
+
+ ASSERT (M->n < M->alloc);
+}
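+
+/* Tiny worked example of the qn == 1 path above (illustrative): starting
+   from the identity matrix produced by mpn_hgcd_matrix_init, updating
+   column 1 with q = 3 performs
+
+     p[0][1] += 3 * p[0][0]  ->  0 + 3*1 = 3
+     p[1][1] += 3 * p[1][0]  ->  1 + 3*0 = 1
+
+   so M becomes (1, 3; 0, 1), the matrix recording a single quotient of 3.  */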
+
+/* Multiply M by M1 from the right. Since the M1 elements fit in
+ GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
+ temporary space M->n */
+void
+mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
+ mp_ptr tp)
+{
+ mp_size_t n0, n1;
+
+ /* Could avoid copy by some swapping of pointers. */
+ MPN_COPY (tp, M->p[0][0], M->n);
+ n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
+ MPN_COPY (tp, M->p[1][0], M->n);
+ n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
+
+ /* Depends on zero initialization */
+ M->n = MAX(n0, n1);
+ ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
+ of temporary storage (see mpn_matrix22_mul_itch). */
+void
+mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
+ mp_ptr tp)
+{
+ mp_size_t n;
+  /* About the new size of M's elements.  Since M1's diagonal elements
+ /* About the new size of M:s elements. Since M1's diagonal elements
+ are > 0, no element can decrease. The new elements are of size
+ M->n + M1->n, one limb more or less. The computation of the
+ matrix product produces elements of size M->n + M1->n + 1. But
+ the true size, after normalization, may be three limbs smaller.
+
+ The reason that the product has normalized size >= M->n + M1->n -
+ 2 is subtle. It depends on the fact that M and M1 can be factored
+ as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
+ M ending with a large power and M1 starting with a large power of
+ the same matrix. */
+
+ /* FIXME: Strassen multiplication gives only a small speedup. In FFT
+ multiplication range, this function could be sped up quite a lot
+ using invariance. */
+ ASSERT (M->n + M1->n < M->alloc);
+
+ ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
+ | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
+
+ ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
+ | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
+
+ mpn_matrix22_mul (M->p[0][0], M->p[0][1],
+ M->p[1][0], M->p[1][1], M->n,
+ M1->p[0][0], M1->p[0][1],
+ M1->p[1][0], M1->p[1][1], M1->n, tp);
+
+ /* Index of last potentially non-zero limb, size is one greater. */
+ n = M->n + M1->n;
+
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+
+ ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
+
+ M->n = n + 1;
+}
+
+/* Multiplies the least significant p limbs of (a;b) by M^-1.
+   Temporary space needed: 2 * (p + M->n).  */
+mp_size_t
+mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,
+ mp_size_t n, mp_ptr ap, mp_ptr bp,
+ mp_size_t p, mp_ptr tp)
+{
+ /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
+     = (r11 a - r01 b; -r10 a + r00 b).  */
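+  /* This inverse formula relies on det M = 1: M is built up as a product of
+     the elementary matrices (1, q; 0, 1) and (1, 0; q, 1), each of
+     determinant 1, so the adjugate written above is the exact inverse.  */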
+
+ mp_ptr t0 = tp;
+ mp_ptr t1 = tp + p + M->n;
+ mp_limb_t ah, bh;
+ mp_limb_t cy;
+
+ ASSERT (p + M->n < n);
+
+ /* First compute the two values depending on a, before overwriting a */
+
+ if (M->n >= p)
+ {
+ mpn_mul (t0, M->p[1][1], M->n, ap, p);
+ mpn_mul (t1, M->p[1][0], M->n, ap, p);
+ }
+ else
+ {
+ mpn_mul (t0, ap, p, M->p[1][1], M->n);
+ mpn_mul (t1, ap, p, M->p[1][0], M->n);
+ }
+
+ /* Update a */
+ MPN_COPY (ap, t0, p);
+ ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
+
+ if (M->n >= p)
+ mpn_mul (t0, M->p[0][1], M->n, bp, p);
+ else
+ mpn_mul (t0, bp, p, M->p[0][1], M->n);
+
+ cy = mpn_sub (ap, ap, n, t0, p + M->n);
+ ASSERT (cy <= ah);
+ ah -= cy;
+
+ /* Update b */
+ if (M->n >= p)
+ mpn_mul (t0, M->p[0][0], M->n, bp, p);
+ else
+ mpn_mul (t0, bp, p, M->p[0][0], M->n);
+
+ MPN_COPY (bp, t0, p);
+ bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
+ cy = mpn_sub (bp, bp, n, t1, p + M->n);
+ ASSERT (cy <= bh);
+ bh -= cy;
+
+ if (ah > 0 || bh > 0)
+ {
+ ap[n] = ah;
+ bp[n] = bh;
+ n++;
+ }
+ else
+ {
+ /* The subtraction can reduce the size by at most one limb. */
+ if (ap[n-1] == 0 && bp[n-1] == 0)
+ n--;
+ }
+ ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+ return n;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd_reduce.c b/vendor/gmp-6.3.0/mpn/generic/hgcd_reduce.c
new file mode 100644
index 0000000..3aee77d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd_reduce.c
@@ -0,0 +1,242 @@
+/* hgcd_reduce.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Computes R -= A * B. Result must be non-negative. Normalized down
+ to size an, and resulting size is returned. */
+static mp_size_t
+submul (mp_ptr rp, mp_size_t rn,
+ mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+ mp_ptr tp;
+ TMP_DECL;
+
+ ASSERT (bn > 0);
+ ASSERT (an >= bn);
+ ASSERT (rn >= an);
+ ASSERT (an + bn <= rn + 1);
+
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS (an + bn);
+
+ mpn_mul (tp, ap, an, bp, bn);
+ ASSERT ((an + bn <= rn) || (tp[rn] == 0));
+ ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn - (an + bn > rn)));
+ TMP_FREE;
+
+ while (rn > an && (rp[rn-1] == 0))
+ rn--;
+
+ return rn;
+}
+
+/* Computes (a, b) <-- M^{-1} (a; b) */
+/* FIXME:
+ x Take scratch parameter, and figure out scratch need.
+
+ x Use some fallback for small M->n?
+*/
+static mp_size_t
+hgcd_matrix_apply (const struct hgcd_matrix *M,
+ mp_ptr ap, mp_ptr bp,
+ mp_size_t n)
+{
+ mp_size_t an, bn, un, vn, nn;
+ mp_size_t mn[2][2];
+ mp_size_t modn;
+ mp_ptr tp, sp, scratch;
+ mp_limb_t cy;
+ unsigned i, j;
+
+ TMP_DECL;
+
+ ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+ an = n;
+ MPN_NORMALIZE (ap, an);
+ bn = n;
+ MPN_NORMALIZE (bp, bn);
+
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ {
+ mp_size_t k;
+ k = M->n;
+ MPN_NORMALIZE (M->p[i][j], k);
+ mn[i][j] = k;
+ }
+
+ ASSERT (mn[0][0] > 0);
+ ASSERT (mn[1][1] > 0);
+ ASSERT ( (mn[0][1] | mn[1][0]) > 0);
+
+ TMP_MARK;
+
+ if (mn[0][1] == 0)
+ {
+ /* A unchanged, M = (1, 0; q, 1) */
+ ASSERT (mn[0][0] == 1);
+ ASSERT (M->p[0][0][0] == 1);
+ ASSERT (mn[1][1] == 1);
+ ASSERT (M->p[1][1][0] == 1);
+
+ /* Put B <-- B - q A */
+ nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
+ }
+ else if (mn[1][0] == 0)
+ {
+ /* B unchanged, M = (1, q; 0, 1) */
+ ASSERT (mn[0][0] == 1);
+ ASSERT (M->p[0][0][0] == 1);
+ ASSERT (mn[1][1] == 1);
+ ASSERT (M->p[1][1][0] == 1);
+
+ /* Put A <-- A - q * B */
+ nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
+ }
+ else
+ {
+ /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01.
+ B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */
+ un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
+ vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
+
+ nn = MAX (un, vn);
+ /* In the range of interest, mulmod_bnm1 should always beat mullo. */
+ modn = mpn_mulmod_bnm1_next_size (nn + 1);
+
+ TMP_ALLOC_LIMBS_3 (tp, modn,
+ sp, modn,
+ scratch, mpn_mulmod_bnm1_itch (modn, modn, M->n));
+
+ ASSERT (n <= 2*modn);
+
+ if (n > modn)
+ {
+ cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
+ MPN_INCR_U (ap, modn, cy);
+
+ cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
+ MPN_INCR_U (bp, modn, cy);
+
+ n = modn;
+ }
+
+ mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
+ mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
+
+ /* FIXME: Handle the small n case in some better way. */
+ if (n + mn[1][1] < modn)
+ MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
+ if (n + mn[0][1] < modn)
+ MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
+
+ cy = mpn_sub_n (tp, tp, sp, modn);
+ MPN_DECR_U (tp, modn, cy);
+
+ ASSERT (mpn_zero_p (tp + nn, modn - nn));
+
+ mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
+ MPN_COPY (ap, tp, nn);
+ mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
+
+ if (n + mn[1][0] < modn)
+ MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
+ if (n + mn[0][0] < modn)
+ MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
+
+ cy = mpn_sub_n (tp, tp, sp, modn);
+ MPN_DECR_U (tp, modn, cy);
+
+ ASSERT (mpn_zero_p (tp + nn, modn - nn));
+ MPN_COPY (bp, tp, nn);
+
+ while ( (ap[nn-1] | bp[nn-1]) == 0)
+ {
+ nn--;
+ ASSERT (nn > 0);
+ }
+ }
+ TMP_FREE;
+
+ return nn;
+}
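+
+/* Sketch of why the modular computation above is exact: the true values of
+   the updated a and b are non-negative and fit in at most nn <= modn limbs,
+   so the products and differences taken modulo B^modn - 1 (B = the limb
+   base) never wrap around, and each residue equals the exact result; the
+   ASSERTs on the high limbs being zero check precisely this.  */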
+
+mp_size_t
+mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
+{
+ mp_size_t itch;
+ if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+ {
+ itch = mpn_hgcd_itch (n-p);
+
+ /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *
+	 (p + ceil((n-p)/2) - 1) <= n + p - 1.  */
+ if (itch < n + p - 1)
+ itch = n + p - 1;
+ }
+ else
+ {
+ itch = 2*(n-p) + mpn_hgcd_itch (n-p);
+ /* Currently, hgcd_matrix_apply allocates its own storage. */
+ }
+ return itch;
+}
+
+/* FIXME: Document storage need. */
+mp_size_t
+mpn_hgcd_reduce (struct hgcd_matrix *M,
+ mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
+ mp_ptr tp)
+{
+ mp_size_t nn;
+ if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+ {
+ nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
+ if (nn > 0)
+ /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+ = 2 (n - 1) */
+ return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+ }
+ else
+ {
+ MPN_COPY (tp, ap + p, n - p);
+ MPN_COPY (tp + n - p, bp + p, n - p);
+ if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
+ return hgcd_matrix_apply (M, ap, bp, n);
+ }
+ return 0;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/hgcd_step.c b/vendor/gmp-6.3.0/mpn/generic/hgcd_step.c
new file mode 100644
index 0000000..a978a88
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/hgcd_step.c
@@ -0,0 +1,127 @@
+/* hgcd_step.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static void
+hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+ mp_srcptr qp, mp_size_t qn, int d)
+{
+ ASSERT (!gp);
+ ASSERT (d >= 0);
+ ASSERT (d <= 1);
+
+ MPN_NORMALIZE (qp, qn);
+ if (qn > 0)
+ {
+ struct hgcd_matrix *M = (struct hgcd_matrix *) p;
+ /* NOTES: This is a bit ugly. A tp area is passed to
+ gcd_subdiv_step, which stores q at the start of that area. We
+ now use the rest. */
+ mp_ptr tp = (mp_ptr) qp + qn;
+ mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);
+ }
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+ division. Reduces the size by almost one limb or more, but never
+ below the given size s. Return new size for a and b, or 0 if no
+ more steps are possible.
+
+ If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+ limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+ fails, needs space for the quotient, qn <= n - s limbs, for and
+   fails, needs space for the quotient, qn <= n - s limbs, and for
+ (resulting size of M) + 1.
+
+ If N is the input size to the calling hgcd, then s = floor(N/2) +
+ 1, M->n < N, qn + product size <= n - s + n - s + 1 = 2 (n - s) + 1
+ <= N.
+*/
+
+mp_size_t
+mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+ struct hgcd_matrix *M, mp_ptr tp)
+{
+ struct hgcd_matrix1 M1;
+ mp_limb_t mask;
+ mp_limb_t ah, al, bh, bl;
+
+ ASSERT (n > s);
+
+ mask = ap[n-1] | bp[n-1];
+ ASSERT (mask > 0);
+
+ if (n == s + 1)
+ {
+ if (mask < 4)
+ goto subtract;
+
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else if (mask & GMP_NUMB_HIGHBIT)
+ {
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else
+ {
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+ al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+ bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+ bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+ }
+
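+  /* At this point ah:al and bh:bl hold double-limb approximations of a
+     and b, taken from their most significant limbs; mpn_hgcd2 tries to
+     derive a 2x2 reduction matrix M1 from these alone.  */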
+ /* Try an mpn_hgcd2 step */
+ if (mpn_hgcd2 (ah, al, bh, bl, &M1))
+ {
+ /* Multiply M <- M * M1 */
+ mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+ /* Can't swap inputs, so we need to copy. */
+ MPN_COPY (tp, ap, n);
+ /* Multiply M1^{-1} (a;b) */
+ return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+ }
+
+ subtract:
+
+ return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/invert.c b/vendor/gmp-6.3.0/mpn/generic/invert.c
new file mode 100644
index 0000000..157ff2b
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/invert.c
@@ -0,0 +1,86 @@
+/* invert.c -- Compute floor((B^{2n}-1)/U) - B^n.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2007, 2009, 2010, 2012, 2014-2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+ ASSERT (n > 0);
+ ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+ ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+ ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+ ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+ if (n == 1)
+ invert_limb (*ip, *dp);
+ else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
+ {
+ /* Maximum scratch needed by this branch: 2*n */
+ mp_ptr xp;
+
+ xp = scratch; /* 2 * n limbs */
+ /* n > 1 here */
+ MPN_FILL (xp, n, GMP_NUMB_MAX);
+ mpn_com (xp + n, dp, n);
+ if (n == 2) {
+ mpn_divrem_2 (ip, 0, xp, 4, dp);
+ } else {
+ gmp_pi1_t inv;
+ invert_pi1 (inv, dp[n-1], dp[n-2]);
+ /* FIXME: should we use dcpi1_div_q, for big sizes? */
+ mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
+ }
+ }
+ else { /* Use approximated inverse; correct the result if needed. */
+ mp_limb_t e; /* The possible error in the approximate inverse */
+
+ ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
+ e = mpn_ni_invertappr (ip, dp, n, scratch);
+
+ if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
+ /* Code to detect and correct the "off by one" approximation. */
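+      /* The check below forms D * (B^n + I + 1) = I*D + D + D*B^n and
+	 tests whether it reaches B^{2n} (i.e. whether the additions carry
+	 out of the 2n-limb scratch).  If it does, I already satisfies
+	 D*(B^n+I) < B^{2n} <= D*(B^n+I+1) and is left alone; if not, the
+	 approximation was one too small and ip is incremented.  */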
+ mpn_mul_n (scratch, ip, dp, n);
+ e = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need e.*/
+ if (LIKELY(e)) /* The high part can not give a carry by itself. */
+ e = mpn_add_nc (scratch + n, scratch + n, dp, n, e); /* FIXME:e */
+ /* If the value was wrong (no carry), correct it (increment). */
+ e ^= CNST_LIMB (1);
+ MPN_INCR_U (ip, n, e);
+ }
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/invertappr.c b/vendor/gmp-6.3.0/mpn/generic/invertappr.c
new file mode 100644
index 0000000..3be5596
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/invertappr.c
@@ -0,0 +1,300 @@
+/* mpn_invertappr and helper functions. Compute I such that
+   floor((B^{2n}-1)/U) - 1 <= I + B^n <= floor((B^{2n}-1)/U).
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ The algorithm used here was inspired by ApproximateReciprocal from "Modern
+ Computer Arithmetic", by Richard P. Brent and Paul Zimmermann. Special
+ thanks to Paul Zimmermann for his very valuable suggestions on all the
+ theoretical aspects during the work on this code.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2007, 2009, 2010, 2012, 2015, 2016 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* FIXME: The iterative version splits the operand into two slightly unbalanced
+   parts; using log_2 (or counting the bits) underestimates the maximum
+   number of iterations. */
+
+#if TUNE_PROGRAM_BUILD
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
+#define MAYBE_dcpi1_divappr 1
+#else
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
+#define MAYBE_dcpi1_divappr \
+ (INV_NEWTON_THRESHOLD < DC_DIVAPPR_Q_THRESHOLD)
+#if (INV_NEWTON_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD) && \
+ (INV_APPR_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD)
+#undef INV_MULMOD_BNM1_THRESHOLD
+#define INV_MULMOD_BNM1_THRESHOLD 0 /* always when Newton */
+#endif
+#endif
+
+/* All three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch) take
+   the strictly normalised value {dp,n} (i.e., the most significant bit must
+   be set) as input, and compute {ip,n}: the approximate reciprocal of {dp,n}.
+
+ Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
+ following conditions are satisfied by the output:
+ 0 <= e <= 1;
+ {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
+ I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
+ e=1 means that the result _may_ be one less than expected.
+
+ The _bc version returns e=1 most of the time.
+ The _ni version should return e=0 most of the time; only about 1% of
+ possible random input should give e=1.
+
+ When the strict result is needed, i.e., e=0 in the relation above:
+ {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
+ the function mpn_invert (ip, dp, n, scratch) should be used instead. */
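+
+/* Toy illustration of the contract above (editorial example, not part of
+   the library): take B = 10 and n = 1, with d = 7 (normalised, since
+   7 >= B/2).  The exact value is floor((B^2-1)/d) - B = floor(99/7) - 10
+   = 14 - 10 = 4, and indeed 7*(10+4) = 98 < 100 <= 7*(10+4+1) = 105.
+   An approximate routine may instead return 3 with e = 1, since
+   7*(10+3) = 91 < 100 <= 7*(10+3+1+1) = 105.  */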
+
+/* Maximum scratch needed by this branch (at xp): 2*n */
+static mp_limb_t
+mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr xp)
+{
+ ASSERT (n > 0);
+ ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+ ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+ ASSERT (! MPN_OVERLAP_P (ip, n, xp, mpn_invertappr_itch(n)));
+ ASSERT (! MPN_OVERLAP_P (dp, n, xp, mpn_invertappr_itch(n)));
+
+ /* Compute a base value of r limbs. */
+ if (n == 1)
+ invert_limb (*ip, *dp);
+ else {
+ /* n > 1 here */
+ MPN_FILL (xp, n, GMP_NUMB_MAX);
+ mpn_com (xp + n, dp, n);
+
+ /* Now xp contains B^2n - {dp,n}*B^n - 1 */
+
+ /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
+ if (n == 2) {
+ mpn_divrem_2 (ip, 0, xp, 4, dp);
+ } else {
+ gmp_pi1_t inv;
+ invert_pi1 (inv, dp[n-1], dp[n-2]);
+ if (! MAYBE_dcpi1_divappr
+ || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
+ mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
+ else
+ mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
+ MPN_DECR_U(ip, n, CNST_LIMB (1));
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* mpn_ni_invertappr: computes the approximate reciprocal using Newton's
+ iterations (at least one).
+
+ Inspired by Algorithm "ApproximateReciprocal", published in "Modern Computer
+ Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
+ in version 0.4 of the book.
+
+ Some adaptations were introduced, to allow product mod B^m-1 and return the
+ value e.
+
+ We introduced a correction in such a way that "the value of
+ B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
+ "2B^n-1").
+
+   Maximum scratch needed by this branch is <= 2*n, but we have to fit 3*rn
+   limbs in the scratch, i.e. 3*rn <= 2*n: we require n > 4.
+
+ We use a wrapped product modulo B^m-1. NOTE: is there any normalisation
+ problem for the [0] class? It shouldn't: we compute 2*|A*X_h - B^{n+h}| <
+ B^m-1. We may get [0] if and only if we get AX_h = B^{n+h}. This can
+ happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1 i.e., AX_h =
+ B^{n+h} - A, then we get into the "negative" branch, where X_h is not
+ incremented (because A < B^n).
+
+ FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
+ is allocated apart.
+ */
+
+mp_limb_t
+mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+ mp_limb_t cy;
+ mp_size_t rn, mn;
+ mp_size_t sizes[NPOWS], *sizp;
+ mp_ptr tp;
+ TMP_DECL;
+#define xp scratch
+
+ ASSERT (n > 4);
+ ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+ ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+ ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+ ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+ /* Compute the computation precisions from highest to lowest, leaving the
+ base case size in 'rn'. */
+ sizp = sizes;
+ rn = n;
+ do {
+ *sizp = rn;
+ rn = (rn >> 1) + 1;
+ ++sizp;
+ } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
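+  /* Example (editorial; the actual threshold is CPU-dependent): with
+     INV_NEWTON_THRESHOLD = 170 and n = 1000, the loop stores 1000, 501,
+     251 and leaves rn = 126.  The base case is then computed at 126
+     limbs, and the Newton iterations below rebuild the inverse at 251,
+     501 and finally 1000 limbs.  */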
+
+  /* We seek the inverse of 0.{dp,n}; we compute it as 1.{ip,n} */
+ dp += n;
+ ip += n;
+
+ /* Compute a base value of rn limbs. */
+ mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
+
+ TMP_MARK;
+
+ if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
+ {
+ mn = mpn_mulmod_bnm1_next_size (n + 1);
+ tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
+ }
+ /* Use Newton's iterations to get the desired precision.*/
+
+ while (1) {
+ n = *--sizp;
+ /*
+ v n v
+ +----+--+
+ ^ rn ^
+ */
+
+ /* Compute i_jd . */
+ if (BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
+ || ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
+      /* FIXME: We only need {xp,n+1} */
+ mpn_mul (xp, dp - n, n, ip - rn, rn);
+ mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
+ cy = CNST_LIMB(1); /* Remember we truncated, Mod B^(n+1) */
+ /* We computed (truncated) {xp,n+1} <- 1.{ip,rn} * 0.{dp,n} */
+ } else { /* Use B^mn-1 wraparound */
+ mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
+ /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
+ /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
+ /* Add dp*B^rn mod (B^mn-1) */
+ ASSERT (n >= mn - rn);
+ cy = mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
+ cy = mpn_add_nc (xp, xp, dp - (n - (mn - rn)), n - (mn - rn), cy);
+ /* Subtract B^{rn+n}, maybe only compensate the carry*/
+ xp[mn] = CNST_LIMB (1); /* set a limit for DECR_U */
+ MPN_DECR_U (xp + rn + n - mn, 2 * mn + 1 - rn - n, CNST_LIMB (1) - cy);
+ MPN_DECR_U (xp, mn, CNST_LIMB (1) - xp[mn]); /* if DECR_U eroded xp[mn] */
+ cy = CNST_LIMB(0); /* Remember we are working Mod B^mn-1 */
+ }
+
+ if (xp[n] < CNST_LIMB (2)) { /* "positive" residue class */
+ cy = xp[n]; /* 0 <= cy <= 1 here. */
+#if HAVE_NATIVE_mpn_sublsh1_n
+ if (cy++) {
+ if (mpn_cmp (xp, dp - n, n) > 0) {
+ mp_limb_t chk;
+ chk = mpn_sublsh1_n (xp, xp, dp - n, n);
+ ASSERT (chk == xp[n]);
+ ++ cy;
+ } else
+ ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+ }
+#else /* no mpn_sublsh1_n*/
+ if (cy++ && !mpn_sub_n (xp, xp, dp - n, n)) {
+ ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+ ++cy;
+ }
+#endif
+ /* 1 <= cy <= 3 here. */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+ if (mpn_cmp (xp, dp - n, n) > 0) {
+ ASSERT_NOCARRY (mpn_rsblsh1_n (xp + n, xp, dp - n, n));
+ ++cy;
+ } else
+ ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
+#else /* no mpn_rsblsh1_n*/
+ if (mpn_cmp (xp, dp - n, n) > 0) {
+ ASSERT_NOCARRY (mpn_sub_n (xp, xp, dp - n, n));
+ ++cy;
+ }
+ ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
+#endif
+ MPN_DECR_U(ip - rn, rn, cy); /* 1 <= cy <= 4 here. */
+ } else { /* "negative" residue class */
+ ASSERT (xp[n] >= GMP_NUMB_MAX - CNST_LIMB(1));
+ MPN_DECR_U(xp, n + 1, cy);
+ if (xp[n] != GMP_NUMB_MAX) {
+ MPN_INCR_U(ip - rn, rn, CNST_LIMB (1));
+ ASSERT_CARRY (mpn_add_n (xp, xp, dp - n, n));
+ }
+ mpn_com (xp + 2 * n - rn, xp + n - rn, rn);
+ }
+
+    /* Compute x_ju_j. FIXME: We need {xp+rn,rn}, mulhi? */
+ mpn_mul_n (xp, xp + 2 * n - rn, ip - rn, rn);
+ cy = mpn_add_n (xp + rn, xp + rn, xp + 2 * n - rn, 2 * rn - n);
+ cy = mpn_add_nc (ip - n, xp + 3 * rn - n, xp + n + rn, n - rn, cy);
+ MPN_INCR_U (ip - rn, rn, cy);
+ if (sizp == sizes) { /* Get out of the cycle */
+ /* Check for possible carry propagation from below. */
+ cy = xp[3 * rn - n - 1] > GMP_NUMB_MAX - CNST_LIMB (7); /* Be conservative. */
+ /* cy = mpn_add_1 (xp + rn, xp + rn, 2*rn - n, 4); */
+ break;
+ }
+ rn = n;
+ }
+ TMP_FREE;
+
+ return cy;
+#undef xp
+}
+
+mp_limb_t
+mpn_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+ ASSERT (n > 0);
+ ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+ ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+ ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+ ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+ if (BELOW_THRESHOLD (n, INV_NEWTON_THRESHOLD))
+ return mpn_bc_invertappr (ip, dp, n, scratch);
+ else
+ return mpn_ni_invertappr (ip, dp, n, scratch);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/jacbase.c b/vendor/gmp-6.3.0/mpn/generic/jacbase.c
new file mode 100644
index 0000000..391ceac
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/jacbase.c
@@ -0,0 +1,242 @@
+/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.
+
+ THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
+ INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
+
+Copyright 1999-2002, 2010, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Use the simple loop by default. The generic count_trailing_zeros is not
+   very fast, and the extra trickery of method 3 has proven to be of less use
+   than might have been thought. */
+#ifndef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 2
+#endif
+
+
+/* Use count_trailing_zeros. */
+#if JACOBI_BASE_METHOD == 1
+#define PROCESS_TWOS_ANY \
+ { \
+ mp_limb_t twos; \
+ count_trailing_zeros (twos, a); \
+ result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b); \
+ a >>= twos; \
+ }
+#define PROCESS_TWOS_EVEN PROCESS_TWOS_ANY
+#endif
+
+/* Use a simple loop. A disadvantage of this is that there's a branch on a
+ 50/50 chance of a 0 or 1 low bit. */
+#if JACOBI_BASE_METHOD == 2
+#define PROCESS_TWOS_EVEN \
+ { \
+ int two; \
+ two = JACOBI_TWO_U_BIT1 (b); \
+ do \
+ { \
+ a >>= 1; \
+ result_bit1 ^= two; \
+ ASSERT (a != 0); \
+ } \
+ while ((a & 1) == 0); \
+ }
+#define PROCESS_TWOS_ANY \
+ if ((a & 1) == 0) \
+ PROCESS_TWOS_EVEN;
+#endif
+
+/* Process one bit arithmetically, then a simple loop. This cuts the loop
+ condition down to a 25/75 chance, which should branch predict better.
+   The CPU will need a reasonable variable shift. */
+#if JACOBI_BASE_METHOD == 3
+#define PROCESS_TWOS_EVEN \
+ { \
+ int two, mask, shift; \
+ \
+ two = JACOBI_TWO_U_BIT1 (b); \
+ mask = (~a & 2); \
+ a >>= 1; \
+ \
+ shift = (~a & 1); \
+ a >>= shift; \
+ result_bit1 ^= two ^ (two & mask); \
+ \
+ while ((a & 1) == 0) \
+ { \
+ a >>= 1; \
+ result_bit1 ^= two; \
+ ASSERT (a != 0); \
+ } \
+ }
+#define PROCESS_TWOS_ANY \
+ { \
+ int two, mask, shift; \
+ \
+ two = JACOBI_TWO_U_BIT1 (b); \
+ shift = (~a & 1); \
+ a >>= shift; \
+ \
+ mask = shift << 1; \
+ result_bit1 ^= (two & mask); \
+ \
+ while ((a & 1) == 0) \
+ { \
+ a >>= 1; \
+ result_bit1 ^= two; \
+ ASSERT (a != 0); \
+ } \
+ }
+#endif
+
+#if JACOBI_BASE_METHOD < 4
+/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
+ with a restricted range of inputs accepted, namely b>1, b odd.
+
+ The initial result_bit1 is taken as a parameter for the convenience of
+ mpz_kronecker_ui() et al. The sign changes both here and in those
+ routines accumulate nicely in bit 1, see the JACOBI macros.
+
+ The return value here is the normal +1, 0, or -1. Note that +1 and -1
+ have bit 1 in the "BIT1" sense, which could be useful if the caller is
+ accumulating it into some extended calculation.
+
+ Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
+ possible, but a couple of tests suggest it's not a significant speedup,
+ and may even be a slowdown, so what's here is good enough for now. */
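+
+/* Worked example (editorial, not from the GMP sources): (7/15) with the
+   subtract-and-strip-twos scheme below.  7 and 15 are both 3 (mod 4), so
+   reciprocity flips the sign: (7/15) = -(15/7).  Subtracting, 15 - 7 = 8
+   = 2^3; each factor of two contributes (2/7) = +1 because 7 = 7 (mod 8),
+   leaving (1/7) = 1.  Hence (7/15) = -1, in agreement with
+   (7/15) = (7/3)(7/5) = (1/3)(2/5) = 1 * (-1) = -1.  */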
+
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
+{
+ ASSERT (b & 1); /* b odd */
+ ASSERT (b != 1);
+
+ if (a == 0)
+ return 0;
+
+ PROCESS_TWOS_ANY;
+ if (a == 1)
+ goto done;
+
+ if (a >= b)
+ goto a_gt_b;
+
+ for (;;)
+ {
+ result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
+ MP_LIMB_T_SWAP (a, b);
+
+ a_gt_b:
+ do
+ {
+ /* working on (a/b), a,b odd, a>=b */
+ ASSERT (a & 1);
+ ASSERT (b & 1);
+ ASSERT (a >= b);
+
+ if ((a -= b) == 0)
+ return 0;
+
+ PROCESS_TWOS_EVEN;
+ if (a == 1)
+ goto done;
+ }
+ while (a >= b);
+ }
+
+ done:
+ return JACOBI_BIT1_TO_PN (result_bit1);
+}
+#endif
+
+#if JACOBI_BASE_METHOD == 4
+/* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a
+ * parameter. We have no need for the convention that the sign is in
+ * bit 1; internally we use bit 0. */
+
+/* FIXME: Could try table-based count_trailing_zeros. */
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)
+{
+ int c;
+
+ ASSERT (b & 1);
+ ASSERT (b > 1);
+
+ if (a == 0)
+ /* This is the only line which depends on b > 1 */
+ return 0;
+
+ bit >>= 1;
+
+ /* Below, we represent a and b shifted right so that the least
+ significant one bit is implicit. */
+
+ b >>= 1;
+
+ count_trailing_zeros (c, a);
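+  /* With b stored shifted right one bit, bit 0 of b ^ (b >> 1) is set
+     exactly when the original b is 3 or 5 (mod 8), i.e. when (2/b) = -1;
+     so this flips the sign once per removed factor of two (only the
+     parity of c matters).  */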
+ bit ^= c & (b ^ (b >> 1));
+
+ /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */
+ a >>= c;
+ a >>= 1;
+
+ do
+ {
+ mp_limb_t t = a - b;
+ mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);
+
+ if (t == 0)
+ return 0;
+
+ /* If b > a, invoke reciprocity */
+ bit ^= (bgta & a & b);
+
+ /* b <-- min (a, b) */
+ b += (bgta & t);
+
+ /* a <-- |a - b| */
+ a = (t ^ bgta) - bgta;
+
+ /* Number of trailing zeros is the same no matter if we look at
+ * t or a, but using t gives more parallelism. */
+ count_trailing_zeros (c, t);
+ c ++;
+ /* (2/b) = -1 if b = 3 or 5 mod 8 */
+ bit ^= c & (b ^ (b >> 1));
+ a >>= c;
+ }
+ while (a > 0);
+
+ return 1-2*(bit & 1);
+}
+#endif /* JACOBI_BASE_METHOD == 4 */
diff --git a/vendor/gmp-6.3.0/mpn/generic/jacobi.c b/vendor/gmp-6.3.0/mpn/generic/jacobi.c
new file mode 100644
index 0000000..d98b126
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/jacobi.c
@@ -0,0 +1,294 @@
+/* jacobi.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_DC_THRESHOLD
+#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
+#endif
+
+/* Schönhage's rules:
+ *
+ * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
+ *
+ * If r1 is odd, then
+ *
+ * (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
+ *
+ * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
+ *
+ * If r1 is even, r2 must be odd. We have
+ *
+ * (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
+ * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
+ * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
+ *
+ * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
+ * q1 times gives
+ *
+ * (r1 | r0) = (r1 | r2) = (r3 | r2)
+ *
+ * On the other hand, if r1 = 2 (mod 4), the sign factor is
+ * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
+ *
+ * (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
+ * = q1 (r0-1)/2 + q1 (q1-1)/2
+ *
+ * and we can summarize the even case as
+ *
+ * (r1 | r0) = t(r1, r0, q1) (r3 | r2)
+ *
+ * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
+ *
+ * What about termination? The remainder sequence ends with (0|1) = 1
+ * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
+ * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
+ * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
+ *
+ * Examples: (11|15) = - (15|11) = - (4|11)
+ * (4|11) = (4| 3) = (1| 3)
+ * (1| 3) = (3|1) = (0|1) = 1
+ *
+ * (2|7) = (2|1) = (0|1) = 1
+ *
+ * Detail: (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
+ * (2|5) = (2-5|5) = (-1|5)(3|5) = (5|3) = (2|3)
+ * (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
+ *
+ */
+
+/* In principle, the state consists of four variables: e (one bit), a,
+ b (two bits each), d (one bit). Collected factors are (-1)^e. a and
+ b are the least significant bits of the current remainders. d
+   (denominator) is 0 if we're currently subtracting multiples of a
+ from b, and 1 if we're subtracting b from a.
+
+ e is stored in the least significant bit, while a, b and d are
+ coded as only 13 distinct values in bits 1-4, according to the
+ following table. For rows not mentioning d, the value is either
+ implied, or it doesn't matter. */
+
+#if WANT_ASSERT
+static const struct
+{
+ unsigned char a;
+ unsigned char b;
+} decode_table[13] = {
+ /* 0 */ { 0, 1 },
+ /* 1 */ { 0, 3 },
+ /* 2 */ { 1, 1 },
+ /* 3 */ { 1, 3 },
+ /* 4 */ { 2, 1 },
+ /* 5 */ { 2, 3 },
+ /* 6 */ { 3, 1 },
+ /* 7 */ { 3, 3 }, /* d = 1 */
+ /* 8 */ { 1, 0 },
+ /* 9 */ { 1, 2 },
+ /* 10 */ { 3, 0 },
+ /* 11 */ { 3, 2 },
+ /* 12 */ { 3, 3 }, /* d = 0 */
+};
+#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
+#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
+#endif /* WANT_ASSERT */
+
+const unsigned char jacobi_table[208] = {
+#include "jacobitab.h"
+};
+
+#define BITS_FAIL 31
+
+static void
+jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+ mp_srcptr qp, mp_size_t qn, int d)
+{
+ unsigned *bitsp = (unsigned *) p;
+
+ if (gp)
+ {
+ ASSERT (gn > 0);
+ if (gn != 1 || gp[0] != 1)
+ {
+ *bitsp = BITS_FAIL;
+ return;
+ }
+ }
+
+ if (qp)
+ {
+ ASSERT (qn > 0);
+ ASSERT (d >= 0);
+ *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
+ }
+}
+
+#define CHOOSE_P(n) (2*(n) / 3)
+
+int
+mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
+{
+ mp_size_t scratch;
+ mp_size_t matrix_scratch;
+ mp_ptr tp;
+
+ TMP_DECL;
+
+ ASSERT (n > 0);
+ ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+ ASSERT ( (bp[0] | ap[0]) & 1);
+
+ /* FIXME: Check for small sizes first, before setting up temporary
+ storage etc. */
+ scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+ if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+ {
+ mp_size_t hgcd_scratch;
+ mp_size_t update_scratch;
+ mp_size_t p = CHOOSE_P (n);
+ mp_size_t dc_scratch;
+
+ matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+ hgcd_scratch = mpn_hgcd_itch (n - p);
+ update_scratch = p + n - 1;
+
+ dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+ if (dc_scratch > scratch)
+ scratch = dc_scratch;
+ }
+
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS(scratch);
+
+ while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+ {
+ struct hgcd_matrix M;
+ mp_size_t p = 2*n/3;
+ mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+ mp_size_t nn;
+ mpn_hgcd_matrix_init (&M, n - p, tp);
+
+ nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
+ tp + matrix_scratch);
+ if (nn > 0)
+ {
+ ASSERT (M.n <= (n - p - 1)/2);
+ ASSERT (M.n + p <= (p + n - 1) / 2);
+ /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+ n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+ }
+ else
+ {
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
+ if (!n)
+ {
+ TMP_FREE;
+ return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+ }
+ }
+ }
+
+ while (n > 2)
+ {
+ struct hgcd_matrix1 M;
+ mp_limb_t ah, al, bh, bl;
+ mp_limb_t mask;
+
+ mask = ap[n-1] | bp[n-1];
+ ASSERT (mask > 0);
+
+ if (mask & GMP_NUMB_HIGHBIT)
+ {
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else
+ {
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+ al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+ bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+ bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+ }
+
+      /* Try an mpn_hgcd2_jacobi step */
+ if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
+ {
+ n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+ MP_PTR_SWAP (ap, tp);
+ }
+ else
+ {
+ /* mpn_hgcd2 has failed. Then either one of a or b is very
+ small, or the difference is very small. Perform one
+ subtraction followed by one division. */
+ n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
+ if (!n)
+ {
+ TMP_FREE;
+ return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+ }
+ }
+ }
+
+ if (bits >= 16)
+ MP_PTR_SWAP (ap, bp);
+
+ ASSERT (bp[0] & 1);
+
+ if (n == 1)
+ {
+ mp_limb_t al, bl;
+ al = ap[0];
+ bl = bp[0];
+
+ TMP_FREE;
+ if (bl == 1)
+ return 1 - 2*(bits & 1);
+ else
+ return mpn_jacobi_base (al, bl, bits << 1);
+ }
+
+ else
+ {
+ int res = mpn_jacobi_2 (ap, bp, bits & 1);
+ TMP_FREE;
+ return res;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/jacobi_2.c b/vendor/gmp-6.3.0/mpn/generic/jacobi_2.c
new file mode 100644
index 0000000..028b8a4
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/jacobi_2.c
@@ -0,0 +1,351 @@
+/* jacobi_2.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_2_METHOD
+#define JACOBI_2_METHOD 2
+#endif
+
+/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary
+ two-limb numbers. */
+#if JACOBI_2_METHOD == 1
+int
+mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
+{
+ mp_limb_t ah, al, bh, bl;
+ int c;
+
+ al = ap[0];
+ ah = ap[1];
+ bl = bp[0];
+ bh = bp[1];
+
+ ASSERT (bl & 1);
+
+ bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1);
+ bh >>= 1;
+
+ if ( (bh | bl) == 0)
+ return 1 - 2*(bit & 1);
+
+ if ( (ah | al) == 0)
+ return 0;
+
+ if (al == 0)
+ {
+ al = ah;
+ ah = 0;
+ bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
+ }
+ count_trailing_zeros (c, al);
+ bit ^= c & (bl ^ (bl >> 1));
+
+ c++;
+ if (UNLIKELY (c == GMP_NUMB_BITS))
+ {
+ al = ah;
+ ah = 0;
+ }
+ else
+ {
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ }
+ while ( (ah | bh) > 0)
+ {
+ mp_limb_t th, tl;
+ mp_limb_t bgta;
+
+ sub_ddmmss (th, tl, ah, al, bh, bl);
+ if ( (tl | th) == 0)
+ return 0;
+
+ bgta = LIMB_HIGHBIT_TO_MASK (th);
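+      /* bgta is all ones if the double-limb subtraction above went
+	 negative (i.e. b > a), and zero otherwise; it is used as a mask
+	 to select between the two cases without branching.  */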
+
+ /* If b > a, invoke reciprocity */
+ bit ^= (bgta & al & bl);
+
+ /* b <-- min (a, b) */
+ add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta);
+
+ if ( (bh | bl) == 0)
+ return 1 - 2*(bit & 1);
+
+ /* a <-- |a - b| */
+ al = (bgta ^ tl) - bgta;
+ ah = (bgta ^ th);
+
+ if (UNLIKELY (al == 0))
+ {
+ /* If b > a, al == 0 implies that we have a carry to
+ propagate. */
+ al = ah - bgta;
+ ah = 0;
+ bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
+ }
+ count_trailing_zeros (c, al);
+ c++;
+ bit ^= c & (bl ^ (bl >> 1));
+
+ if (UNLIKELY (c == GMP_NUMB_BITS))
+ {
+ al = ah;
+ ah = 0;
+ }
+ else
+ {
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ }
+ }
+
+ ASSERT (bl > 0);
+
+ while ( (al | bl) & GMP_LIMB_HIGHBIT)
+ {
+ /* Need an extra comparison to get the mask. */
+ mp_limb_t t = al - bl;
+ mp_limb_t bgta = - (bl > al);
+
+ if (t == 0)
+ return 0;
+
+ /* If b > a, invoke reciprocity */
+ bit ^= (bgta & al & bl);
+
+ /* b <-- min (a, b) */
+ bl += (bgta & t);
+
+ /* a <-- |a - b| */
+ al = (t ^ bgta) - bgta;
+
+ /* Number of trailing zeros is the same no matter if we look at
+ * t or a, but using t gives more parallelism. */
+ count_trailing_zeros (c, t);
+ c ++;
+ /* (2/b) = -1 if b = 3 or 5 mod 8 */
+ bit ^= c & (bl ^ (bl >> 1));
+
+ if (UNLIKELY (c == GMP_NUMB_BITS))
+ return 1 - 2*(bit & 1);
+
+ al >>= c;
+ }
+
+ /* Here we have a little impedance mismatch. Better to inline it? */
+ return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1);
+}
+#elif JACOBI_2_METHOD == 2
+int
+mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
+{
+ mp_limb_t ah, al, bh, bl;
+ int c;
+
+ al = ap[0];
+ ah = ap[1];
+ bl = bp[0];
+ bh = bp[1];
+
+ ASSERT (bl & 1);
+
+ /* Use bit 1. */
+ bit <<= 1;
+
+ if (bh == 0 && bl == 1)
+ /* (a|1) = 1 */
+ return 1 - (bit & 2);
+
+ if (al == 0)
+ {
+ if (ah == 0)
+ /* (0|b) = 0, b > 1 */
+ return 0;
+
+ count_trailing_zeros (c, ah);
+ bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+
+ al = bl;
+ bl = ah >> c;
+
+ if (bl == 1)
+ /* (1|b) = 1 */
+ return 1 - (bit & 2);
+
+ ah = bh;
+
+ bit ^= al & bl;
+
+ goto b_reduced;
+ }
+ if ( (al & 1) == 0)
+ {
+ count_trailing_zeros (c, al);
+
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ bit ^= (c << 1) & (bl ^ (bl >> 1));
+ }
+ if (ah == 0)
+ {
+ if (bh > 0)
+ {
+ bit ^= al & bl;
+ MP_LIMB_T_SWAP (al, bl);
+ ah = bh;
+ goto b_reduced;
+ }
+ goto ab_reduced;
+ }
+
+ while (bh > 0)
+ {
+ /* Compute (a|b) */
+ while (ah > bh)
+ {
+ sub_ddmmss (ah, al, ah, al, bh, bl);
+ if (al == 0)
+ {
+ count_trailing_zeros (c, ah);
+ bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+
+ al = bl;
+ bl = ah >> c;
+ ah = bh;
+
+ bit ^= al & bl;
+ goto b_reduced;
+ }
+ count_trailing_zeros (c, al);
+ bit ^= (c << 1) & (bl ^ (bl >> 1));
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ }
+ if (ah == bh)
+ goto cancel_hi;
+
+ if (ah == 0)
+ {
+ bit ^= al & bl;
+ MP_LIMB_T_SWAP (al, bl);
+ ah = bh;
+ break;
+ }
+
+ bit ^= al & bl;
+
+ /* Compute (b|a) */
+ while (bh > ah)
+ {
+ sub_ddmmss (bh, bl, bh, bl, ah, al);
+ if (bl == 0)
+ {
+ count_trailing_zeros (c, bh);
+ bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1));
+
+ bl = bh >> c;
+ bit ^= al & bl;
+ goto b_reduced;
+ }
+ count_trailing_zeros (c, bl);
+ bit ^= (c << 1) & (al ^ (al >> 1));
+ bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl >> c);
+ bh >>= c;
+ }
+ bit ^= al & bl;
+
+ /* Compute (a|b) */
+ if (ah == bh)
+ {
+ cancel_hi:
+ if (al < bl)
+ {
+ MP_LIMB_T_SWAP (al, bl);
+ bit ^= al & bl;
+ }
+ al -= bl;
+ if (al == 0)
+ return 0;
+
+ count_trailing_zeros (c, al);
+ bit ^= (c << 1) & (bl ^ (bl >> 1));
+ al >>= c;
+
+ if (al == 1)
+ return 1 - (bit & 2);
+
+ MP_LIMB_T_SWAP (al, bl);
+ bit ^= al & bl;
+ break;
+ }
+ }
+
+ b_reduced:
+ /* Compute (a|b), with b a single limb. */
+ ASSERT (bl & 1);
+
+ if (bl == 1)
+ /* (a|1) = 1 */
+ return 1 - (bit & 2);
+
+ while (ah > 0)
+ {
+ ah -= (al < bl);
+ al -= bl;
+ if (al == 0)
+ {
+ if (ah == 0)
+ return 0;
+ count_trailing_zeros (c, ah);
+ bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+ al = ah >> c;
+ goto ab_reduced;
+ }
+ count_trailing_zeros (c, al);
+
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ bit ^= (c << 1) & (bl ^ (bl >> 1));
+ }
+ ab_reduced:
+ ASSERT (bl & 1);
+ ASSERT (bl > 1);
+
+ return mpn_jacobi_base (al, bl, bit);
+}
+#else
+#error Unsupported value for JACOBI_2_METHOD
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/logops_n.c b/vendor/gmp-6.3.0/mpn/generic/logops_n.c
new file mode 100644
index 0000000..3adba2c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/logops_n.c
@@ -0,0 +1,77 @@
+/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#ifdef OPERATION_and_n
+#define func __MPN(and_n)
+#define call mpn_and_n
+#endif
+
+#ifdef OPERATION_andn_n
+#define func __MPN(andn_n)
+#define call mpn_andn_n
+#endif
+
+#ifdef OPERATION_nand_n
+#define func __MPN(nand_n)
+#define call mpn_nand_n
+#endif
+
+#ifdef OPERATION_ior_n
+#define func __MPN(ior_n)
+#define call mpn_ior_n
+#endif
+
+#ifdef OPERATION_iorn_n
+#define func __MPN(iorn_n)
+#define call mpn_iorn_n
+#endif
+
+#ifdef OPERATION_nior_n
+#define func __MPN(nior_n)
+#define call mpn_nior_n
+#endif
+
+#ifdef OPERATION_xor_n
+#define func __MPN(xor_n)
+#define call mpn_xor_n
+#endif
+
+#ifdef OPERATION_xnor_n
+#define func __MPN(xnor_n)
+#define call mpn_xnor_n
+#endif
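+
+/* This single source file is compiled once per logical operation by the
+   build system, with the corresponding OPERATION_* macro defined, so each
+   compilation emits exactly one of the functions mpn_and_n, mpn_ior_n,
+   mpn_xor_n, and so on, all sharing this one-line body.  */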
+
+void
+func (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ call (rp, up, vp, n);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/lshift.c b/vendor/gmp-6.3.0/mpn/generic/lshift.c
new file mode 100644
index 0000000..7e1fdef
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/lshift.c
@@ -0,0 +1,72 @@
+/* mpn_lshift -- Shift left low level.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left
+ and store the n least significant limbs of the result at rp.
+ Return the bits shifted out from the most significant limb.
+
+ Argument constraints:
+ 1. 0 < cnt < GMP_NUMB_BITS.
+ 2. If the result is to be written over the input, rp must be >= up.
+*/
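+
+/* Toy example (editorial, with 8-bit limbs for readability): for
+   up = {0x34, 0x12} (the value 0x1234), n = 2 and cnt = 4, the full shift
+   is 0x12340, so rp becomes {0x40, 0x23} and the return value is 0x1,
+   the bits pushed out of the top limb.  */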
+
+mp_limb_t
+mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+ mp_limb_t high_limb, low_limb;
+ unsigned int tnc;
+ mp_size_t i;
+ mp_limb_t retval;
+
+ ASSERT (n >= 1);
+ ASSERT (cnt >= 1);
+ ASSERT (cnt < GMP_NUMB_BITS);
+ ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+ up += n;
+ rp += n;
+
+ tnc = GMP_NUMB_BITS - cnt;
+ low_limb = *--up;
+ retval = low_limb >> tnc;
+ high_limb = (low_limb << cnt) & GMP_NUMB_MASK;
+
+ for (i = n - 1; i != 0; i--)
+ {
+ low_limb = *--up;
+ *--rp = high_limb | (low_limb >> tnc);
+ high_limb = (low_limb << cnt) & GMP_NUMB_MASK;
+ }
+ *--rp = high_limb;
+
+ return retval;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/lshiftc.c b/vendor/gmp-6.3.0/mpn/generic/lshiftc.c
new file mode 100644
index 0000000..a583602
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/lshiftc.c
@@ -0,0 +1,73 @@
+/* mpn_lshiftc -- Shift left low level with complement.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left
+ and store the n least significant limbs of the result at rp.
+ Return the bits shifted out from the most significant limb.
+
+ Argument constraints:
+ 1. 0 < cnt < GMP_NUMB_BITS.
+ 2. If the result is to be written over the input, rp must be >= up.
+*/
+
+mp_limb_t
+mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+ mp_limb_t high_limb, low_limb;
+ unsigned int tnc;
+ mp_size_t i;
+ mp_limb_t retval;
+
+ ASSERT (n >= 1);
+ ASSERT (cnt >= 1);
+ ASSERT (cnt < GMP_NUMB_BITS);
+ ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+ up += n;
+ rp += n;
+
+ tnc = GMP_NUMB_BITS - cnt;
+ low_limb = *--up;
+ retval = low_limb >> tnc;
+ high_limb = (low_limb << cnt);
+
+ for (i = n - 1; i != 0; i--)
+ {
+ low_limb = *--up;
+ *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
+ high_limb = low_limb << cnt;
+ }
+ *--rp = (~high_limb) & GMP_NUMB_MASK;
+
+ return retval;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/matrix22_mul.c b/vendor/gmp-6.3.0/mpn/generic/matrix22_mul.c
new file mode 100644
index 0000000..6a1299a
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/matrix22_mul.c
@@ -0,0 +1,321 @@
+/* matrix22_mul.c.
+
+ Contributed by Niels Möller and Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define MUL(rp, ap, an, bp, bn) do { \
+ if (an >= bn) \
+ mpn_mul (rp, ap, an, bp, bn); \
+ else \
+ mpn_mul (rp, bp, bn, ap, an); \
+} while (0)
+
+/* Inputs are unsigned. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+ int c;
+ MPN_CMP (c, ap, bp, n);
+ if (c >= 0)
+ {
+ mpn_sub_n (rp, ap, bp, n);
+ return 0;
+ }
+ else
+ {
+ mpn_sub_n (rp, bp, ap, n);
+ return 1;
+ }
+}
+
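+/* Adds the signed values (as, {ap,n}) and (bs, {bp,n}), where the sign
+   flags use 0 for non-negative and 1 for negative; stores |result| at rp
+   and returns its sign.  Assumes the result fits in n limbs (no carry
+   when the signs are equal).  */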
+static int
+add_signed_n (mp_ptr rp,
+ mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
+{
+ if (as != bs)
+ return as ^ abs_sub_n (rp, ap, bp, n);
+ else
+ {
+ ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
+ return as;
+ }
+}
+
+mp_size_t
+mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
+{
+ if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+ || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
+ return 3*rn + 2*mn;
+ else
+ return 3*(rn + mn) + 5;
+}
+
+/* Algorithm:
+
+ / s0 \ / 1 0 0 0 \ / r0 \
+ | s1 | | 0 1 0 1 | | r1 |
+ | s2 | | 0 0 -1 1 | | r2 |
+ | s3 | = | 0 1 -1 1 | \ r3 /
+ | s4 | | -1 1 -1 1 |
+ | s5 | | 0 1 0 0 |
+ \ s6 / \ 0 0 1 0 /
+
+ / t0 \ / 1 0 0 0 \ / m0 \
+ | t1 | | 0 1 0 1 | | m1 |
+ | t2 | | 0 0 -1 1 | | m2 |
+ | t3 | = | 0 1 -1 1 | \ m3 /
+ | t4 | | -1 1 -1 1 |
+ | t5 | | 0 1 0 0 |
+ \ t6 / \ 0 0 1 0 /
+
+  Note: the two matrices above are the same, but s_i and t_i are paired
+  in the same product only for i < 4; see "A Strassen-like Matrix
+ Multiplication suited for squaring and higher power computation" by
+ M. Bodrato, in Proceedings of ISSAC 2010.
+
+ / r0 \ / 1 0 0 0 0 1 0 \ / s0*t0 \
+ | r1 | = | 0 0 -1 1 -1 1 0 | | s1*t1 |
+ | r2 | | 0 1 0 -1 0 -1 -1 | | s2*t2 |
+ \ r3 / \ 0 1 1 -1 0 -1 0 / | s3*t3 |
+ | s4*t5 |
+ | s5*t6 |
+ \ s6*t4 /
+
+ The scheduling uses two temporaries U0 and U1 to store products, and
+ two, S0 and T0, to store combinations of entries of the two
+ operands.
+*/
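+
+/* In other words, one 2x2 matrix product is obtained from 7 full
+   multiplications (the s_i * t_i above) plus linear combinations, instead
+   of the 8 multiplications of the schoolbook method used below the
+   threshold; the asymptotic speedup is the usual Strassen-type gain.  */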
+
+/* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).
+ *
+ * Resulting elements are of size up to rn + mn + 1.
+ *
+ * Temporary storage: 3 rn + 3 mn + 5. */
+static void
+mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
+ mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
+ mp_ptr tp)
+{
+ mp_ptr s0, t0, u0, u1;
+ int r1s, r3s, s0s, t0s, u1s;
+ s0 = tp; tp += rn + 1;
+ t0 = tp; tp += mn + 1;
+ u0 = tp; tp += rn + mn + 1;
+ u1 = tp; /* rn + mn + 2 */
+
+ MUL (u0, r1, rn, m2, mn); /* u5 = s5 * t6 */
+ r3s = abs_sub_n (r3, r3, r2, rn); /* r3 - r2 */
+ if (r3s)
+ {
+ r1s = abs_sub_n (r1, r1, r3, rn);
+ r1[rn] = 0;
+ }
+ else
+ {
+ r1[rn] = mpn_add_n (r1, r1, r3, rn);
+ r1s = 0; /* r1 - r2 + r3 */
+ }
+ if (r1s)
+ {
+ s0[rn] = mpn_add_n (s0, r1, r0, rn);
+ s0s = 0;
+ }
+ else if (r1[rn] != 0)
+ {
+ s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
+ s0s = 1; /* s4 = -r0 + r1 - r2 + r3 */
+ /* Reverse sign! */
+ }
+ else
+ {
+ s0s = abs_sub_n (s0, r0, r1, rn);
+ s0[rn] = 0;
+ }
+ MUL (u1, r0, rn, m0, mn); /* u0 = s0 * t0 */
+ r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
+ ASSERT (r0[rn+mn] < 2); /* u0 + u5 */
+
+ t0s = abs_sub_n (t0, m3, m2, mn);
+ u1s = r3s^t0s^1; /* Reverse sign! */
+ MUL (u1, r3, rn, t0, mn); /* u2 = s2 * t2 */
+ u1[rn+mn] = 0;
+ if (t0s)
+ {
+ t0s = abs_sub_n (t0, m1, t0, mn);
+ t0[mn] = 0;
+ }
+ else
+ {
+ t0[mn] = mpn_add_n (t0, t0, m1, mn);
+ }
+
+ /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs
+ at r3. I'd expect that for matrices of random size, the high
+ words t0[mn] and r1[rn] are non-zero with a pretty small
+ probability. If that can be confirmed this should be done as an
+ unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))
+ add_n. */
+ if (t0[mn] != 0)
+ {
+ MUL (r3, r1, rn, t0, mn + 1); /* u3 = s3 * t3 */
+ ASSERT (r1[rn] < 2);
+ if (r1[rn] != 0)
+ mpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);
+ }
+ else
+ {
+ MUL (r3, r1, rn + 1, t0, mn);
+ }
+
+ ASSERT (r3[rn+mn] < 4);
+
+ u0[rn+mn] = 0;
+ if (r1s^t0s)
+ {
+ r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));
+ r3s = 0; /* u3 + u5 */
+ }
+
+ if (t0s)
+ {
+ t0[mn] = mpn_add_n (t0, t0, m0, mn);
+ }
+ else if (t0[mn] != 0)
+ {
+ t0[mn] -= mpn_sub_n (t0, t0, m0, mn);
+ }
+ else
+ {
+ t0s = abs_sub_n (t0, t0, m0, mn);
+ }
+ MUL (u0, r2, rn, t0, mn + 1); /* u6 = s6 * t4 */
+ ASSERT (u0[rn+mn] < 2);
+ if (r1s)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));
+ }
+ else
+ {
+ r1[rn] += mpn_add_n (r1, r1, r2, rn);
+ }
+ rn++;
+ t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn);
+ /* u3 + u5 + u6 */
+ ASSERT (r2[rn+mn-1] < 4);
+ r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn);
+ /* -u2 + u3 + u5 */
+ ASSERT (r3[rn+mn-1] < 3);
+ MUL (u0, s0, rn, m1, mn); /* u4 = s4 * t5 */
+ ASSERT (u0[rn+mn-1] < 2);
+ t0[mn] = mpn_add_n (t0, m3, m1, mn);
+ MUL (u1, r1, rn, t0, mn + 1); /* u1 = s1 * t1 */
+ mn += rn;
+ ASSERT (u1[mn-1] < 4);
+ ASSERT (u1[mn] == 0);
+ ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn));
+ /* -u2 + u3 - u4 + u5 */
+ ASSERT (r1[mn-1] < 2);
+ if (r3s)
+ {
+ ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn));
+ /* u1 + u2 - u3 - u5 */
+ }
+ ASSERT (r3[mn-1] < 2);
+ if (t0s)
+ {
+ ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn));
+ /* u1 - u3 - u5 - u6 */
+ }
+ ASSERT (r2[mn-1] < 2);
+}
+
+void
+mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
+ mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
+ mp_ptr tp)
+{
+ if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+ || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
+ {
+ mp_ptr p0, p1;
+ unsigned i;
+
+ /* Temporary storage: 3 rn + 2 mn */
+ p0 = tp + rn;
+ p1 = p0 + rn + mn;
+
+ for (i = 0; i < 2; i++)
+ {
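+	  /* Each pass computes one row of R*M: first (r0,r1), then, after
+	     the pointer swap at the bottom, (r2,r3).  The old value of r0
+	     is saved in tp because r0 is overwritten before it is used in
+	     the second column.  */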
+ MPN_COPY (tp, r0, rn);
+
+ if (rn >= mn)
+ {
+ mpn_mul (p0, r0, rn, m0, mn);
+ mpn_mul (p1, r1, rn, m3, mn);
+ mpn_mul (r0, r1, rn, m2, mn);
+ mpn_mul (r1, tp, rn, m1, mn);
+ }
+ else
+ {
+ mpn_mul (p0, m0, mn, r0, rn);
+ mpn_mul (p1, m3, mn, r1, rn);
+ mpn_mul (r0, m2, mn, r1, rn);
+ mpn_mul (r1, m1, mn, tp, rn);
+ }
+ r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);
+ r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);
+
+ r0 = r2; r1 = r3;
+ }
+ }
+ else
+ mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
+ m0, m1, m2, m3, mn, tp);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/matrix22_mul1_inverse_vector.c b/vendor/gmp-6.3.0/mpn/generic/matrix22_mul1_inverse_vector.c
new file mode 100644
index 0000000..68d50b7
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/matrix22_mul1_inverse_vector.c
@@ -0,0 +1,64 @@
+/* matrix22_mul1_inverse_vector.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
+ the left. Uses three buffers, to avoid a copy. */
+mp_size_t
+mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *M,
+ mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+ mp_limb_t h0, h1;
+
+ /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
+
+ r = u11 * a
+ r -= u01 * b
+ b *= u00
+ b -= u10 * a
+ */
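+
+  /* The two differences computed below recover the earlier, smaller
+     values of the pair, which are known to be non-negative and to fit in
+     n limbs; the high limb returned by mpn_mul_1 must therefore cancel
+     exactly against the borrow returned by mpn_submul_1, which is what
+     the ASSERTs check.  */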
+
+ h0 = mpn_mul_1 (rp, ap, n, M->u[1][1]);
+ h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
+ ASSERT (h0 == h1);
+
+ h0 = mpn_mul_1 (bp, bp, n, M->u[0][0]);
+ h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
+ ASSERT (h0 == h1);
+
+ n -= (rp[n-1] | bp[n-1]) == 0;
+ return n;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mod_1.c b/vendor/gmp-6.3.0/mpn/generic/mod_1.c
new file mode 100644
index 0000000..f737bc2
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mod_1.c
@@ -0,0 +1,278 @@
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ Return the single-limb remainder.
+ There are no constraints on the value of the divisor.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012, 2020
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+ meaning the quotient size where that should happen, the quotient size
+ being how many udiv divisions will be done.
+
+ The default is to use preinv always, CPUs where this doesn't suit have
+ tuned thresholds. Note in particular that preinv should certainly be
+ used if that's the only division available (USE_PREINV_ALWAYS). */
+
+#ifndef MOD_1_NORM_THRESHOLD
+#define MOD_1_NORM_THRESHOLD 0
+#endif
+
+#ifndef MOD_1_UNNORM_THRESHOLD
+#define MOD_1_UNNORM_THRESHOLD 0
+#endif
+
+#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#endif
+
+#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#endif
+
+#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
+#endif
+
+#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
+#endif
+
+#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
+/* Duplicates declarations in tune/speed.h */
+mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+
+void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
+void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
+
+#undef mpn_mod_1_1p
+#define mpn_mod_1_1p(ap, n, b, pre) \
+ (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre) \
+ : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre) \
+ : __gmpn_mod_1_1p (ap, n, b, pre)))
+
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p_cps(pre, b) \
+ (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b) \
+ : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b) \
+ : __gmpn_mod_1_1p_cps (pre, b)))
+#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
+
+
+/* The comments in mpn/generic/divrem_1.c apply here too.
+
+ As noted in the algorithms section of the manual, the shifts in the loop
+ for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
+   by a final (r*2^n)%(d*2^n).  In fact, if it happens that a%(d*2^n) can
+   skip a division where (a*2^n)%(d*2^n) can't, then there's the same number
+   of divide steps, though how often that happens depends on the assumed
+   distributions of dividend and divisor.  In any case, this idea is left to
+ CPU specific implementations to consider. */
+
+static mp_limb_t
+mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+ mp_size_t i;
+ mp_limb_t n1, n0, r;
+ mp_limb_t dummy;
+ int cnt;
+
+ ASSERT (un > 0);
+ ASSERT (d != 0);
+
+ /* Skip a division if high < divisor. Having the test here before
+ normalizing will still skip as often as possible. */
+ r = up[un - 1];
+ if (r < d)
+ {
+ if (--un == 0)
+ return r;
+ }
+ else
+ r = 0;
+
+ d <<= GMP_NAIL_BITS;
+
+ /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
+ code above. */
+ if (! UDIV_NEEDS_NORMALIZATION
+ && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
+ {
+ for (i = un - 1; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ udiv_qrnnd (dummy, r, r, n0, d);
+ r >>= GMP_NAIL_BITS;
+ }
+ return r;
+ }
+
+ count_leading_zeros (cnt, d);
+ d <<= cnt;
+
+ n1 = up[un - 1] << GMP_NAIL_BITS;
+ r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));
+
+ if (UDIV_NEEDS_NORMALIZATION
+ && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
+ {
+ mp_limb_t nshift;
+ for (i = un - 2; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd (dummy, r, r, nshift, d);
+ r >>= GMP_NAIL_BITS;
+ n1 = n0;
+ }
+ udiv_qrnnd (dummy, r, r, n1 << cnt, d);
+ r >>= GMP_NAIL_BITS;
+ return r >> cnt;
+ }
+ else
+ {
+ mp_limb_t inv, nshift;
+ invert_limb (inv, d);
+
+ for (i = un - 2; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, nshift, d, inv);
+ r >>= GMP_NAIL_BITS;
+ n1 = n0;
+ }
+ udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
+ r >>= GMP_NAIL_BITS;
+ return r >> cnt;
+ }
+}
+
+static mp_limb_t
+mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+ mp_size_t i;
+ mp_limb_t n0, r;
+ mp_limb_t dummy;
+
+ ASSERT (un > 0);
+
+ d <<= GMP_NAIL_BITS;
+
+ ASSERT (d & GMP_LIMB_HIGHBIT);
+
+ /* High limb is initial remainder, possibly with one subtract of
+ d to get r<d. */
+ r = up[un - 1] << GMP_NAIL_BITS;
+ if (r >= d)
+ r -= d;
+ r >>= GMP_NAIL_BITS;
+ un--;
+ if (un == 0)
+ return r;
+
+ if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
+ {
+ for (i = un - 1; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ udiv_qrnnd (dummy, r, r, n0, d);
+ r >>= GMP_NAIL_BITS;
+ }
+ return r;
+ }
+ else
+ {
+ mp_limb_t inv;
+ invert_limb (inv, d);
+ for (i = un - 1; i >= 0; i--)
+ {
+ n0 = up[i] << GMP_NAIL_BITS;
+ udiv_rnnd_preinv (r, r, n0, d, inv);
+ r >>= GMP_NAIL_BITS;
+ }
+ return r;
+ }
+}
+
+mp_limb_t
+mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+ ASSERT (n >= 0);
+ ASSERT (b != 0);
+
+ /* Should this be handled at all? Rely on callers? Note un==0 is currently
+ required by mpz/fdiv_r_ui.c and possibly other places. */
+ if (n == 0)
+ return 0;
+
+ if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
+ {
+ if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
+ {
+ return mpn_mod_1_norm (ap, n, b);
+ }
+ else
+ {
+ mp_limb_t pre[4];
+ mpn_mod_1_1p_cps (pre, b);
+ return mpn_mod_1_1p (ap, n, b, pre);
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
+ {
+ return mpn_mod_1_unnorm (ap, n, b);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
+ {
+ mp_limb_t pre[4];
+ mpn_mod_1_1p_cps (pre, b);
+ return mpn_mod_1_1p (ap, n, b << pre[1], pre);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
+ {
+ mp_limb_t pre[5];
+ mpn_mod_1s_2p_cps (pre, b);
+ return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
+ }
+ else
+ {
+ mp_limb_t pre[7];
+ mpn_mod_1s_4p_cps (pre, b);
+ return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
+ }
+ }
+}
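
Whichever branch is taken, the result is simply the remainder of the full
multi-limb value divided by b.  A minimal reference version, with 32-bit limbs
and plain 64-bit division standing in for udiv_qrnnd and its preinv variants
(illustration only, not GMP's code):

#include <assert.h>
#include <stdint.h>

static uint32_t
mod_1_simple (const uint32_t *up, int un, uint32_t d)
{
  uint32_t r = 0;
  for (int i = un - 1; i >= 0; i--)             /* most significant limb first */
    r = (uint32_t) ((((uint64_t) r << 32) | up[i]) % d);
  return r;
}

int
main (void)
{
  uint32_t up[2] = { 0x89ABCDEFu, 0x01234567u };  /* least significant first */
  uint64_t v = ((uint64_t) up[1] << 32) | up[0];
  assert (mod_1_simple (up, 2, 1000000007u) == v % 1000000007u);
  return 0;
}
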
diff --git a/vendor/gmp-6.3.0/mpn/generic/mod_1_1.c b/vendor/gmp-6.3.0/mpn/generic/mod_1_1.c
new file mode 100644
index 0000000..be199ff
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mod_1_1.c
@@ -0,0 +1,341 @@
+/* mpn_mod_1_1p (ap, n, b, cps)
+ Divide (ap,,n) by b. Return the single-limb remainder.
+
+ Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
+ Based on a suggestion by Peter L. Montgomery.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2011, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef MOD_1_1P_METHOD
+# define MOD_1_1P_METHOD 1 /* need to make sure this is 2 for asm testing */
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
+ * carry out, in the form of a mask. */
+
+#if defined (__GNUC__) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add %6, %k2\n\t" \
+ "adc %4, %k1\n\t" \
+ "sbb %k0, %k0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add %6, %q2\n\t" \
+ "adc %4, %q1\n\t" \
+ "sbb %q0, %q0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addxcc %r3, %4, %1\n\t" \
+ "subx %%g0, %%g0, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addccc %r7, %8, %%g0\n\t" \
+ "addccc %r3, %4, %1\n\t" \
+ "clr %0\n\t" \
+ "movcs %%xcc, -1, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
+ "rJ" ((al) >> 32), "rI" ((bl) >> 32) \
+ __CLOBBER_CC)
+#if __VIS__ >= 0x300
+#undef add_mssaaaa
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addxccc %r3, %4, %1\n\t" \
+ "clr %0\n\t" \
+ "movcs %%xcc, -1, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#endif
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add%I6c %2, %5, %6\n\t" \
+ "adde %1, %3, %4\n\t" \
+ "subfe %0, %0, %0\n\t" \
+ "nor %0, %0, %0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "r" (a1), "r" (b1), "%r" (a0), "rI" (b0) \
+ __CLOBBER_CC)
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "algr %2, %6\n\t" \
+ "alcgr %1, %4\n\t" \
+ "lghi %0, 0\n\t" \
+ "alcgr %0, %0\n\t" \
+ "lcgr %0, %0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
+#endif
+
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "adds %2, %5, %6\n\t" \
+ "adcs %1, %3, %4\n\t" \
+ "movcc %0, #0\n\t" \
+ "movcs %0, #-1" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "adds %2, %x5, %6\n\t" \
+ "adcs %1, %x3, %x4\n\t" \
+ "csinv %0, xzr, xzr, cc\n\t" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rZ" (ah), "rZ" (bh), "%rZ" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_mssaaaa
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (m) = - (__c1 + (__s1 < __c0)); \
+ } while (0)
+#endif
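
Whichever definition gets selected, add_mssaaaa leaves the two-word sum in
(s1,s0) and the carry out of the top word in m as a mask: zero if the sum did
not overflow, all ones if it did.  A small standalone check of the C
fallback's logic, using uint64_t words (illustration only):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t a1 = UINT64_MAX, a0 = UINT64_MAX;   /* a = 2^128 - 1 */
  uint64_t b1 = 0, b0 = 1;                     /* b = 1         */

  uint64_t s0 = a0 + b0;
  uint64_t s1 = a1 + b1;
  uint64_t c0 = s0 < a0;                       /* carry out of the low word  */
  uint64_t c1 = s1 < a1;                       /* carry out of the high word */
  s1 += c0;
  uint64_t m = - (c1 + (s1 < c0));             /* 0, or ~0 on overflow */

  assert (s1 == 0 && s0 == 0 && m == UINT64_MAX);  /* 2^128 wraps, mask set */
  return 0;
}
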
+
+#if MOD_1_1P_METHOD == 1
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+ mp_limb_t bi;
+ mp_limb_t B1modb, B2modb;
+ int cnt;
+
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt;
+ invert_limb (bi, b);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+
+ B1modb = -b;
+ if (LIKELY (cnt != 0))
+ B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ cps[2] = B1modb >> cnt;
+
+ /* In the normalized case, this can be simplified to
+ *
+ * B2modb = - b * bi;
+ * ASSERT (B2modb <= b); // NB: equality iff b = B/2
+ */
+ udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+ cps[3] = B2modb >> cnt;
+}
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+{
+ mp_limb_t rh, rl, bi, ph, pl, r;
+ mp_limb_t B1modb, B2modb;
+ mp_size_t i;
+ int cnt;
+ mp_limb_t mask;
+
+ ASSERT (n >= 2); /* fix tuneup.c if this is changed */
+
+ B1modb = bmodb[2];
+ B2modb = bmodb[3];
+
+ rl = ap[n - 1];
+ umul_ppmm (ph, pl, rl, B1modb);
+ add_ssaaaa (rh, rl, ph, pl, CNST_LIMB(0), ap[n - 2]);
+
+ for (i = n - 3; i >= 0; i -= 1)
+ {
+ /* rr = ap[i] < B
+ + LO(rr) * (B mod b) <= (B-1)(b-1)
+ + HI(rr) * (B^2 mod b) <= (B-1)(b-1)
+ */
+ umul_ppmm (ph, pl, rl, B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i]);
+
+ umul_ppmm (rh, rl, rh, B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+
+ cnt = bmodb[1];
+ bi = bmodb[0];
+
+ if (LIKELY (cnt != 0))
+ rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+
+ mask = -(mp_limb_t) (rh >= b);
+ rh -= mask & b;
+
+ udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
+
+ return r >> cnt;
+}
+#endif /* MOD_1_1P_METHOD == 1 */
+
+#if MOD_1_1P_METHOD == 2
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+ mp_limb_t bi;
+ mp_limb_t B2modb;
+ int cnt;
+
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt;
+ invert_limb (bi, b);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+
+ if (LIKELY (cnt != 0))
+ {
+ mp_limb_t B1modb = -b;
+ B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ cps[2] = B1modb >> cnt;
+ }
+ B2modb = - b * bi;
+ ASSERT (B2modb <= b); // NB: equality iff b = B/2
+ cps[3] = B2modb;
+}
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+{
+ int cnt;
+ mp_limb_t bi, B1modb;
+ mp_limb_t r0, r1;
+ mp_limb_t r;
+
+ ASSERT (n >= 2); /* fix tuneup.c if this is changed */
+
+ r0 = ap[n-2];
+ r1 = ap[n-1];
+
+ if (n > 2)
+ {
+ mp_limb_t B2modb, B2mb;
+ mp_limb_t p0, p1;
+ mp_limb_t r2;
+ mp_size_t j;
+
+ B2modb = bmodb[3];
+ B2mb = B2modb - b;
+
+ umul_ppmm (p1, p0, r1, B2modb);
+ add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);
+
+ for (j = n-4; j >= 0; j--)
+ {
+ mp_limb_t cy;
+ /* mp_limb_t t = r0 + B2mb; */
+ umul_ppmm (p1, p0, r1, B2modb);
+
+ ADDC_LIMB (cy, r0, r0, r2 & B2modb);
+ /* Alternative, for cmov: if (cy) r0 = t; */
+ r0 -= (-cy) & b;
+ add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
+ }
+
+ r1 -= (r2 & b);
+ }
+
+ cnt = bmodb[1];
+
+ if (LIKELY (cnt != 0))
+ {
+ mp_limb_t t;
+ mp_limb_t B1modb = bmodb[2];
+
+ umul_ppmm (r1, t, r1, B1modb);
+ r0 += t;
+ r1 += (r0 < t);
+
+ /* Normalize */
+ r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
+ r0 <<= cnt;
+
+ /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows that. */
+ }
+ else
+ {
+ mp_limb_t mask = -(mp_limb_t) (r1 >= b);
+ r1 -= mask & b;
+ }
+
+ bi = bmodb[0];
+
+ udiv_rnnd_preinv (r, r1, r0, b, bi);
+ return r >> cnt;
+}
+#endif /* MOD_1_1P_METHOD == 2 */
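
Both methods rely on the same idea: keep a two-limb residue and fold in one
dividend limb per step using the precomputed values B mod b and B^2 mod b, so
the loop itself contains no division; a single real division finishes the job.
A scaled-down sketch, with 16-bit limbs so plain 64-bit arithmetic holds every
intermediate, and with the divisor restricted to b < 2^15 to keep the residue
bounded (illustration only, not GMP's code):

#include <assert.h>
#include <stdint.h>

/* Remainder of {ap,n} (16-bit limbs, least significant first, n >= 2)
   modulo b, for odd or even b < 2^15. */
static uint16_t
mod_1_via_precomputed_powers (const uint16_t *ap, int n, uint16_t b)
{
  const uint32_t B = 1u << 16;
  uint32_t B1modb = B % b;                              /* B   mod b */
  uint32_t B2modb = (uint32_t) ((uint64_t) B * B % b);  /* B^2 mod b */

  /* Two-limb residue rh*B + rl, one dividend limb folded per step. */
  uint32_t rh = ap[n - 1], rl = ap[n - 2];
  for (int i = n - 3; i >= 0; i--)
    {
      uint64_t t = (uint64_t) rh * B2modb + (uint64_t) rl * B1modb + ap[i];
      rh = (uint32_t) (t >> 16);
      rl = (uint32_t) (t & 0xFFFF);
    }
  /* The only real division. */
  return (uint16_t) (((uint64_t) rh * B1modb + rl) % b);
}

int
main (void)
{
  uint16_t a[4] = { 0x1234, 0x5678, 0x9ABC, 0xDEF0 };  /* least significant first */
  uint64_t v = 0;
  for (int i = 3; i >= 0; i--)
    v = (v << 16) | a[i];
  assert (mod_1_via_precomputed_powers (a, 4, 12345) == v % 12345);
  return 0;
}
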
diff --git a/vendor/gmp-6.3.0/mpn/generic/mod_1_2.c b/vendor/gmp-6.3.0/mpn/generic/mod_1_2.c
new file mode 100644
index 0000000..b00d19e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mod_1_2.c
@@ -0,0 +1,148 @@
+/* mpn_mod_1s_2p (ap, n, b, cps)
+ Divide (ap,,n) by b. Return the single-limb remainder.
+ Requires that b < B / 2.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Based on a suggestion by Peter L. Montgomery.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
+{
+ mp_limb_t bi;
+ mp_limb_t B1modb, B2modb, B3modb;
+ int cnt;
+
+ ASSERT (b <= (~(mp_limb_t) 0) / 2);
+
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt;
+ invert_limb (bi, b);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ cps[2] = B1modb >> cnt;
+
+ udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+ cps[3] = B2modb >> cnt;
+
+ udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+ cps[4] = B3modb >> cnt;
+
+#if WANT_ASSERT
+ {
+ int i;
+ b = cps[2];
+ for (i = 3; i <= 4; i++)
+ {
+ b += cps[i];
+ ASSERT (b >= cps[i]);
+ }
+ }
+#endif
+}
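
The chain of udiv_rnnd_preinv calls above works because the two-word value
(X, 0) equals X*B, so dividing it by b turns B^k mod b into B^(k+1) mod b.
The same chain in ordinary 64-bit arithmetic, with 32-bit limbs (illustration
only, not GMP code):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t b = 0x12345;                  /* divisor, well below B/2 */
  uint64_t B1 = (1ULL << 32) % b;        /* B   mod b */
  uint64_t B2 = (B1 << 32) % b;          /* (B1,0) = B1*B, so this is B^2 mod b */
  uint64_t B3 = (B2 << 32) % b;          /* B^3 mod b */

  /* Cross-check against straightforward modular multiplication. */
  assert (B2 == B1 * B1 % b);
  assert (B3 == B1 * B1 % b * B1 % b);
  return 0;
}
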
+
+mp_limb_t
+mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
+{
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+ mp_limb_t B1modb, B2modb, B3modb;
+ mp_size_t i;
+ int cnt;
+
+ ASSERT (n >= 1);
+
+ B1modb = cps[2];
+ B2modb = cps[3];
+ B3modb = cps[4];
+
+ if ((n & 1) != 0)
+ {
+ if (n == 1)
+ {
+ rl = ap[n - 1];
+ bi = cps[0];
+ cnt = cps[1];
+ udiv_rnnd_preinv (r, rl >> (GMP_LIMB_BITS - cnt),
+ rl << cnt, b, bi);
+ return r >> cnt;
+ }
+
+ umul_ppmm (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+ umul_ppmm (rh, rl, ap[n - 1], B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n--;
+ }
+ else
+ {
+ rh = ap[n - 1];
+ rl = ap[n - 2];
+ }
+
+ for (i = n - 4; i >= 0; i -= 2)
+ {
+ /* rr = ap[i] < B
+ + ap[i+1] * (B mod b) <= (B-1)(b-1)
+ + LO(rr) * (B^2 mod b) <= (B-1)(b-1)
+ + HI(rr) * (B^3 mod b) <= (B-1)(b-1)
+ */
+ umul_ppmm (ph, pl, ap[i + 1], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+ umul_ppmm (ch, cl, rl, B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (rh, rl, rh, B3modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+
+ umul_ppmm (rh, cl, rh, B1modb);
+ add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+ cnt = cps[1];
+ bi = cps[0];
+
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+ return r >> cnt;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mod_1_3.c b/vendor/gmp-6.3.0/mpn/generic/mod_1_3.c
new file mode 100644
index 0000000..e4a908d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mod_1_3.c
@@ -0,0 +1,155 @@
+/* mpn_mod_1s_3p (ap, n, b, cps)
+ Divide (ap,,n) by b. Return the single-limb remainder.
+ Requires that b < B / 3.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Based on a suggestion by Peter L. Montgomery.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
+{
+ mp_limb_t bi;
+ mp_limb_t B1modb, B2modb, B3modb, B4modb;
+ int cnt;
+
+ ASSERT (b <= (~(mp_limb_t) 0) / 3);
+
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt;
+ invert_limb (bi, b);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ cps[2] = B1modb >> cnt;
+
+ udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+ cps[3] = B2modb >> cnt;
+
+ udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+ cps[4] = B3modb >> cnt;
+
+ udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
+ cps[5] = B4modb >> cnt;
+
+#if WANT_ASSERT
+ {
+ int i;
+ b = cps[2];
+ for (i = 3; i <= 5; i++)
+ {
+ b += cps[i];
+ ASSERT (b >= cps[i]);
+ }
+ }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
+{
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+ mp_limb_t B1modb, B2modb, B3modb, B4modb;
+ mp_size_t i;
+ int cnt;
+
+ ASSERT (n >= 1);
+
+ B1modb = cps[2];
+ B2modb = cps[3];
+ B3modb = cps[4];
+ B4modb = cps[5];
+
+  /* We compute n mod 3 in a tricky way: multiply by the inverse of 3 mod B
+     and inspect the top two bits.  This works for all n except sizes so
+     close to the maximum that they need not be supported.  The final cast
+     to int is a workaround for HP cc. */
+ switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
+ {
+ case 0:
+ umul_ppmm (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+ umul_ppmm (rh, rl, ap[n - 1], B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 3;
+ break;
+    default: /* n mod 3 = 1; (case 2) */
+ rh = 0;
+ rl = ap[--n];
+ break;
+ case 1: /* n mod 3 = 2 */
+ rh = ap[n - 1];
+ rl = ap[n - 2];
+ n -= 2;
+ break;
+ }
+
+ for (i = n - 3; i >= 0; i -= 3)
+ {
+ /* rr = ap[i] < B
+ + ap[i+1] * (B mod b) <= (B-1)(b-1)
+ + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
+ + LO(rr) * (B^3 mod b) <= (B-1)(b-1)
+ + HI(rr) * (B^4 mod b) <= (B-1)(b-1)
+ */
+ umul_ppmm (ph, pl, ap[i + 1], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+ umul_ppmm (ch, cl, ap[i + 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (ch, cl, rl, B3modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (rh, rl, rh, B4modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+
+ umul_ppmm (rh, cl, rh, B1modb);
+ add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+ cnt = cps[1];
+ bi = cps[0];
+
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+ return r >> cnt;
+}
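
The selector used in the switch above can be checked directly for moderate
sizes.  The sketch below assumes 64-bit limbs, where MODLIMB_INVERSE_3 is
0xAAAAAAAAAAAAAAAB (illustration only):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  const uint64_t inv3 = 0xAAAAAAAAAAAAAAABULL;  /* 3 * inv3 == 1 (mod 2^64) */
  const unsigned expect[3] = { 0, 2, 1 };       /* selector for n mod 3 == 0, 1, 2 */

  for (uint64_t n = 1; n < 100000; n++)
    assert ((unsigned) (n * inv3 >> 62) == expect[n % 3]);
  return 0;
}
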
diff --git a/vendor/gmp-6.3.0/mpn/generic/mod_1_4.c b/vendor/gmp-6.3.0/mpn/generic/mod_1_4.c
new file mode 100644
index 0000000..80b42ba
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mod_1_4.c
@@ -0,0 +1,170 @@
+/* mpn_mod_1s_4p (ap, n, b, cps)
+ Divide (ap,,n) by b. Return the single-limb remainder.
+ Requires that b < B / 4.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Based on a suggestion by Peter L. Montgomery.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
+{
+ mp_limb_t bi;
+ mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+ int cnt;
+
+ ASSERT (b <= (~(mp_limb_t) 0) / 4);
+
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt;
+ invert_limb (bi, b);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ cps[2] = B1modb >> cnt;
+
+ udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+ cps[3] = B2modb >> cnt;
+
+ udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+ cps[4] = B3modb >> cnt;
+
+ udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
+ cps[5] = B4modb >> cnt;
+
+ udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
+ cps[6] = B5modb >> cnt;
+
+#if WANT_ASSERT
+ {
+ int i;
+ b = cps[2];
+ for (i = 3; i <= 6; i++)
+ {
+ b += cps[i];
+ ASSERT (b >= cps[i]);
+ }
+ }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
+{
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+ mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+ mp_size_t i;
+ int cnt;
+
+ ASSERT (n >= 1);
+
+ B1modb = cps[2];
+ B2modb = cps[3];
+ B3modb = cps[4];
+ B4modb = cps[5];
+ B5modb = cps[6];
+
+ switch (n & 3)
+ {
+ case 0:
+ umul_ppmm (ph, pl, ap[n - 3], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
+ umul_ppmm (ch, cl, ap[n - 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+ umul_ppmm (rh, rl, ap[n - 1], B3modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 4;
+ break;
+ case 1:
+ rh = 0;
+ rl = ap[n - 1];
+ n -= 1;
+ break;
+ case 2:
+ rh = ap[n - 1];
+ rl = ap[n - 2];
+ n -= 2;
+ break;
+ case 3:
+ umul_ppmm (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+ umul_ppmm (rh, rl, ap[n - 1], B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 3;
+ break;
+ }
+
+ for (i = n - 4; i >= 0; i -= 4)
+ {
+ /* rr = ap[i] < B
+ + ap[i+1] * (B mod b) <= (B-1)(b-1)
+ + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
+ + ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
+ + LO(rr) * (B^4 mod b) <= (B-1)(b-1)
+ + HI(rr) * (B^5 mod b) <= (B-1)(b-1)
+ */
+ umul_ppmm (ph, pl, ap[i + 1], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+ umul_ppmm (ch, cl, ap[i + 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (ch, cl, ap[i + 3], B3modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (ch, cl, rl, B4modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (rh, rl, rh, B5modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+
+ umul_ppmm (rh, cl, rh, B1modb);
+ add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+ cnt = cps[1];
+ bi = cps[0];
+
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+ return r >> cnt;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mod_34lsub1.c b/vendor/gmp-6.3.0/mpn/generic/mod_34lsub1.c
new file mode 100644
index 0000000..af9c6c6
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mod_34lsub1.c
@@ -0,0 +1,128 @@
+/* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+/* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1.
+ The remainder is not fully reduced, it's any limb value congruent to
+ {p,n} modulo that divisor.
+
+ This implementation is only correct when GMP_NUMB_BITS is a multiple of
+ 4.
+
+ FIXME: If GMP_NAIL_BITS is some silly big value during development then
+ it's possible the carry accumulators c0,c1,c2 could overflow.
+
+ General notes:
+
+ The basic idea is to use a set of N accumulators (N=3 in this case) to
+ effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end
+ by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a
+ remainder mod 2^(GMP_NUMB_BITS*N/M)-1. N and M are chosen to give a good
+ set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1.
+
+ N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving
+ a few more primes than a single accumulator N=1 does, and for no extra
+ cost (assuming the processor has a decent number of registers).
+
+ For strange nailified values of GMP_NUMB_BITS the idea would be to look
+ for what N and M give good primes. With GMP_NUMB_BITS not a power of 2
+ the choices for M may be opened up a bit. But such things are probably
+ best done in separate code, not grafted on here. */
+
+#if GMP_NUMB_BITS % 4 == 0
+
+#define B1 (GMP_NUMB_BITS / 4)
+#define B2 (B1 * 2)
+#define B3 (B1 * 3)
+
+#define M1 ((CNST_LIMB(1) << B1) - 1)
+#define M2 ((CNST_LIMB(1) << B2) - 1)
+#define M3 ((CNST_LIMB(1) << B3) - 1)
+
+#define LOW0(n) ((n) & M3)
+#define HIGH0(n) ((n) >> B3)
+
+#define LOW1(n) (((n) & M2) << B1)
+#define HIGH1(n) ((n) >> B2)
+
+#define LOW2(n) (((n) & M1) << B2)
+#define HIGH2(n) ((n) >> B1)
+
+#define PARTS0(n) (LOW0(n) + HIGH0(n))
+#define PARTS1(n) (LOW1(n) + HIGH1(n))
+#define PARTS2(n) (LOW2(n) + HIGH2(n))
+
+#define ADD(c,a,val) \
+ do { \
+ mp_limb_t new_c; \
+ ADDC_LIMB (new_c, a, a, val); \
+ (c) += new_c; \
+ } while (0)
+
+mp_limb_t
+mpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
+{
+ mp_limb_t c0, c1, c2;
+ mp_limb_t a0, a1, a2;
+
+ ASSERT (n >= 1);
+ ASSERT (n/3 < GMP_NUMB_MAX);
+
+ a0 = a1 = a2 = 0;
+ c0 = c1 = c2 = 0;
+
+ while ((n -= 3) >= 0)
+ {
+ ADD (c0, a0, p[0]);
+ ADD (c1, a1, p[1]);
+ ADD (c2, a2, p[2]);
+ p += 3;
+ }
+
+ if (n != -3)
+ {
+ ADD (c0, a0, p[0]);
+ if (n != -2)
+ ADD (c1, a1, p[1]);
+ }
+
+ return
+ PARTS0 (a0) + PARTS1 (a1) + PARTS2 (a2)
+ + PARTS1 (c0) + PARTS2 (c1) + PARTS0 (c2);
+}
+
+#endif
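
To see the congruence the accumulators exploit, take 32-bit limbs, so the
divisor is 2^24-1: then 2^32, 2^64 and 2^96 are congruent to 2^8, 2^16 and 1,
limb i carries weight 2^(8*(i mod 3)), and anything spilling past 24 bits can
be folded back in.  The version below reduces the straightforward way, without
GMP's three-accumulator scheme (illustration only, not GMP code):

#include <assert.h>
#include <stdint.h>

/* Remainder of {p,n} (32-bit limbs, least significant first) mod 2^24-1,
   here reduced all the way for easy checking. */
static uint32_t
mod24m1 (const uint32_t *p, int n)
{
  uint64_t acc = 0;
  for (int i = 0; i < n; i++)
    {
      uint64_t w = (uint64_t) 1 << (8 * (i % 3));  /* 2^(32*i) mod 2^24-1 */
      uint64_t x = (uint64_t) p[i] * w;
      acc += (x & 0xFFFFFF) + (x >> 24);           /* fold; congruence kept */
    }
  while (acc >> 24)
    acc = (acc & 0xFFFFFF) + (acc >> 24);
  return acc == 0xFFFFFF ? 0 : (uint32_t) acc;
}

int
main (void)
{
  uint32_t p[2] = { 0x12345678u, 0x9ABCDEF0u };
  uint64_t v = ((uint64_t) p[1] << 32) | p[0];
  assert (mod24m1 (p, 2) == v % 0xFFFFFF);
  return 0;
}
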
diff --git a/vendor/gmp-6.3.0/mpn/generic/mode1o.c b/vendor/gmp-6.3.0/mpn/generic/mode1o.c
new file mode 100644
index 0000000..9ba0ae1
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mode1o.c
@@ -0,0 +1,235 @@
+/* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2000-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Calculate an r satisfying
+
+ r*B^k + a - c == q*d
+
+ where B=2^GMP_LIMB_BITS, a is {src,size}, k is either size or size-1
+ (the caller won't know which), and q is the quotient (discarded). d must
+ be odd, c can be any limb value.
+
+ If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
+
+ This slightly strange function suits the initial Nx1 reduction for GCDs
+ or Jacobi symbols since the factors of 2 in B^k can be ignored, leaving
+ -r == a mod d (by passing c=0). For a GCD the factor of -1 on r can be
+ ignored, or for the Jacobi symbol it can be accounted for. The function
+ also suits divisibility and congruence testing since if r=0 (or r=d) is
+ obtained then a==c mod d.
+
+
+ r is a bit like the remainder returned by mpn_divexact_by3c, and is the
+ sort of remainder mpn_divexact_1 might return. Like mpn_divexact_by3c, r
+ represents a borrow, since effectively quotient limbs are chosen so that
+ subtracting that multiple of d from src at each step will produce a zero
+ limb.
+
+ A long calculation can be done piece by piece from low to high by passing
+ the return value from one part as the carry parameter to the next part.
+ The effective final k becomes anything between size and size-n, if n
+ pieces are used.
+
+
+ A similar sort of routine could be constructed based on adding multiples
+   of d at each limb, much like redc in mpz_powm does.  Subtracting, however,
+   has the small advantage that when subtracting to cancel out l there's never
+   a borrow into h, whereas using an addition would put a carry into h
+   depending on whether l==0 or l!=0.
+
+
+ In terms of efficiency, this function is similar to a mul-by-inverse
+ mpn_mod_1. Both are essentially two multiplies and are best suited to
+ CPUs with low latency multipliers (in comparison to a divide instruction
+ at least.) But modexact has a few less supplementary operations, only
+ needs low part and high part multiplies, and has fewer working quantities
+ (helping CPUs with few registers).
+
+
+ In the main loop it will be noted that the new carry (call it r) is the
+ sum of the high product h and any borrow from l=s-c. If c<d then we will
+ have r<d too, for the following reasons. Let q=l*inverse be the quotient
+ limb, so that q*d = B*h + l, where B=2^GMP_NUMB_BITS. Now if h=d-1 then
+
+ l = q*d - B*(d-1) <= (B-1)*d - B*(d-1) = B-d
+
+ But if l=s-c produces a borrow when c<d, then l>=B-d+1 and hence will
+ never have h=d-1 and so r=h+borrow <= d-1.
+
+ When c>=d, on the other hand, h=d-1 can certainly occur together with a
+ borrow, thereby giving only r<=d, as per the function definition above.
+
+ As a design decision it's left to the caller to check for r=d if it might
+ be passing c>=d. Several applications have c<d initially so the extra
+ test is often unnecessary, for example the GCDs or a plain divisibility
+ d|a test will pass c=0.
+
+
+ The special case for size==1 is so that it can be assumed c<=d in the
+ high<=divisor test at the end. c<=d is only guaranteed after at least
+ one iteration of the main loop. There's also a decent chance one % is
+ faster than a binvert_limb, though that will depend on the processor.
+
+ A CPU specific implementation might want to omit the size==1 code or the
+ high<divisor test. mpn/x86/k6/mode1o.asm for instance finds neither
+ useful. */
+
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
+ mp_limb_t orig_c)
+{
+ mp_limb_t s, h, l, inverse, dummy, dmul, ret;
+ mp_limb_t c = orig_c;
+ mp_size_t i;
+
+ ASSERT (size >= 1);
+ ASSERT (d & 1);
+ ASSERT_MPN (src, size);
+ ASSERT_LIMB (d);
+ ASSERT_LIMB (c);
+
+ if (size == 1)
+ {
+ s = src[0];
+ if (s > c)
+ {
+ l = s-c;
+ h = l % d;
+ if (h != 0)
+ h = d - h;
+ }
+ else
+ {
+ l = c-s;
+ h = l % d;
+ }
+ return h;
+ }
+
+
+ binvert_limb (inverse, d);
+ dmul = d << GMP_NAIL_BITS;
+
+ i = 0;
+ do
+ {
+ s = src[i];
+ SUBC_LIMB (c, l, s, c);
+ l = (l * inverse) & GMP_NUMB_MASK;
+ umul_ppmm (h, dummy, l, dmul);
+ c += h;
+ }
+ while (++i < size-1);
+
+
+ s = src[i];
+ if (s <= d)
+ {
+ /* With high<=d the final step can be a subtract and addback. If c==0
+ then the addback will restore to l>=0. If c==d then will get l==d
+ if s==0, but that's ok per the function definition. */
+
+ l = c - s;
+ if (c < s)
+ l += d;
+
+ ret = l;
+ }
+ else
+ {
+ /* Can't skip a divide, just do the loop code once more. */
+
+ SUBC_LIMB (c, l, s, c);
+ l = (l * inverse) & GMP_NUMB_MASK;
+ umul_ppmm (h, dummy, l, dmul);
+ c += h;
+ ret = c;
+ }
+
+ ASSERT (orig_c < d ? ret < d : ret <= d);
+ return ret;
+}
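
The loop above can be mimicked one word at a time.  In the scaled-down sketch
below (32-bit limbs, c passed as 0, and every limb pushed through the loop so
k is always size; not GMP's code) the returned r satisfies
a + r*B^size == 0 (mod d), matching the definition at the top of the file:

#include <assert.h>
#include <stdint.h>

/* Inverse of an odd d modulo 2^32, by Newton iteration (each step doubles
   the number of correct low bits; d itself is correct to 3 bits). */
static uint32_t
binvert32 (uint32_t d)
{
  uint32_t inv = d;
  for (int i = 0; i < 4; i++)
    inv *= 2 - d * inv;
  return inv;
}

static uint32_t
modexact_1c_odd_32 (const uint32_t *src, int size, uint32_t d, uint32_t c)
{
  uint32_t inv = binvert32 (d);
  for (int i = 0; i < size; i++)
    {
      uint32_t s = src[i];
      uint32_t l = s - c;                    /* low word of s - c        */
      uint32_t borrow = s < c;               /* did the subtract borrow? */
      uint32_t q = l * inv;                  /* q*d == l (mod 2^32)      */
      uint32_t h = (uint32_t) (((uint64_t) q * d) >> 32);  /* high product */
      c = h + borrow;                        /* next carry, i.e. next r  */
    }
  return c;
}

int
main (void)
{
  uint32_t a[2] = { 0xDEADBEEFu, 0x01234567u };  /* least significant first */
  uint32_t d = 12345;                            /* any odd divisor */
  uint32_t r = modexact_1c_odd_32 (a, 2, d, 0);

  /* Check a + r*B^2 == 0 (mod d) without needing 128-bit arithmetic. */
  uint64_t v = ((uint64_t) a[1] << 32) | a[0];
  uint64_t B1 = (1ULL << 32) % d;
  uint64_t B2 = B1 * B1 % d;
  assert ((v % d + (uint64_t) r * B2 % d) % d == 0);
  return 0;
}
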
+
+
+
+#if 0
+
+/* The following is an alternate form that might shave one cycle on a
+ superscalar processor since it takes c+=h off the dependent chain,
+ leaving just a low product, high product, and a subtract.
+
+ This is for CPU specific implementations to consider. A special case for
+ high<divisor and/or size==1 can be added if desired.
+
+ Notice that c is only ever 0 or 1, since if s-c produces a borrow then
+ x=0xFF..FF and x-h cannot produce a borrow. The c=(x>s) could become
+ c=(x==0xFF..FF) too, if that helped. */
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
+{
+ mp_limb_t s, x, y, inverse, dummy, dmul, c1, c2;
+ mp_limb_t c = 0;
+ mp_size_t i;
+
+ ASSERT (size >= 1);
+ ASSERT (d & 1);
+
+ binvert_limb (inverse, d);
+ dmul = d << GMP_NAIL_BITS;
+
+ for (i = 0; i < size; i++)
+ {
+ ASSERT (c==0 || c==1);
+
+ s = src[i];
+ SUBC_LIMB (c1, x, s, c);
+
+ SUBC_LIMB (c2, y, x, h);
+ c = c1 + c2;
+
+ y = (y * inverse) & GMP_NUMB_MASK;
+ umul_ppmm (h, dummy, y, dmul);
+ }
+
+ h += c;
+ return h;
+}
+
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/mu_bdiv_q.c b/vendor/gmp-6.3.0/mpn/generic/mu_bdiv_q.c
new file mode 100644
index 0000000..0ef3bd8
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mu_bdiv_q.c
@@ -0,0 +1,281 @@
+/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^nn,
+ storing the result in {qp,nn}. Overlap allowed between Q and N; all other
+ overlap disallowed.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/*
+ The idea of the algorithm used herein is to compute a smaller inverted value
+ than used in the standard Barrett algorithm, and thus save time in the
+ Newton iterations, and pay just a small price when using the inverted value
+ for developing quotient bits. This algorithm was presented at ICMS 2006.
+*/
+
+#include "gmp-impl.h"
+
+
+/* N = {np,nn}
+ D = {dp,dn}
+
+ Requirements: N >= D
+ D >= 1
+ D odd
+ dn >= 2
+ nn >= 2
+ scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).
+
+ Write quotient to Q = {qp,nn}.
+
+ FIXME: When iterating, perhaps do the small step before loop, not after.
+ FIXME: Try to avoid the scalar divisions when computing inverse size.
+ FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible. In
+ particular, when dn==in, tp and rp could use the same space.
+ FIXME: Trim final quotient calculation to qn limbs of precision.
+*/
+static void
+mpn_mu_bdiv_q_old (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr scratch)
+{
+ mp_size_t qn;
+ mp_size_t in;
+ int cy, c0;
+ mp_size_t tn, wn;
+
+ qn = nn;
+
+ ASSERT (dn >= 2);
+ ASSERT (qn >= 2);
+
+ if (qn > dn)
+ {
+ mp_size_t b;
+
+ /* |_______________________| dividend
+ |________| divisor */
+
+#define ip scratch /* in */
+#define rp (scratch + in) /* dn or rest >= binvert_itch(in) */
+#define tp (scratch + in + dn) /* dn+in or next_size(dn) */
+#define scratch_out (scratch + in + dn + tn) /* mulmod_bnm1_itch(next_size(dn)) */
+
+ /* Compute an inverse size that is a nice partition of the quotient. */
+ b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+
+ /* Some notes on allocation:
+
+	 When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
+	 limbs of R die at that point.  We could save memory by letting T live
+	 just under R, and letting the upper part of T expand into R.  These
+	 changes should reduce itch to perhaps 3dn.
+ */
+
+ mpn_binvert (ip, dp, in, rp);
+
+ cy = 0;
+
+ MPN_COPY (rp, np, dn);
+ np += dn;
+ mpn_mullo_n (qp, rp, ip, in);
+ qn -= in;
+
+ while (qn > in)
+ {
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[dn+in-1...in] */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn);
+ mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+ wn = dn + in - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+ mpn_decr_u (tp + wn, c0);
+ }
+ }
+
+ qp += in;
+ if (dn != in)
+ {
+ /* Subtract tp[dn-1...in] from partial remainder. */
+ cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+ if (cy == 2)
+ {
+ mpn_incr_u (tp + dn, 1);
+ cy = 1;
+ }
+ }
+ /* Subtract tp[dn+in-1...dn] from dividend. */
+ cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
+ np += in;
+ mpn_mullo_n (qp, rp, ip, in);
+ qn -= in;
+ }
+
+ /* Generate last qn limbs.
+ FIXME: It should be possible to limit precision here, since qn is
+ typically somewhat smaller than dn. No big gains expected. */
+
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[qn+in-1...in] */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn);
+ mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+ wn = dn + in - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+ mpn_decr_u (tp + wn, c0);
+ }
+ }
+
+ qp += in;
+ if (dn != in)
+ {
+ cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+ if (cy == 2)
+ {
+ mpn_incr_u (tp + dn, 1);
+ cy = 1;
+ }
+ }
+
+ mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
+ mpn_mullo_n (qp, rp, ip, qn);
+
+#undef ip
+#undef rp
+#undef tp
+#undef scratch_out
+ }
+ else
+ {
+ /* |_______________________| dividend
+ |________________| divisor */
+
+#define ip scratch /* in */
+#define tp (scratch + in) /* qn+in or next_size(qn) or rest >= binvert_itch(in) */
+#define scratch_out (scratch + in + tn)/* mulmod_bnm1_itch(next_size(qn)) */
+
+ /* Compute half-sized inverse. */
+ in = qn - (qn >> 1);
+
+ mpn_binvert (ip, dp, in, tp);
+
+ mpn_mullo_n (qp, np, ip, in); /* low `in' quotient limbs */
+
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, qn, qp, in); /* mulhigh */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (qn);
+ mpn_mulmod_bnm1 (tp, tn, dp, qn, qp, in, scratch_out);
+ wn = qn + in - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ c0 = mpn_cmp (tp, np, wn) < 0;
+ mpn_decr_u (tp + wn, c0);
+ }
+ }
+
+ mpn_sub_n (tp, np + in, tp + in, qn - in);
+ mpn_mullo_n (qp + in, tp, ip, qn - in); /* high qn-in quotient limbs */
+
+#undef ip
+#undef tp
+#undef scratch_out
+ }
+}
+
+void
+mpn_mu_bdiv_q (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr scratch)
+{
+ mpn_mu_bdiv_q_old (qp, np, nn, dp, dn, scratch);
+ mpn_neg (qp, qp, nn);
+}
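
At its core, "bdiv" is Hensel (2-adic) division: for odd D the quotient is N
times the inverse of D modulo a power of B, which is what the mpn_binvert /
mpn_mullo_n pairs above compute block by block; the sign convention differs
between the old routine and this wrapper, hence the final mpn_neg.  A one-word
analogue with B^k replaced by 2^32 (illustration only, not GMP code):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t d = 0x10001f;            /* odd divisor */
  uint32_t n = 0xdeadbeef;

  uint32_t inv = d;                 /* Newton iteration for d^{-1} mod 2^32 */
  for (int i = 0; i < 4; i++)
    inv *= 2 - d * inv;

  uint32_t q = n * inv;             /* Hensel (2-adic) quotient */
  assert ((uint32_t) ((uint64_t) q * d) == n);   /* q*d == n (mod 2^32) */
  return 0;
}
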
+
+mp_size_t
+mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
+{
+ mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
+ mp_size_t b;
+
+ ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);
+
+ qn = nn;
+
+ if (qn > dn)
+ {
+ b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ {
+ tn = dn + in;
+ itch_out = 0;
+ }
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn);
+ itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+ }
+ itches = dn + tn + itch_out;
+ }
+ else
+ {
+ in = qn - (qn >> 1);
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ {
+ tn = qn + in;
+ itch_out = 0;
+ }
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (qn);
+ itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
+ }
+ itches = tn + itch_out;
+ }
+
+ itch_binvert = mpn_binvert_itch (in);
+ return in + MAX (itches, itch_binvert);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mu_bdiv_qr.c b/vendor/gmp-6.3.0/mpn/generic/mu_bdiv_qr.c
new file mode 100644
index 0000000..540ad73
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mu_bdiv_qr.c
@@ -0,0 +1,312 @@
+/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
+ where qn = nn-dn, storing the result in {qp,qn}. Overlap allowed between Q
+ and N; all other overlap disallowed.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/*
+ The idea of the algorithm used herein is to compute a smaller inverted value
+ than used in the standard Barrett algorithm, and thus save time in the
+ Newton iterations, and pay just a small price when using the inverted value
+ for developing quotient bits. This algorithm was presented at ICMS 2006.
+*/
+
+#include "gmp-impl.h"
+
+
+/* N = {np,nn}
+ D = {dp,dn}
+
+ Requirements: N >= D
+ D >= 1
+ D odd
+ dn >= 2
+ nn >= 2
+ scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).
+
+ Write quotient to Q = {qp,nn-dn}.
+
+ FIXME: When iterating, perhaps do the small step before loop, not after.
+ FIXME: Try to avoid the scalar divisions when computing inverse size.
+ FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible. In
+ particular, when dn==in, tp and rp could use the same space.
+*/
+static mp_limb_t
+mpn_mu_bdiv_qr_old (mp_ptr qp,
+ mp_ptr rp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr scratch)
+{
+ mp_size_t qn;
+ mp_size_t in;
+ mp_limb_t cy, c0;
+ mp_size_t tn, wn;
+
+ qn = nn - dn;
+
+ ASSERT (dn >= 2);
+ ASSERT (qn >= 2);
+
+ if (qn > dn)
+ {
+ mp_size_t b;
+
+ /* |_______________________| dividend
+ |________| divisor */
+
+#define ip scratch /* in */
+#define tp (scratch + in) /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
+#define scratch_out (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
+
+ /* Compute an inverse size that is a nice partition of the quotient. */
+ b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+
+ /* Some notes on allocation:
+
+	 When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
+	 limbs of R die at that point.  We could save memory by letting T live
+	 just under R, and letting the upper part of T expand into R.  These
+	 changes should reduce itch to perhaps 3dn.
+ */
+
+ mpn_binvert (ip, dp, in, tp);
+
+ MPN_COPY (rp, np, dn);
+ np += dn;
+ cy = 0;
+
+ while (qn > in)
+ {
+ mpn_mullo_n (qp, rp, ip, in);
+
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[dn+in-1...in] */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn);
+ mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+ wn = dn + in - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+ mpn_decr_u (tp + wn, c0);
+ }
+ }
+
+ qp += in;
+ qn -= in;
+
+ if (dn != in)
+ {
+ /* Subtract tp[dn-1...in] from partial remainder. */
+ cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+ if (cy == 2)
+ {
+ mpn_incr_u (tp + dn, 1);
+ cy = 1;
+ }
+ }
+ /* Subtract tp[dn+in-1...dn] from dividend. */
+ cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
+ np += in;
+ }
+
+ /* Generate last qn limbs. */
+ mpn_mullo_n (qp, rp, ip, qn);
+
+ if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, dn, qp, qn); /* mulhi, need tp[qn+in-1...in] */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn);
+ mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
+ wn = dn + qn - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+ mpn_decr_u (tp + wn, c0);
+ }
+ }
+
+ if (dn != qn)
+ {
+ cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
+ if (cy == 2)
+ {
+ mpn_incr_u (tp + dn, 1);
+ cy = 1;
+ }
+ }
+ return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);
+
+#undef ip
+#undef tp
+#undef scratch_out
+ }
+ else
+ {
+ /* |_______________________| dividend
+ |________________| divisor */
+
+#define ip scratch /* in */
+#define tp (scratch + in) /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
+#define scratch_out (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
+
+ /* Compute half-sized inverse. */
+ in = qn - (qn >> 1);
+
+ mpn_binvert (ip, dp, in, tp);
+
+ mpn_mullo_n (qp, np, ip, in); /* low `in' quotient limbs */
+
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, dn, qp, in); /* mulhigh */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn);
+ mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+ wn = dn + in - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ c0 = mpn_sub_n (tp + tn, tp, np, wn);
+ mpn_decr_u (tp + wn, c0);
+ }
+ }
+
+ qp += in;
+ qn -= in;
+
+ cy = mpn_sub_n (rp, np + in, tp + in, dn);
+ mpn_mullo_n (qp, rp, ip, qn); /* high qn quotient limbs */
+
+ if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, dn, qp, qn); /* mulhigh */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn);
+ mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
+ wn = dn + qn - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+ mpn_decr_u (tp + wn, c0);
+ }
+ }
+
+ cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
+ if (cy == 2)
+ {
+ mpn_incr_u (tp + dn, 1);
+ cy = 1;
+ }
+ return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
+
+#undef ip
+#undef tp
+#undef scratch_out
+ }
+}
+
+mp_limb_t
+mpn_mu_bdiv_qr (mp_ptr qp,
+ mp_ptr rp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr scratch)
+{
+ mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch);
+
+ /* R' B^{qn} = U - Q' D
+ *
+ * Q = B^{qn} - Q' (assuming Q' != 0)
+ *
+ * R B^{qn} = U + Q D = U + B^{qn} D - Q' D
+ * = B^{qn} D + R'
+ */
+
+ if (UNLIKELY (!mpn_neg (qp, qp, nn - dn)))
+ {
+ /* Zero quotient. */
+ ASSERT (cy == 0);
+ return 0;
+ }
+ else
+ {
+ mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn);
+ ASSERT (cy2 >= cy);
+
+ return cy2 - cy;
+ }
+}
+
+
+mp_size_t
+mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
+{
+ mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
+ mp_size_t b;
+
+ ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);
+
+ qn = nn - dn;
+
+ if (qn > dn)
+ {
+ b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+ }
+ else
+ {
+ in = qn - (qn >> 1);
+ }
+
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ {
+ tn = dn + in;
+ itch_out = 0;
+ }
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn);
+ itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+ }
+
+ itch_binvert = mpn_binvert_itch (in);
+ itches = tn + itch_out;
+ return in + MAX (itches, itch_binvert);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mu_div_q.c b/vendor/gmp-6.3.0/mpn/generic/mu_div_q.c
new file mode 100644
index 0000000..44cfb40
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mu_div_q.c
@@ -0,0 +1,184 @@
+/* mpn_mu_div_q.
+
+ Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/*
+ The idea of the algorithm used herein is to compute a smaller inverted value
+ than used in the standard Barrett algorithm, and thus save time in the
+ Newton iterations, and pay just a small price when using the inverted value
+ for developing quotient bits. This algorithm was presented at ICMS 2006.
+*/
+
+/*
+ Things to work on:
+
+ 1. This is a rudimentary implementation of mpn_mu_div_q. The algorithm is
+ probably close to optimal, except when mpn_mu_divappr_q fails.
+
+ 2. We used to fall back to mpn_mu_div_qr when we detect a possible
+     mpn_mu_divappr_q rounding problem; now we multiply and compare.
+ Unfortunately, since mpn_mu_divappr_q does not return the partial
+     remainder, this is still not optimal.  An mpn_mu_divappr_qr could
+ solve that.
+
+ 3. The allocations done here should be made from the scratch area, which
+ then would need to be amended.
+*/
+
+#include <stdlib.h> /* for NULL */
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_mu_div_q (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr scratch)
+{
+ mp_ptr tp, rp;
+ mp_size_t qn;
+ mp_limb_t cy, qh;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ qn = nn - dn;
+
+ tp = TMP_BALLOC_LIMBS (qn + 1);
+
+ if (qn >= dn) /* nn >= 2*dn + 1 */
+ {
+ /* |_______________________| dividend
+ |________| divisor */
+
+ rp = TMP_BALLOC_LIMBS (nn + 1);
+ MPN_COPY (rp + 1, np, nn);
+ rp[0] = 0;
+
+ qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
+
+ cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
+
+ if (UNLIKELY (cy != 0))
+ {
+ /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
+ canonically reduced, replace the returned value of B^(qn-dn)+eps
+ by the largest possible value. */
+ mp_size_t i;
+ for (i = 0; i < qn + 1; i++)
+ tp[i] = GMP_NUMB_MAX;
+ }
+
+ /* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is
+ smaller than the max error, we cannot trust the quotient. */
+ if (tp[0] > 4)
+ {
+ MPN_COPY (qp, tp + 1, qn);
+ }
+ else
+ {
+ mp_limb_t cy;
+ mp_ptr pp;
+
+ pp = rp;
+ mpn_mul (pp, tp + 1, qn, dp, dn);
+
+ cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
+
+	  if (cy || mpn_cmp (pp, np, nn) > 0) /* Quotient at most one too large; one correction, no loop. */
+ qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+ else /* Same as above */
+ MPN_COPY (qp, tp + 1, qn);
+ }
+ }
+ else
+ {
+ /* |_______________________| dividend
+ |________________| divisor */
+
+ /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
+ here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only
+ the most significant dn-1 limbs will actually be read, but it is not
+ pretty. */
+
+ qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
+ dp + dn - (qn + 1), qn + 1, scratch);
+
+ /* The max error of mpn_mu_divappr_q is +4, but we get an additional
+ error from the divisor truncation. */
+ if (tp[0] > 6)
+ {
+ MPN_COPY (qp, tp + 1, qn);
+ }
+ else
+ {
+ mp_limb_t cy;
+
+ /* FIXME: a shorter product should be enough; we may use already
+ allocated space... */
+ rp = TMP_BALLOC_LIMBS (nn);
+ mpn_mul (rp, dp, dn, tp + 1, qn);
+
+ cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
+
+	  if (cy || mpn_cmp (rp, np, nn) > 0) /* Quotient at most one too large; one correction, no loop. */
+ qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+ else /* Same as above */
+ MPN_COPY (qp, tp + 1, qn);
+ }
+ }
+
+ TMP_FREE;
+ return qh;
+}
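
The correction branch above (taken when the low quotient limb is within the divappr error
bound) amounts to multiplying the candidate quotient back and stepping it down until
Q*D <= N.  A minimal sketch of that multiply-back check using only the documented mpz
interface; the operand values and the deliberate +3 overshoot are arbitrary illustrations,
not taken from the code above.

#include <gmp.h>
#include <stdio.h>

int
main (void)
{
  mpz_t n, d, q, t;
  mpz_inits (n, d, q, t, NULL);
  mpz_set_str (n, "123456789012345678901234567890123456789", 10);
  mpz_set_str (d, "987654321987654321987654321", 10);

  mpz_fdiv_q (q, n, d);		/* stand-in for the approximate quotient */
  mpz_add_ui (q, q, 3);		/* pretend it overshot, like floor(N/D)+e */

  /* Multiply back; while Q*D > N the quotient is too large by a bounded
     amount, so step it down.  This mirrors the compare-and-decrement above. */
  for (;;)
    {
      mpz_mul (t, q, d);
      if (mpz_cmp (t, n) <= 0)
	break;
      mpz_sub_ui (q, q, 1);
    }
  gmp_printf ("floor(N/D) = %Zd\n", q);
  mpz_clears (n, d, q, t, NULL);
  return 0;
}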
+
+mp_size_t
+mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+ mp_size_t qn;
+
+ qn = nn - dn;
+ if (qn >= dn)
+ {
+ return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
+ }
+ else
+ {
+ return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mu_div_qr.c b/vendor/gmp-6.3.0/mpn/generic/mu_div_qr.c
new file mode 100644
index 0000000..8b9c702
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mu_div_qr.c
@@ -0,0 +1,417 @@
+/* mpn_mu_div_qr, mpn_preinv_mu_div_qr.
+
+ Compute Q = floor(N / D) and R = N-QD. N is nn limbs and D is dn limbs and
+ must be normalized, and Q must be nn-dn limbs. The requirement that Q is
+ nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us to
+ let N be unmodified during the operation.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/*
+ The idea of the algorithm used herein is to compute a smaller inverted value
+ than used in the standard Barrett algorithm, and thus save time in the
+ Newton iterations, and pay just a small price when using the inverted value
+ for developing quotient bits. This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_divappr_q.c should be edited in sync.
+
+ Things to work on:
+
+ * This isn't optimal when the quotient isn't needed, as it might take a lot
+ of space. The computation is always needed, though, so there is no time to
+ save with special code.
+
+ * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+ demonstrated by the fact that the mpn_invertappr function's scratch needs
+ mean that we need to keep a large allocation long after it is needed.
+ Things are worse as mpn_mul_fft does not accept any scratch parameter,
+ which means we'll have a large memory hole while in mpn_mul_fft. In
+ general, a peak scratch need in the beginning of a function isn't
+ well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h> /* for NULL */
+#include "gmp-impl.h"
+
+
+/* FIXME: The MU_DIV_QR_SKEW_THRESHOLD was not analysed properly. It gives a
+ speedup according to old measurements, but does the decision mechanism
+   really make sense?  It seems like the ratio between dn and qn might be
+ what we really should be checking. */
+#ifndef MU_DIV_QR_SKEW_THRESHOLD
+#define MU_DIV_QR_SKEW_THRESHOLD 100
+#endif
+
+#ifdef CHECK /* FIXME: Enable in minithres */
+#undef MU_DIV_QR_SKEW_THRESHOLD
+#define MU_DIV_QR_SKEW_THRESHOLD 1
+#endif
+
+
+static mp_limb_t mpn_mu_div_qr2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+static mp_size_t mpn_mu_div_qr_choose_in (mp_size_t, mp_size_t, int);
+
+
+mp_limb_t
+mpn_mu_div_qr (mp_ptr qp,
+ mp_ptr rp,
+ mp_srcptr np,
+ mp_size_t nn,
+ mp_srcptr dp,
+ mp_size_t dn,
+ mp_ptr scratch)
+{
+ mp_size_t qn;
+ mp_limb_t cy, qh;
+
+ qn = nn - dn;
+ if (qn + MU_DIV_QR_SKEW_THRESHOLD < dn)
+ {
+ /* |______________|_ign_first__| dividend nn
+ |_______|_ign_first__| divisor dn
+
+ |______| quotient (prel) qn
+
+ |___________________| quotient * ignored-divisor-part dn-1
+ */
+
+ /* Compute a preliminary quotient and a partial remainder by dividing the
+ most significant limbs of each operand. */
+ qh = mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),
+ np + nn - (2 * qn + 1), 2 * qn + 1,
+ dp + dn - (qn + 1), qn + 1,
+ scratch);
+
+ /* Multiply the quotient by the divisor limbs ignored above. */
+ if (dn - (qn + 1) > qn)
+ mpn_mul (scratch, dp, dn - (qn + 1), qp, qn); /* prod is dn-1 limbs */
+ else
+ mpn_mul (scratch, qp, qn, dp, dn - (qn + 1)); /* prod is dn-1 limbs */
+
+ if (qh)
+ cy = mpn_add_n (scratch + qn, scratch + qn, dp, dn - (qn + 1));
+ else
+ cy = 0;
+ scratch[dn - 1] = cy;
+
+ cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));
+ cy = mpn_sub_nc (rp + nn - (2 * qn + 1),
+ rp + nn - (2 * qn + 1),
+ scratch + nn - (2 * qn + 1),
+ qn + 1, cy);
+ if (cy)
+ {
+ qh -= mpn_sub_1 (qp, qp, qn, 1);
+ mpn_add_n (rp, rp, dp, dn);
+ }
+ }
+ else
+ {
+ qh = mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
+ }
+
+ return qh;
+}
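
These internal routines are only reached through documented interfaces; the public
entry point with the same Q = floor(N/D), R = N - Q*D contract is mpn_tdiv_qr, which
requires nn-dn+1 quotient limbs and dn remainder limbs (whether a given call actually
reaches the Barrett code here depends on internal thresholds).  A minimal usage sketch
with small, arbitrary operands:

#include <gmp.h>
#include <stdio.h>

int
main (void)
{
  /* N has 4 limbs, D has 2 limbs; D's most significant limb must be nonzero. */
  mp_limb_t np[4] = { 7, 11, 13, 17 };
  mp_limb_t dp[2] = { 5, 3 };
  mp_limb_t qp[4 - 2 + 1];	/* nn - dn + 1 quotient limbs */
  mp_limb_t rp[2];		/* dn remainder limbs */

  mpn_tdiv_qr (qp, rp, 0, np, 4, dp, 2);

  for (int i = 2; i >= 0; i--)
    gmp_printf ("q[%d] = %Mu\n", i, qp[i]);
  for (int i = 1; i >= 0; i--)
    gmp_printf ("r[%d] = %Mu\n", i, rp[i]);
  return 0;
}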
+
+static mp_limb_t
+mpn_mu_div_qr2 (mp_ptr qp,
+ mp_ptr rp,
+ mp_srcptr np,
+ mp_size_t nn,
+ mp_srcptr dp,
+ mp_size_t dn,
+ mp_ptr scratch)
+{
+ mp_size_t qn, in;
+ mp_limb_t cy, qh;
+ mp_ptr ip, tp;
+
+ ASSERT (dn > 1);
+
+ qn = nn - dn;
+
+ /* Compute the inverse size. */
+ in = mpn_mu_div_qr_choose_in (qn, dn, 0);
+ ASSERT (in <= dn);
+
+#if 1
+ /* This alternative inverse computation method gets slightly more accurate
+ results. FIXMEs: (1) Temp allocation needs not analysed (2) itch function
+ not adapted (3) mpn_invertappr scratch needs not met. */
+ ip = scratch;
+ tp = scratch + in + 1;
+
+ /* compute an approximate inverse on (in+1) limbs */
+ if (dn == in)
+ {
+ MPN_COPY (tp + 1, dp, in);
+ tp[0] = 1;
+ mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+ MPN_COPY_INCR (ip, ip + 1, in);
+ }
+ else
+ {
+ cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
+ if (UNLIKELY (cy != 0))
+ MPN_ZERO (ip, in);
+ else
+ {
+ mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+ MPN_COPY_INCR (ip, ip + 1, in);
+ }
+ }
+#else
+ /* This older inverse computation method gets slightly worse results than the
+ one above. */
+ ip = scratch;
+ tp = scratch + in;
+
+ /* Compute inverse of D to in+1 limbs, then round to 'in' limbs. Ideally the
+ inversion function should do this automatically. */
+ if (dn == in)
+ {
+ tp[in + 1] = 0;
+ MPN_COPY (tp + in + 2, dp, in);
+ mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+ }
+ else
+ {
+ mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+ }
+ cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+ if (UNLIKELY (cy != 0))
+ MPN_ZERO (tp + 1, in);
+ MPN_COPY (ip, tp + 1, in);
+#endif
+
+ qh = mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
+
+ return qh;
+}
+
+mp_limb_t
+mpn_preinv_mu_div_qr (mp_ptr qp,
+ mp_ptr rp,
+ mp_srcptr np,
+ mp_size_t nn,
+ mp_srcptr dp,
+ mp_size_t dn,
+ mp_srcptr ip,
+ mp_size_t in,
+ mp_ptr scratch)
+{
+ mp_size_t qn;
+ mp_limb_t cy, cx, qh;
+ mp_limb_t r;
+ mp_size_t tn, wn;
+
+#define tp scratch
+#define scratch_out (scratch + tn)
+
+ qn = nn - dn;
+
+ np += qn;
+ qp += qn;
+
+ qh = mpn_cmp (np, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (rp, np, dp, dn);
+ else
+ MPN_COPY_INCR (rp, np, dn);
+
+ /* if (qn == 0) */ /* The while below handles this case */
+ /* return qh; */ /* Degenerate use. Should we allow this? */
+
+ while (qn > 0)
+ {
+ if (qn < in)
+ {
+ ip += in - qn;
+ in = qn;
+ }
+ np -= in;
+ qp -= in;
+
+ /* Compute the next block of quotient limbs by multiplying the inverse I
+ by the upper part of the partial remainder R. */
+ mpn_mul_n (tp, rp + dn - in, ip, in); /* mulhi */
+ cy = mpn_add_n (qp, tp + in, rp + dn - in, in); /* I's msb implicit */
+ ASSERT_ALWAYS (cy == 0);
+
+ qn -= in;
+
+ /* Compute the product of the quotient block and the divisor D, to be
+ subtracted from the partial remainder combined with new limbs from the
+ dividend N. We only really need the low dn+1 limbs. */
+
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, dn, qp, in); /* dn+in limbs, high 'in' cancels */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn + 1);
+ mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+ wn = dn + in - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+ cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+ cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+ ASSERT_ALWAYS (cx >= cy);
+ mpn_incr_u (tp, cx - cy);
+ }
+ }
+
+ r = rp[dn - in] - tp[dn];
+
+ /* Subtract the product from the partial remainder combined with new
+ limbs from the dividend N, generating a new partial remainder R. */
+ if (dn != in)
+ {
+ cy = mpn_sub_n (tp, np, tp, in); /* get next 'in' limbs from N */
+ cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+ MPN_COPY (rp, tp, dn); /* FIXME: try to avoid this */
+ }
+ else
+ {
+ cy = mpn_sub_n (rp, np, tp, in); /* get next 'in' limbs from N */
+ }
+
+ STAT (int i; int err = 0;
+ static int errarr[5]; static int err_rec; static int tot);
+
+ /* Check the remainder R and adjust the quotient as needed. */
+ r -= cy;
+ while (r != 0)
+ {
+ /* We loop 0 times with about 69% probability, 1 time with about 31%
+ probability, 2 times with about 0.6% probability, if inverse is
+ computed as recommended. */
+ mpn_incr_u (qp, 1);
+ cy = mpn_sub_n (rp, rp, dp, dn);
+ r -= cy;
+ STAT (err++);
+ }
+ if (mpn_cmp (rp, dp, dn) >= 0)
+ {
+ /* This is executed with about 76% probability. */
+ mpn_incr_u (qp, 1);
+ cy = mpn_sub_n (rp, rp, dp, dn);
+ STAT (err++);
+ }
+
+ STAT (
+ tot++;
+ errarr[err]++;
+ if (err > err_rec)
+ err_rec = err;
+ if (tot % 0x10000 == 0)
+ {
+ for (i = 0; i <= err_rec; i++)
+ printf (" %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+ printf ("\n");
+ }
+ );
+ }
+
+ return qh;
+}
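
The per-block step above (multiply the top of R by the truncated inverse, add the
implicit most significant inverse bit, then fix the estimate with a few conditional
subtractions) is easiest to see at single-limb scale.  The following is a self-contained
sketch with 32-bit "limbs" and a 64-bit double word, not GMP code; the correction bound
differs from the limb-level routine, so the loop simply runs until the remainder is
reduced.  All numeric values are arbitrary.

#include <stdint.h>
#include <assert.h>

/* Divide u = u1*B + u0 by a normalized d (high bit set), B = 2^32, u1 < d.
   i approximates floor(B^2/d) - B, i.e. the inverse with its msb implicit. */
int
main (void)
{
  const uint64_t B = (uint64_t) 1 << 32;
  uint32_t d = 0x94ac7d31u;			/* arbitrary normalized divisor */
  uint32_t i = (uint32_t) (UINT64_MAX / d - B);	/* floor((B^2-1)/d) - B */

  uint32_t u1 = 0x1234fedcu, u0 = 0x89abcdefu;	/* arbitrary dividend, u1 < d */
  uint64_t u = (uint64_t) u1 * B + u0;

  /* Quotient estimate: mulhi(u1, i) plus u1 for the implicit msb of i+B. */
  uint64_t q = ((uint64_t) u1 * i >> 32) + u1;
  uint64_t r = u - q * d;

  while (r >= d)		/* the estimate is never too large, only */
    {				/* slightly too small, so adjust upwards  */
      q++;
      r -= d;
    }
  assert (q == u / d && r == u % d);
  return 0;
}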
+
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+ (a) dn < qn: in = ceil(qn / ceil(qn/dn))
+ (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+ (c) qn < dn/3: in = qn
+ In all cases we have in <= dn.
+ */
+static mp_size_t
+mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+ mp_size_t in;
+
+ if (k == 0)
+ {
+ mp_size_t b;
+ if (qn > dn)
+ {
+ /* Compute an inverse size that is a nice partition of the quotient. */
+ b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+ }
+ else if (3 * qn > dn)
+ {
+ in = (qn - 1) / 2 + 1; /* b = 2 */
+ }
+ else
+ {
+ in = (qn - 1) / 1 + 1; /* b = 1 */
+ }
+ }
+ else
+ {
+ mp_size_t xn;
+ xn = MIN (dn, qn);
+ in = (xn - 1) / k + 1;
+ }
+
+ return in;
+}
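
The k = 0 case above just picks the smallest block count b that keeps each quotient
block no larger than dn, then splits qn evenly across those blocks.  A small standalone
illustration of the same ceiling arithmetic (not GMP code, example sizes arbitrary):

#include <stdio.h>

/* in = ceil(qn / ceil(qn/dn)) for qn > dn, otherwise ceil(qn/2) or qn,
   mirroring cases (a)-(c) described above. */
static long
choose_in (long qn, long dn)
{
  if (qn > dn)
    {
      long b = (qn - 1) / dn + 1;	/* ceil(qn/dn) blocks */
      return (qn - 1) / b + 1;		/* ceil(qn/b) */
    }
  else if (3 * qn > dn)
    return (qn - 1) / 2 + 1;		/* two blocks */
  else
    return qn;				/* one block */
}

int
main (void)
{
  /* e.g. qn = 1000, dn = 300: b = 4 blocks, in = 250 <= dn */
  printf ("%ld %ld %ld\n",
	  choose_in (1000, 300), choose_in (200, 300), choose_in (50, 300));
  return 0;
}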
+
+mp_size_t
+mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+ mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
+ mp_size_t itch_preinv = mpn_preinv_mu_div_qr_itch (nn, dn, in);
+ mp_size_t itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
+
+ ASSERT (itch_preinv >= itch_invapp);
+ return in + MAX (itch_invapp, itch_preinv);
+}
+
+mp_size_t
+mpn_preinv_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, mp_size_t in)
+{
+ mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+
+ return itch_local + itch_out;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c b/vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c
new file mode 100644
index 0000000..0ef7e03
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c
@@ -0,0 +1,368 @@
+/* mpn_mu_divappr_q, mpn_preinv_mu_divappr_q.
+
+ Compute Q = floor(N / D) + e. N is nn limbs, D is dn limbs and must be
+ normalized, and Q must be nn-dn limbs, 0 <= e <= 4. The requirement that Q
+ is nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us
+ to let N be unmodified during the operation.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/*
+ The idea of the algorithm used herein is to compute a smaller inverted value
+ than used in the standard Barrett algorithm, and thus save time in the
+ Newton iterations, and pay just a small price when using the inverted value
+ for developing quotient bits. This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_div_qr.c should be edited in sync.
+
+ Things to work on:
+
+ * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+ demonstrated by the fact that the mpn_invertappr function's scratch needs
+ mean that we need to keep a large allocation long after it is needed.
+ Things are worse as mpn_mul_fft does not accept any scratch parameter,
+ which means we'll have a large memory hole while in mpn_mul_fft. In
+ general, a peak scratch need in the beginning of a function isn't
+ well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h> /* for NULL */
+#include "gmp-impl.h"
+
+static mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t,
+ mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+static mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int);
+
+mp_limb_t
+mpn_mu_divappr_q (mp_ptr qp,
+ mp_srcptr np,
+ mp_size_t nn,
+ mp_srcptr dp,
+ mp_size_t dn,
+ mp_ptr scratch)
+{
+ mp_size_t qn, in;
+ mp_limb_t cy, qh;
+ mp_ptr ip, tp;
+
+ ASSERT (dn > 1);
+
+ qn = nn - dn;
+
+ /* If Q is smaller than D, truncate operands. */
+ if (qn + 1 < dn)
+ {
+ np += dn - (qn + 1);
+ nn -= dn - (qn + 1);
+ dp += dn - (qn + 1);
+ dn = qn + 1;
+ }
+
+ /* Compute the inverse size. */
+ in = mpn_mu_divappr_q_choose_in (qn, dn, 0);
+ ASSERT (in <= dn);
+
+#if 1
+ /* This alternative inverse computation method gets slightly more accurate
+ results. FIXMEs: (1) Temp allocation needs not analysed (2) itch function
+ not adapted (3) mpn_invertappr scratch needs not met. */
+ ip = scratch;
+ tp = scratch + in + 1;
+
+ /* compute an approximate inverse on (in+1) limbs */
+ if (dn == in)
+ {
+ MPN_COPY (tp + 1, dp, in);
+ tp[0] = 1;
+ mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+ MPN_COPY_INCR (ip, ip + 1, in);
+ }
+ else
+ {
+ cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
+ if (UNLIKELY (cy != 0))
+ MPN_ZERO (ip, in);
+ else
+ {
+ mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+ MPN_COPY_INCR (ip, ip + 1, in);
+ }
+ }
+#else
+ /* This older inverse computation method gets slightly worse results than the
+ one above. */
+ ip = scratch;
+ tp = scratch + in;
+
+ /* Compute inverse of D to in+1 limbs, then round to 'in' limbs. Ideally the
+ inversion function should do this automatically. */
+ if (dn == in)
+ {
+ tp[in + 1] = 0;
+ MPN_COPY (tp + in + 2, dp, in);
+ mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+ }
+ else
+ {
+ mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+ }
+ cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+ if (UNLIKELY (cy != 0))
+ MPN_ZERO (tp + 1, in);
+ MPN_COPY (ip, tp + 1, in);
+#endif
+
+ qh = mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
+
+ return qh;
+}
+
+static mp_limb_t
+mpn_preinv_mu_divappr_q (mp_ptr qp,
+ mp_srcptr np,
+ mp_size_t nn,
+ mp_srcptr dp,
+ mp_size_t dn,
+ mp_srcptr ip,
+ mp_size_t in,
+ mp_ptr scratch)
+{
+ mp_size_t qn;
+ mp_limb_t cy, cx, qh;
+ mp_limb_t r;
+ mp_size_t tn, wn;
+
+#define rp scratch
+#define tp (scratch + dn)
+#define scratch_out (scratch + dn + tn)
+
+ qn = nn - dn;
+
+ np += qn;
+ qp += qn;
+
+ qh = mpn_cmp (np, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (rp, np, dp, dn);
+ else
+ MPN_COPY (rp, np, dn);
+
+ if (UNLIKELY (qn == 0))
+ return qh; /* Degenerate use. Should we allow this? */
+
+ for (;;) /* The exit condition (qn == 0) is verified in the loop. */
+ {
+ if (qn < in)
+ {
+ ip += in - qn;
+ in = qn;
+ }
+ np -= in;
+ qp -= in;
+
+ /* Compute the next block of quotient limbs by multiplying the inverse I
+ by the upper part of the partial remainder R. */
+ mpn_mul_n (tp, rp + dn - in, ip, in); /* mulhi */
+ cy = mpn_add_n (qp, tp + in, rp + dn - in, in); /* I's msb implicit */
+ ASSERT_ALWAYS (cy == 0);
+
+ qn -= in;
+ if (qn == 0)
+ break;
+
+ /* Compute the product of the quotient block and the divisor D, to be
+ subtracted from the partial remainder combined with new limbs from the
+ dividend N. We only really need the low dn limbs. */
+
+ if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+ mpn_mul (tp, dp, dn, qp, in); /* dn+in limbs, high 'in' cancels */
+ else
+ {
+ tn = mpn_mulmod_bnm1_next_size (dn + 1);
+ mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+ wn = dn + in - tn; /* number of wrapped limbs */
+ if (wn > 0)
+ {
+ cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+ cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+ cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+ ASSERT_ALWAYS (cx >= cy);
+ mpn_incr_u (tp, cx - cy);
+ }
+ }
+
+ r = rp[dn - in] - tp[dn];
+
+ /* Subtract the product from the partial remainder combined with new
+ limbs from the dividend N, generating a new partial remainder R. */
+ if (dn != in)
+ {
+ cy = mpn_sub_n (tp, np, tp, in); /* get next 'in' limbs from N */
+ cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+ MPN_COPY (rp, tp, dn); /* FIXME: try to avoid this */
+ }
+ else
+ {
+ cy = mpn_sub_n (rp, np, tp, in); /* get next 'in' limbs from N */
+ }
+
+ STAT (int i; int err = 0;
+ static int errarr[5]; static int err_rec; static int tot);
+
+ /* Check the remainder R and adjust the quotient as needed. */
+ r -= cy;
+ while (r != 0)
+ {
+ /* We loop 0 times with about 69% probability, 1 time with about 31%
+ probability, 2 times with about 0.6% probability, if inverse is
+ computed as recommended. */
+ mpn_incr_u (qp, 1);
+ cy = mpn_sub_n (rp, rp, dp, dn);
+ r -= cy;
+ STAT (err++);
+ }
+ if (mpn_cmp (rp, dp, dn) >= 0)
+ {
+ /* This is executed with about 76% probability. */
+ mpn_incr_u (qp, 1);
+ cy = mpn_sub_n (rp, rp, dp, dn);
+ STAT (err++);
+ }
+
+ STAT (
+ tot++;
+ errarr[err]++;
+ if (err > err_rec)
+ err_rec = err;
+ if (tot % 0x10000 == 0)
+ {
+ for (i = 0; i <= err_rec; i++)
+ printf (" %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+ printf ("\n");
+ }
+ );
+ }
+
+ /* FIXME: We should perhaps be somewhat more elegant in our rounding of the
+ quotient. For now, just make sure the returned quotient is >= the real
+ quotient; add 3 with saturating arithmetic. */
+ qn = nn - dn;
+ cy += mpn_add_1 (qp, qp, qn, 3);
+ if (cy != 0)
+ {
+ if (qh != 0)
+ {
+ /* Return a quotient of just 1-bits, with qh set. */
+ mp_size_t i;
+ for (i = 0; i < qn; i++)
+ qp[i] = GMP_NUMB_MAX;
+ }
+ else
+ {
+ /* Propagate carry into qh. */
+ qh = 1;
+ }
+ }
+
+ return qh;
+}
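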
+
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+ (a) dn < qn: in = ceil(qn / ceil(qn/dn))
+ (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+ (c) qn < dn/3: in = qn
+ In all cases we have in <= dn.
+ */
+static mp_size_t
+mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+ mp_size_t in;
+
+ if (k == 0)
+ {
+ mp_size_t b;
+ if (qn > dn)
+ {
+ /* Compute an inverse size that is a nice partition of the quotient. */
+   *          = B^{qn} D + R' B^{qn},  i.e.  R = D + R'
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+ }
+ else if (3 * qn > dn)
+ {
+ in = (qn - 1) / 2 + 1; /* b = 2 */
+ }
+ else
+ {
+ in = (qn - 1) / 1 + 1; /* b = 1 */
+ }
+ }
+ else
+ {
+ mp_size_t xn;
+ xn = MIN (dn, qn);
+ in = (xn - 1) / k + 1;
+ }
+
+ return in;
+}
+
+mp_size_t
+mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+ mp_size_t qn, in, itch_local, itch_out, itch_invapp;
+
+ qn = nn - dn;
+ if (qn + 1 < dn)
+ {
+ dn = qn + 1;
+ }
+ in = mpn_mu_divappr_q_choose_in (qn, dn, mua_k);
+
+ itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+ itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+ itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
+
+ ASSERT (dn + itch_local + itch_out >= itch_invapp);
+ return in + MAX (dn + itch_local + itch_out, itch_invapp);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mul.c b/vendor/gmp-6.3.0/mpn/generic/mul.c
new file mode 100644
index 0000000..37444e9
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mul.c
@@ -0,0 +1,441 @@
+/* mpn_mul -- Multiply two natural numbers.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1999-2003, 2005-2007, 2009, 2010, 2012,
+2014, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+#ifndef MUL_BASECASE_MAX_UN
+#define MUL_BASECASE_MAX_UN 500
+#endif
+
+/* Areas where the different toom algorithms can be called (extracted
+ from the t-toom*.c files, and ignoring small constant offsets):
+
+ 1/6 1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5 1 vn/un
+ 4/7 6/7
+ 6/11
+ |--------------------| toom22 (small)
+ || toom22 (large)
+ |xxxx| toom22 called
+ |-------------------------------------| toom32
+ |xxxxxxxxxxxxxxxx| | toom32 called
+ |------------| toom33
+ |x| toom33 called
+ |---------------------------------| | toom42
+ |xxxxxxxxxxxxxxxxxxxxxxxx| | toom42 called
+ |--------------------| toom43
+ |xxxxxxxxxx| toom43 called
+ |-----------------------------| toom52 (unused)
+ |--------| toom44
+ |xxxxxxxx| toom44 called
+ |--------------------| | toom53
+ |xxxxxx| toom53 called
+ |-------------------------| toom62 (unused)
+ |----------------| toom54 (unused)
+ |--------------------| toom63
+ |xxxxxxxxx| | toom63 called
+ |---------------------------------| toom6h
+ |xxxxxxxx| toom6h called
+ |-------------------------| toom8h (32 bit)
+ |------------------------------------------| toom8h (64 bit)
+ |xxxxxxxx| toom8h called
+*/
+
+#define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
+#define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
+
+/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
+ (pointed to by VP, with VN limbs), and store the result at PRODP. The
+ result is UN + VN limbs. Return the most significant limb of the result.
+
+   NOTE: The space pointed to by PRODP is overwritten before U and V have
+   been fully read, so overlap is an error.
+
+ Argument constraints:
+ 1. UN >= VN.
+ 2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
+ the multiplier and the multiplicand. */
+
+/*
+ * The cutoff lines in the toomX2 and toomX3 code are now exactly between the
+ ideal lines of the surrounding algorithms. Is that optimal?
+
+ * The toomX3 code now uses a structure similar to the one of toomX2, except
+ that it loops longer in the unbalanced case. The result is that the
+ remaining area might have un < vn. Should we fix the toomX2 code in a
+ similar way?
+
+ * The toomX3 code is used for the largest non-FFT unbalanced operands. It
+ therefore calls mpn_mul recursively for certain cases.
+
+ * Allocate static temp space using THRESHOLD variables (except for toom44
+ when !WANT_FFT). That way, we can typically have no TMP_ALLOC at all.
+
+ * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42
+ have the same vn threshold. This is not true, we should actually use
+   have the same vn threshold.  This is not true; we should actually use
+ even larger for toom42.
+
+ * That problem is even more prevalent for toomX3. We therefore use special
+ THRESHOLD variables there.
+*/
+
+mp_limb_t
+mpn_mul (mp_ptr prodp,
+ mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
+{
+ ASSERT (un >= vn);
+ ASSERT (vn >= 1);
+ ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
+ ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
+
+ if (BELOW_THRESHOLD (un, MUL_TOOM22_THRESHOLD))
+ {
+ /* When un (and thus vn) is below the toom22 range, do mul_basecase.
+ Test un and not vn here not to thwart the un >> vn code below.
+	 Test un and not vn here so as not to thwart the un >> vn code below.
+ smallest operands. */
+ mpn_mul_basecase (prodp, up, un, vp, vn);
+ }
+ else if (un == vn)
+ {
+ mpn_mul_n (prodp, up, vp, un);
+ }
+ else if (vn < MUL_TOOM22_THRESHOLD)
+ { /* plain schoolbook multiplication */
+
+      /* Unless un is very large, or unless we have an applicable mpn_mul_N,
+ perform basecase multiply directly. */
+ if (un <= MUL_BASECASE_MAX_UN
+#if HAVE_NATIVE_mpn_mul_2
+ || vn <= 2
+#else
+ || vn == 1
+#endif
+ )
+ mpn_mul_basecase (prodp, up, un, vp, vn);
+ else
+ {
+ /* We have un >> MUL_BASECASE_MAX_UN > vn. For better memory
+ locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply
+ these pieces with the vp[] operand. After each such partial
+ multiplication (but the last) we copy the most significant vn
+ limbs into a temporary buffer since that part would otherwise be
+ overwritten by the next multiplication. After the next
+ multiplication, we add it back. This illustrates the situation:
+
+ -->vn<--
+ | |<------- un ------->|
+ _____________________|
+ X /|
+ /XX__________________/ |
+ _____________________ |
+ X / |
+ /XX__________________/ |
+ _____________________ |
+ / / |
+ /____________________/ |
+ ==================================================================
+
+ The parts marked with X are the parts whose sums are copied into
+ the temporary buffer. */
+
+ mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];
+ mp_limb_t cy;
+ ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);
+
+ mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
+ prodp += MUL_BASECASE_MAX_UN;
+ MPN_COPY (tp, prodp, vn); /* preserve high triangle */
+ up += MUL_BASECASE_MAX_UN;
+ un -= MUL_BASECASE_MAX_UN;
+ while (un > MUL_BASECASE_MAX_UN)
+ {
+ mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
+ cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
+ mpn_incr_u (prodp + vn, cy);
+ prodp += MUL_BASECASE_MAX_UN;
+ MPN_COPY (tp, prodp, vn); /* preserve high triangle */
+ up += MUL_BASECASE_MAX_UN;
+ un -= MUL_BASECASE_MAX_UN;
+ }
+ if (un > vn)
+ {
+ mpn_mul_basecase (prodp, up, un, vp, vn);
+ }
+ else
+ {
+ ASSERT (un > 0);
+ mpn_mul_basecase (prodp, vp, vn, up, un);
+ }
+ cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
+ mpn_incr_u (prodp + vn, cy);
+ }
+ }
+ else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
+ {
+ /* Use ToomX2 variants */
+ mp_ptr scratch;
+ TMP_SDECL; TMP_SMARK;
+
+#define ITCH_TOOMX2 (9 * vn / 2 + GMP_NUMB_BITS * 2)
+ scratch = TMP_SALLOC_LIMBS (ITCH_TOOMX2);
+ ASSERT (mpn_toom22_mul_itch ((5*vn-1)/4, vn) <= ITCH_TOOMX2); /* 5vn/2+ */
+ ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX2); /* 7vn/6+ */
+ ASSERT (mpn_toom42_mul_itch (3 * vn - 1, vn) <= ITCH_TOOMX2); /* 9vn/2+ */
+#undef ITCH_TOOMX2
+
+      /* FIXME: This condition (repeated in the loop below) leaves a remaining
+	 block anywhere from a vn*vn square to a (3vn-1)*vn rectangle.  Leaving
+	 such a rectangle is hardly wise; we would get better balance by
+	 slightly moving the bound.  We will sometimes end up with un < vn,
+	 like in the X3 arm below. */
+ if (un >= 3 * vn)
+ {
+ mp_limb_t cy;
+ mp_ptr ws;
+
+ /* The maximum ws usage is for the mpn_mul result. */
+ ws = TMP_SALLOC_LIMBS (4 * vn);
+
+ mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+ un -= 2 * vn;
+ up += 2 * vn;
+ prodp += 2 * vn;
+
+ while (un >= 3 * vn)
+ {
+ mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+ un -= 2 * vn;
+ up += 2 * vn;
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+ mpn_incr_u (prodp + vn, cy);
+ prodp += 2 * vn;
+ }
+
+ /* vn <= un < 3vn */
+
+ if (4 * un < 5 * vn)
+ mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+ else if (4 * un < 7 * vn)
+ mpn_toom32_mul (ws, up, un, vp, vn, scratch);
+ else
+ mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, un);
+ mpn_incr_u (prodp + vn, cy);
+ }
+ else
+ {
+ if (4 * un < 5 * vn)
+ mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+ else if (4 * un < 7 * vn)
+ mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+ else
+ mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+ }
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
+ BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
+ {
+ /* Handle the largest operands that are not in the FFT range. The 2nd
+ condition makes very unbalanced operands avoid the FFT code (except
+	 perhaps as coefficient products of the Toom code). */
+
+ if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
+ {
+ /* Use ToomX3 variants */
+ mp_ptr scratch;
+ TMP_DECL; TMP_MARK;
+
+#define ITCH_TOOMX3 (4 * vn + GMP_NUMB_BITS)
+ scratch = TMP_ALLOC_LIMBS (ITCH_TOOMX3);
+ ASSERT (mpn_toom33_mul_itch ((7*vn-1)/6, vn) <= ITCH_TOOMX3); /* 7vn/2+ */
+ ASSERT (mpn_toom43_mul_itch ((3*vn-1)/2, vn) <= ITCH_TOOMX3); /* 9vn/4+ */
+ ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX3); /* 7vn/6+ */
+ ASSERT (mpn_toom53_mul_itch ((11*vn-1)/6, vn) <= ITCH_TOOMX3); /* 11vn/3+ */
+ ASSERT (mpn_toom42_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
+ ASSERT (mpn_toom63_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
+#undef ITCH_TOOMX3
+
+ if (2 * un >= 5 * vn)
+ {
+ mp_limb_t cy;
+ mp_ptr ws;
+
+ /* The maximum ws usage is for the mpn_mul result. */
+ ws = TMP_ALLOC_LIMBS (7 * vn >> 1);
+
+ if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+ mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+ else
+ mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
+ un -= 2 * vn;
+ up += 2 * vn;
+ prodp += 2 * vn;
+
+ while (2 * un >= 5 * vn) /* un >= 2.5vn */
+ {
+ if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+ mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+ else
+ mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
+ un -= 2 * vn;
+ up += 2 * vn;
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+ mpn_incr_u (prodp + vn, cy);
+ prodp += 2 * vn;
+ }
+
+ /* vn / 2 <= un < 2.5vn */
+
+ if (un < vn)
+ mpn_mul (ws, vp, vn, up, un);
+ else
+ mpn_mul (ws, up, un, vp, vn);
+
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, un);
+ mpn_incr_u (prodp + vn, cy);
+ }
+ else
+ {
+ if (6 * un < 7 * vn)
+ mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
+ else if (2 * un < 3 * vn)
+ {
+ if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
+ mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+ else
+ mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
+ }
+ else if (6 * un < 11 * vn)
+ {
+ if (4 * un < 7 * vn)
+ {
+ if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
+ mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+ else
+ mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
+ mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+ else
+ mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+ mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+ else
+ mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
+ }
+ }
+ TMP_FREE;
+ }
+ else
+ {
+ mp_ptr scratch;
+ TMP_DECL; TMP_MARK;
+
+ if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
+ {
+ scratch = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
+ mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
+ }
+ else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
+ {
+ scratch = TMP_SALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
+ mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
+ }
+ else
+ {
+ scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
+ mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
+ }
+ TMP_FREE;
+ }
+ }
+ else
+ {
+ if (un >= 8 * vn)
+ {
+ mp_limb_t cy;
+ mp_ptr ws;
+ TMP_DECL; TMP_MARK;
+
+ /* The maximum ws usage is for the mpn_mul result. */
+ ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
+
+ mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
+ un -= 3 * vn;
+ up += 3 * vn;
+ prodp += 3 * vn;
+
+ while (2 * un >= 7 * vn) /* un >= 3.5vn */
+ {
+ mpn_fft_mul (ws, up, 3 * vn, vp, vn);
+ un -= 3 * vn;
+ up += 3 * vn;
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, 3 * vn);
+ mpn_incr_u (prodp + vn, cy);
+ prodp += 3 * vn;
+ }
+
+ /* vn / 2 <= un < 3.5vn */
+
+ if (un < vn)
+ mpn_mul (ws, vp, vn, up, un);
+ else
+ mpn_mul (ws, up, un, vp, vn);
+
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, un);
+ mpn_incr_u (prodp + vn, cy);
+
+ TMP_FREE;
+ }
+ else
+ mpn_fft_mul (prodp, up, un, vp, vn);
+ }
+
+ return prodp[un + vn - 1]; /* historic */
+}
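
mpn_mul itself is part of the documented low-level interface, so its contract
(un >= vn >= 1, un+vn result limbs, no overlap with the inputs, most significant limb
returned) can be exercised directly.  A minimal usage sketch with arbitrary small
operands:

#include <gmp.h>
#include <stdio.h>

int
main (void)
{
  mp_limb_t up[4] = { 1, 2, 3, 4 };	/* least significant limb first */
  mp_limb_t vp[2] = { 5, 6 };
  mp_limb_t rp[4 + 2];			/* product needs un + vn limbs */

  mp_limb_t high = mpn_mul (rp, up, 4, vp, 2);	/* un >= vn required */

  for (int i = 5; i >= 0; i--)
    gmp_printf ("%Mx%s", rp[i], i ? " " : "\n");
  gmp_printf ("high limb (== rp[5]): %Mx\n", high);
  return 0;
}
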
diff --git a/vendor/gmp-6.3.0/mpn/generic/mul_1.c b/vendor/gmp-6.3.0/mpn/generic/mul_1.c
new file mode 100644
index 0000000..52d46da
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mul_1.c
@@ -0,0 +1,96 @@
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and store the
+ product in a second limb vector.
+
+Copyright 1991-1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+ mp_limb_t ul, cl, hpl, lpl;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+
+ cl = 0;
+ do
+ {
+ ul = *up++;
+ umul_ppmm (hpl, lpl, ul, vl);
+
+ lpl += cl;
+ cl = (lpl < cl) + hpl;
+
+ *rp++ = lpl;
+ }
+ while (--n != 0);
+
+ return cl;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+ mp_limb_t shifted_vl, ul, lpl, hpl, prev_hpl, xw, cl, xl;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+ ASSERT_MPN (up, n);
+ ASSERT_LIMB (vl);
+
+ shifted_vl = vl << GMP_NAIL_BITS;
+ cl = 0;
+ prev_hpl = 0;
+ do
+ {
+ ul = *up++;
+
+ umul_ppmm (hpl, lpl, ul, shifted_vl);
+ lpl >>= GMP_NAIL_BITS;
+ xw = prev_hpl + lpl + cl;
+ cl = xw >> GMP_NUMB_BITS;
+ xl = xw & GMP_NUMB_MASK;
+ *rp++ = xl;
+ prev_hpl = hpl;
+ }
+ while (--n != 0);
+
+ return prev_hpl + cl;
+}
+
+#endif
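
The carry handling in the nail-free loop above ("lpl += cl; cl = (lpl < cl) + hpl") is a
double-word accumulation written with single-word operations.  Below is a reference
version of the same recurrence, a sketch for comparison rather than GMP code, assuming
64-bit limbs and a compiler that provides unsigned __int128:

#include <stdint.h>
#include <assert.h>

static uint64_t
mul_1_ref (uint64_t *rp, const uint64_t *up, long n, uint64_t vl)
{
  uint64_t cl = 0;
  for (long i = 0; i < n; i++)
    {
      unsigned __int128 t = (unsigned __int128) up[i] * vl + cl;
      rp[i] = (uint64_t) t;		/* low limb of product plus carry-in */
      cl = (uint64_t) (t >> 64);	/* carry out, same as (lpl < cl) + hpl */
    }
  return cl;
}

int
main (void)
{
  uint64_t u[3] = { ~0ull, ~0ull, 1 }, r[3];
  uint64_t c = mul_1_ref (r, u, 3, 10);
  assert (c == 0 && r[0] == (uint64_t) 0 - 10);
  return 0;
}
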
diff --git a/vendor/gmp-6.3.0/mpn/generic/mul_basecase.c b/vendor/gmp-6.3.0/mpn/generic/mul_basecase.c
new file mode 100644
index 0000000..2487fba
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mul_basecase.c
@@ -0,0 +1,165 @@
+/* mpn_mul_basecase -- Internal routine to multiply two natural numbers
+ of length m and n.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 1991-1994, 1996, 1997, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+/* Multiply {up,usize} by {vp,vsize} and write the result to
+ {prodp,usize+vsize}. Must have usize>=vsize.
+
+ Note that prodp gets usize+vsize limbs stored, even if the actual result
+ only needs usize+vsize-1.
+
+ There's no good reason to call here with vsize>=MUL_TOOM22_THRESHOLD.
+ Currently this is allowed, but it might not be in the future.
+
+ This is the most critical code for multiplication. All multiplies rely
+ on this, both small and huge. Small ones arrive here immediately, huge
+ ones arrive here as this is the base case for Karatsuba's recursive
+ algorithm. */
+
+void
+mpn_mul_basecase (mp_ptr rp,
+ mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
+{
+ ASSERT (un >= vn);
+ ASSERT (vn >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un));
+ ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));
+
+ /* We first multiply by the low order limb (or depending on optional function
+ availability, limbs). This result can be stored, not added, to rp. We
+ also avoid a loop for zeroing this way. */
+
+#if HAVE_NATIVE_mpn_mul_2
+ if (vn >= 2)
+ {
+ rp[un + 1] = mpn_mul_2 (rp, up, un, vp);
+ rp += 2, vp += 2, vn -= 2;
+ }
+ else
+ {
+ rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+ return;
+ }
+#else
+ rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+ rp += 1, vp += 1, vn -= 1;
+#endif
+
+ /* Now accumulate the product of up[] and the next higher limb (or depending
+ on optional function availability, limbs) from vp[]. */
+
+#define MAX_LEFT MP_SIZE_T_MAX /* Used to simplify loops into if statements */
+
+
+#if HAVE_NATIVE_mpn_addmul_6
+ while (vn >= 6)
+ {
+ rp[un + 6 - 1] = mpn_addmul_6 (rp, up, un, vp);
+ if (MAX_LEFT == 6)
+ return;
+ rp += 6, vp += 6, vn -= 6;
+ if (MAX_LEFT < 2 * 6)
+ break;
+ }
+#undef MAX_LEFT
+#define MAX_LEFT (6 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_5
+ while (vn >= 5)
+ {
+ rp[un + 5 - 1] = mpn_addmul_5 (rp, up, un, vp);
+ if (MAX_LEFT == 5)
+ return;
+ rp += 5, vp += 5, vn -= 5;
+ if (MAX_LEFT < 2 * 5)
+ break;
+ }
+#undef MAX_LEFT
+#define MAX_LEFT (5 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_4
+ while (vn >= 4)
+ {
+ rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);
+ if (MAX_LEFT == 4)
+ return;
+ rp += 4, vp += 4, vn -= 4;
+ if (MAX_LEFT < 2 * 4)
+ break;
+ }
+#undef MAX_LEFT
+#define MAX_LEFT (4 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_3
+ while (vn >= 3)
+ {
+ rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);
+ if (MAX_LEFT == 3)
+ return;
+ rp += 3, vp += 3, vn -= 3;
+ if (MAX_LEFT < 2 * 3)
+ break;
+ }
+#undef MAX_LEFT
+#define MAX_LEFT (3 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2
+ while (vn >= 2)
+ {
+ rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);
+ if (MAX_LEFT == 2)
+ return;
+ rp += 2, vp += 2, vn -= 2;
+ if (MAX_LEFT < 2 * 2)
+ break;
+ }
+#undef MAX_LEFT
+#define MAX_LEFT (2 - 1)
+#endif
+
+ while (vn >= 1)
+ {
+ rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
+ if (MAX_LEFT == 1)
+ return;
+ rp += 1, vp += 1, vn -= 1;
+ }
+}
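
With none of the optional mpn_addmul_k entry points available, the function above
reduces to the classic row-by-row scheme: store the first partial product, then
accumulate each further row one limb higher.  A compact reference built only from the
documented mpn_mul_1/mpn_addmul_1 calls, checked against mpn_mul (operand values are
arbitrary):

#include <gmp.h>
#include <assert.h>
#include <string.h>

static void
basecase_ref (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);	/* first row is stored */
  for (mp_size_t i = 1; i < vn; i++)		/* remaining rows are added */
    rp[un + i] = mpn_addmul_1 (rp + i, up, un, vp[i]);
}

int
main (void)
{
  mp_limb_t up[3] = { 9, 8, 7 }, vp[2] = { 6, 5 };
  mp_limb_t r1[5], r2[5];
  basecase_ref (r1, up, 3, vp, 2);
  mpn_mul (r2, up, 3, vp, 2);
  assert (memcmp (r1, r2, sizeof r1) == 0);
  return 0;
}
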
diff --git a/vendor/gmp-6.3.0/mpn/generic/mul_fft.c b/vendor/gmp-6.3.0/mpn/generic/mul_fft.c
new file mode 100644
index 0000000..76a2106
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mul_fft.c
@@ -0,0 +1,1105 @@
+/* Schoenhage's fast multiplication modulo 2^N+1.
+
+ Contributed by Paul Zimmermann.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1998-2010, 2012, 2013, 2018, 2020, 2022 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* References:
+
+ Schnelle Multiplikation grosser Zahlen, by Arnold Schoenhage and Volker
+ Strassen, Computing 7, p. 281-292, 1971.
+
+ Asymptotically fast algorithms for the numerical multiplication and division
+ of polynomials with complex coefficients, by Arnold Schoenhage, Computer
+ Algebra, EUROCAM'82, LNCS 144, p. 3-15, 1982.
+
+ Tapes versus Pointers, a study in implementing fast algorithms, by Arnold
+ Schoenhage, Bulletin of the EATCS, 30, p. 23-32, 1986.
+
+ TODO:
+
+ Implement some of the tricks published at ISSAC'2007 by Gaudry, Kruppa, and
+ Zimmermann.
+
+ It might be possible to avoid a small number of MPN_COPYs by using a
+ rotating temporary or two.
+
+ Cleanup and simplify the code!
+*/
+
+#ifdef TRACE
+#undef TRACE
+#define TRACE(x) x
+#include <stdio.h>
+#else
+#define TRACE(x)
+#endif
+
+#include "gmp-impl.h"
+
+#ifdef WANT_ADDSUB
+#include "generic/add_n_sub_n.c"
+#define HAVE_NATIVE_mpn_add_n_sub_n 1
+#endif
+
+static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
+ mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
+ mp_size_t, mp_size_t, int **, mp_ptr, int);
+static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, mp_size_t, mp_size_t, mp_srcptr,
+ mp_size_t, mp_size_t, mp_size_t, mp_ptr);
+
+
+/* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
+   We have sqr=0 for a multiply, sqr=1 for a square.
+   There are three generations of this code; we keep the old ones as long as
+   some gmp-mparam.h files have not been updated. */
+
+
+/*****************************************************************************/
+
+#if TUNE_PROGRAM_BUILD || (defined (MUL_FFT_TABLE3) && defined (SQR_FFT_TABLE3))
+
+#ifndef FFT_TABLE3_SIZE /* When tuning this is defined in gmp-impl.h */
+#if defined (MUL_FFT_TABLE3_SIZE) && defined (SQR_FFT_TABLE3_SIZE)
+#if MUL_FFT_TABLE3_SIZE > SQR_FFT_TABLE3_SIZE
+#define FFT_TABLE3_SIZE MUL_FFT_TABLE3_SIZE
+#else
+#define FFT_TABLE3_SIZE SQR_FFT_TABLE3_SIZE
+#endif
+#endif
+#endif
+
+#ifndef FFT_TABLE3_SIZE
+#define FFT_TABLE3_SIZE 200
+#endif
+
+FFT_TABLE_ATTRS struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE] =
+{
+ MUL_FFT_TABLE3,
+ SQR_FFT_TABLE3
+};
+
+int
+mpn_fft_best_k (mp_size_t n, int sqr)
+{
+ const struct fft_table_nk *fft_tab, *tab;
+ mp_size_t tab_n, thres;
+ int last_k;
+
+ fft_tab = mpn_fft_table3[sqr];
+ last_k = fft_tab->k;
+ for (tab = fft_tab + 1; ; tab++)
+ {
+ tab_n = tab->n;
+ thres = tab_n << last_k;
+ if (n <= thres)
+ break;
+ last_k = tab->k;
+ }
+ return last_k;
+}
+
+#define MPN_FFT_BEST_READY 1
+#endif
+
+/*****************************************************************************/
+
+#if ! defined (MPN_FFT_BEST_READY)
+FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] =
+{
+ MUL_FFT_TABLE,
+ SQR_FFT_TABLE
+};
+
+int
+mpn_fft_best_k (mp_size_t n, int sqr)
+{
+ int i;
+
+ for (i = 0; mpn_fft_table[sqr][i] != 0; i++)
+ if (n < mpn_fft_table[sqr][i])
+ return i + FFT_FIRST_K;
+
+ /* treat 4*last as one further entry */
+ if (i == 0 || n < 4 * mpn_fft_table[sqr][i - 1])
+ return i + FFT_FIRST_K;
+ else
+ return i + FFT_FIRST_K + 1;
+}
+#endif
+
+/*****************************************************************************/
+
+
+/* Returns smallest possible number of limbs >= pl for a fft of size 2^k,
+ i.e. smallest multiple of 2^k >= pl.
+
+ Don't declare static: needed by tuneup.
+*/
+
+mp_size_t
+mpn_fft_next_size (mp_size_t pl, int k)
+{
+ pl = 1 + ((pl - 1) >> k); /* ceil (pl/2^k) */
+ return pl << k;
+}
+
+
+/* Initialize l[i][j] with bitrev(j) */
+static void
+mpn_fft_initl (int **l, int k)
+{
+ int i, j, K;
+ int *li;
+
+ l[0][0] = 0;
+ for (i = 1, K = 1; i <= k; i++, K *= 2)
+ {
+ li = l[i];
+ for (j = 0; j < K; j++)
+ {
+ li[j] = 2 * l[i - 1][j];
+ li[K + j] = 1 + li[j];
+ }
+ }
+}
+
+
+/* r <- a*2^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
+ Assumes a is semi-normalized, i.e. a[n] <= 1.
+ r and a must have n+1 limbs, and not overlap.
+*/
+static void
+mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t d, mp_size_t n)
+{
+ unsigned int sh;
+ mp_size_t m;
+ mp_limb_t cc, rd;
+
+ sh = d % GMP_NUMB_BITS;
+ m = d / GMP_NUMB_BITS;
+
+ if (m >= n) /* negate */
+ {
+ /* r[0..m-1] <-- lshift(a[n-m]..a[n-1], sh)
+ r[m..n-1] <-- -lshift(a[0]..a[n-m-1], sh) */
+
+ m -= n;
+ if (sh != 0)
+ {
+ /* no out shift below since a[n] <= 1 */
+ mpn_lshift (r, a + n - m, m + 1, sh);
+ rd = r[m];
+ cc = mpn_lshiftc (r + m, a, n - m, sh);
+ }
+ else
+ {
+ MPN_COPY (r, a + n - m, m);
+ rd = a[n];
+ mpn_com (r + m, a, n - m);
+ cc = 0;
+ }
+
+ /* add cc to r[0], and add rd to r[m] */
+
+ /* now add 1 in r[m], subtract 1 in r[n], i.e. add 1 in r[0] */
+
+ r[n] = 0;
+ /* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
+ ++cc;
+ MPN_INCR_U (r, n + 1, cc);
+
+ ++rd;
+ /* rd might overflow when sh=GMP_NUMB_BITS-1 */
+ cc = rd + (rd == 0);
+ r = r + m + (rd == 0);
+ MPN_INCR_U (r, n + 1 - m - (rd == 0), cc);
+ }
+ else
+ {
+ /* r[0..m-1] <-- -lshift(a[n-m]..a[n-1], sh)
+ r[m..n-1] <-- lshift(a[0]..a[n-m-1], sh) */
+ if (sh != 0)
+ {
+ /* no out bits below since a[n] <= 1 */
+ mpn_lshiftc (r, a + n - m, m + 1, sh);
+ rd = ~r[m];
+ /* {r, m+1} = {a+n-m, m+1} << sh */
+ cc = mpn_lshift (r + m, a, n - m, sh); /* {r+m, n-m} = {a, n-m}<<sh */
+ }
+ else
+ {
+ /* r[m] is not used below, but we save a test for m=0 */
+ mpn_com (r, a + n - m, m + 1);
+ rd = a[n];
+ MPN_COPY (r + m, a, n - m);
+ cc = 0;
+ }
+
+ /* now complement {r, m}, subtract cc from r[0], subtract rd from r[m] */
+
+ /* if m=0 we just have r[0]=a[n] << sh */
+ if (m != 0)
+ {
+ /* now add 1 in r[0], subtract 1 in r[m] */
+ if (cc-- == 0) /* then add 1 to r[0] */
+ cc = mpn_add_1 (r, r, n, CNST_LIMB(1));
+ cc = mpn_sub_1 (r, r, m, cc) + 1;
+ /* add 1 to cc instead of rd since rd might overflow */
+ }
+
+ /* now subtract cc and rd from r[m..n] */
+
+ r[n] = 2; /* Add a value, to avoid borrow propagation */
+ MPN_DECR_U (r + m, n - m + 1, cc);
+ MPN_DECR_U (r + m, n - m + 1, rd);
+ /* Remove the added value, and check for a possible borrow. */
+ if (UNLIKELY ((r[n] -= 2) != 0))
+ {
+ mp_limb_t cy = -r[n];
+ /* cy should always be 1, except in the very unlikely case
+ m=n-1, r[m]=0, cc+rd>GMP_NUMB_MAX+1. Never triggered.
+ Is it actually possible? */
+ r[n] = 0;
+ MPN_INCR_U (r, n + 1, cy);
+ }
+ }
+}
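
The operation implemented above is r = a * 2^d mod (2^(n*GMP_NUMB_BITS) + 1); since
2^(n*GMP_NUMB_BITS) is congruent to -1 modulo that F, limbs shifted past position n
wrap around with their sign flipped, which is what the two branches handle.  A small
mpz check of that identity (illustration only; the limb code above needs no division,
and the sizes and values below are arbitrary):

#include <gmp.h>
#include <assert.h>

int
main (void)
{
  const mp_bitcnt_t nbits = 3 * GMP_NUMB_BITS;	/* n = 3 limbs */
  mpz_t f, a, r, e;
  mpz_inits (f, a, r, e, NULL);

  mpz_ui_pow_ui (f, 2, nbits);
  mpz_add_ui (f, f, 1);			/* F = 2^(n*GMP_NUMB_BITS) + 1 */
  mpz_set_ui (a, 12345);

  mpz_mul_2exp (r, a, nbits + 7);	/* shift count larger than n limbs */
  mpz_mod (r, r, f);			/* r = a*2^(nbits+7) mod F */

  /* 2^nbits == -1 (mod F), so the expected value is F - a*2^7. */
  mpz_set (e, f);
  mpz_submul_ui (e, a, 128);
  assert (mpz_cmp (r, e) == 0);

  mpz_clears (f, a, r, e, NULL);
  return 0;
}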
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+static inline void
+mpn_fft_add_sub_modF (mp_ptr A0, mp_ptr Ai, mp_srcptr tp, mp_size_t n)
+{
+ mp_limb_t cyas, c, x;
+
+ cyas = mpn_add_n_sub_n (A0, Ai, A0, tp, n);
+
+ c = A0[n] - tp[n] - (cyas & 1);
+ x = (-c) & -((c & GMP_LIMB_HIGHBIT) != 0);
+ Ai[n] = x + c;
+ MPN_INCR_U (Ai, n + 1, x);
+
+ c = A0[n] + tp[n] + (cyas >> 1);
+ x = (c - 1) & -(c != 0);
+ A0[n] = c - x;
+ MPN_DECR_U (A0, n + 1, x);
+}
+
+#else /* ! HAVE_NATIVE_mpn_add_n_sub_n */
+
+/* r <- a+b mod 2^(n*GMP_NUMB_BITS)+1.
+ Assumes a and b are semi-normalized.
+*/
+static inline void
+mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+ mp_limb_t c, x;
+
+ c = a[n] + b[n] + mpn_add_n (r, a, b, n);
+ /* 0 <= c <= 3 */
+
+#if 1
+ /* GCC 4.1 outsmarts most expressions here, and generates a 50% branch. The
+ result is slower code, of course. But the following outsmarts GCC. */
+ x = (c - 1) & -(c != 0);
+ r[n] = c - x;
+ MPN_DECR_U (r, n + 1, x);
+#endif
+#if 0
+ if (c > 1)
+ {
+ r[n] = 1; /* r[n] - c = 1 */
+ MPN_DECR_U (r, n + 1, c - 1);
+ }
+ else
+ {
+ r[n] = c;
+ }
+#endif
+}
+
+/* r <- a-b mod 2^(n*GMP_NUMB_BITS)+1.
+ Assumes a and b are semi-normalized.
+*/
+static inline void
+mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+ mp_limb_t c, x;
+
+ c = a[n] - b[n] - mpn_sub_n (r, a, b, n);
+ /* -2 <= c <= 1 */
+
+#if 1
+ /* GCC 4.1 outsmarts most expressions here, and generates a 50% branch. The
+ result is slower code, of course. But the following outsmarts GCC. */
+ x = (-c) & -((c & GMP_LIMB_HIGHBIT) != 0);
+ r[n] = x + c;
+ MPN_INCR_U (r, n + 1, x);
+#endif
+#if 0
+ if ((c & GMP_LIMB_HIGHBIT) != 0)
+ {
+ r[n] = 0;
+ MPN_INCR_U (r, n + 1, -c);
+ }
+ else
+ {
+ r[n] = c;
+ }
+#endif
+}
+#endif /* HAVE_NATIVE_mpn_add_n_sub_n */
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+ N=n*GMP_NUMB_BITS, and 2^omega is a primitive root mod 2^N+1
+ output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1 */
+
+static void
+mpn_fft_fft (mp_ptr *Ap, mp_size_t K, int **ll,
+ mp_size_t omega, mp_size_t n, mp_size_t inc, mp_ptr tp)
+{
+ if (K == 2)
+ {
+ mp_limb_t cy;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ cy = mpn_add_n_sub_n (Ap[0], Ap[inc], Ap[0], Ap[inc], n + 1) & 1;
+#else
+ MPN_COPY (tp, Ap[0], n + 1);
+ mpn_add_n (Ap[0], Ap[0], Ap[inc], n + 1);
+ cy = mpn_sub_n (Ap[inc], tp, Ap[inc], n + 1);
+#endif
+ if (Ap[0][n] > 1) /* can be 2 or 3 */
+ { /* Ap[0][n] = 1 - mpn_sub_1 (Ap[0], Ap[0], n, Ap[0][n] - 1); */
+ mp_limb_t cc = Ap[0][n] - 1;
+ Ap[0][n] = 1;
+ MPN_DECR_U (Ap[0], n + 1, cc);
+ }
+ if (cy) /* Ap[inc][n] can be -1 or -2 */
+ { /* Ap[inc][n] = mpn_add_1 (Ap[inc], Ap[inc], n, ~Ap[inc][n] + 1); */
+ mp_limb_t cc = ~Ap[inc][n] + 1;
+ Ap[inc][n] = 0;
+ MPN_INCR_U (Ap[inc], n + 1, cc);
+ }
+ }
+ else
+ {
+ mp_size_t j, K2 = K >> 1;
+ int *lk = *ll;
+
+ mpn_fft_fft (Ap, K2, ll-1, 2 * omega, n, inc * 2, tp);
+ mpn_fft_fft (Ap+inc, K2, ll-1, 2 * omega, n, inc * 2, tp);
+ /* A[2*j*inc] <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
+ A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
+ for (j = 0; j < K2; j++, lk += 2, Ap += 2 * inc)
+ {
+ /* Ap[inc] <- Ap[0] + Ap[inc] * 2^(lk[1] * omega)
+ Ap[0] <- Ap[0] + Ap[inc] * 2^(lk[0] * omega) */
+ mpn_fft_mul_2exp_modF (tp, Ap[inc], lk[0] * omega, n);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ mpn_fft_add_sub_modF (Ap[0], Ap[inc], tp, n);
+#else
+ mpn_fft_sub_modF (Ap[inc], Ap[0], tp, n);
+ mpn_fft_add_modF (Ap[0], Ap[0], tp, n);
+#endif
+ }
+ }
+}
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+ N=n*GMP_NUMB_BITS, and 2^omega is a primitive root mod 2^N+1
+ output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1
+ tp must have space for 2*(n+1) limbs.
+*/
+
+
+/* Given ap[0..n] with ap[n]<=1, reduce it modulo 2^(n*GMP_NUMB_BITS)+1,
+ by subtracting that modulus if necessary.
+
+ If ap[0..n] is exactly 2^(n*GMP_NUMB_BITS) then mpn_sub_1 produces a
+ borrow and the limbs must be zeroed out again. This will occur very
+ infrequently. */
+
+static inline void
+mpn_fft_normalize (mp_ptr ap, mp_size_t n)
+{
+ if (ap[n] != 0)
+ {
+ MPN_DECR_U (ap, n + 1, CNST_LIMB(1));
+ if (ap[n] == 0)
+ {
+ /* This happens with very low probability; we have yet to trigger it,
+	     and thereby confirm that this code is correct. */
+ MPN_ZERO (ap, n);
+ ap[n] = 1;
+ }
+ else
+ ap[n] = 0;
+ }
+}
+
+/* a[i] <- a[i]*b[i] mod 2^(n*GMP_NUMB_BITS)+1 for 0 <= i < K */
+static void
+mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
+{
+ int i;
+ unsigned k;
+ int sqr = (ap == bp);
+ TMP_DECL;
+
+ TMP_MARK;
+
+ if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+ {
+ mp_size_t K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
+ int k;
+ int **fft_l, *tmp;
+ mp_ptr *Ap, *Bp, A, B, T;
+
+ k = mpn_fft_best_k (n, sqr);
+ K2 = (mp_size_t) 1 << k;
+ ASSERT_ALWAYS((n & (K2 - 1)) == 0);
+ maxLK = (K2 > GMP_NUMB_BITS) ? K2 : GMP_NUMB_BITS;
+ M2 = n * GMP_NUMB_BITS >> k;
+ l = n >> k;
+ Nprime2 = ((2 * M2 + k + 2 + maxLK) / maxLK) * maxLK;
+ /* Nprime2 = ceil((2*M2+k+3)/maxLK)*maxLK*/
+ nprime2 = Nprime2 / GMP_NUMB_BITS;
+
+ /* we should ensure that nprime2 is a multiple of the next K */
+ if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+ {
+ mp_size_t K3;
+ for (;;)
+ {
+ K3 = (mp_size_t) 1 << mpn_fft_best_k (nprime2, sqr);
+ if ((nprime2 & (K3 - 1)) == 0)
+ break;
+ nprime2 = (nprime2 + K3 - 1) & -K3;
+ Nprime2 = nprime2 * GMP_LIMB_BITS;
+ /* warning: since nprime2 changed, K3 may change too! */
+ }
+ }
+ ASSERT_ALWAYS(nprime2 < n); /* otherwise we'll loop */
+
+ Mp2 = Nprime2 >> k;
+
+ Ap = TMP_BALLOC_MP_PTRS (K2);
+ Bp = TMP_BALLOC_MP_PTRS (K2);
+ A = TMP_BALLOC_LIMBS (2 * (nprime2 + 1) << k);
+ T = TMP_BALLOC_LIMBS (2 * (nprime2 + 1));
+ B = A + ((nprime2 + 1) << k);
+ fft_l = TMP_BALLOC_TYPE (k + 1, int *);
+ tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+ for (i = 0; i <= k; i++)
+ {
+ fft_l[i] = tmp;
+ tmp += (mp_size_t) 1 << i;
+ }
+
+ mpn_fft_initl (fft_l, k);
+
+ TRACE (printf ("recurse: %ldx%ld limbs -> %ld times %ldx%ld (%1.2f)\n", n,
+ n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
+ for (i = 0; i < K; i++, ap++, bp++)
+ {
+ mp_limb_t cy;
+ mpn_fft_normalize (*ap, n);
+ if (!sqr)
+ mpn_fft_normalize (*bp, n);
+
+ mpn_mul_fft_decompose (A, Ap, K2, nprime2, *ap, (l << k) + 1, l, Mp2, T);
+ if (!sqr)
+ mpn_mul_fft_decompose (B, Bp, K2, nprime2, *bp, (l << k) + 1, l, Mp2, T);
+
+ cy = mpn_mul_fft_internal (*ap, n, k, Ap, Bp, A, B, nprime2,
+ l, Mp2, fft_l, T, sqr);
+ (*ap)[n] = cy;
+ }
+ }
+#if ! TUNE_PROGRAM_BUILD
+ else if (MPN_MULMOD_BKNP1_USABLE (n, k, MUL_FFT_MODF_THRESHOLD))
+ {
+ mp_ptr a;
+ mp_size_t n_k = n / k;
+
+ if (sqr)
+ {
+ mp_ptr tp = TMP_SALLOC_LIMBS (mpn_sqrmod_bknp1_itch (n));
+ for (i = 0; i < K; i++)
+ {
+ a = *ap++;
+ mpn_sqrmod_bknp1 (a, a, n_k, k, tp);
+ }
+ }
+ else
+ {
+ mp_ptr b, tp = TMP_SALLOC_LIMBS (mpn_mulmod_bknp1_itch (n));
+ for (i = 0; i < K; i++)
+ {
+ a = *ap++;
+ b = *bp++;
+ mpn_mulmod_bknp1 (a, a, b, n_k, k, tp);
+ }
+ }
+ }
+#endif
+ else
+ {
+ mp_ptr a, b, tp, tpn;
+ mp_limb_t cc;
+ mp_size_t n2 = 2 * n;
+ tp = TMP_BALLOC_LIMBS (n2);
+ tpn = tp + n;
+ TRACE (printf (" mpn_mul_n %ld of %ld limbs\n", K, n));
+ for (i = 0; i < K; i++)
+ {
+ a = *ap++;
+ b = *bp++;
+ if (sqr)
+ mpn_sqr (tp, a, n);
+ else
+ mpn_mul_n (tp, b, a, n);
+ if (a[n] != 0)
+ cc = mpn_add_n (tpn, tpn, b, n);
+ else
+ cc = 0;
+ if (b[n] != 0)
+ cc += mpn_add_n (tpn, tpn, a, n) + a[n];
+ if (cc != 0)
+ {
+ cc = mpn_add_1 (tp, tp, n2, cc);
+ /* If mpn_add_1 gives a carry (cc != 0),
+ the result (tp) is at most GMP_NUMB_MAX - 1,
+ so the following addition can't overflow.
+ */
+ tp[0] += cc;
+ }
+ cc = mpn_sub_n (a, tp, tpn, n);
+ a[n] = 0;
+ MPN_INCR_U (a, n + 1, cc);
+ }
+ }
+ TMP_FREE;
+}
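+
+/* In the generic branch above, the reduction uses B^n = -1 (mod B^n+1):
+ writing the plain product as h*B^n + l (with l = {tp,n}, h = {tpn,n}),
+ the residue is l - h. The semi-normalized top limbs a[n] and b[n] (each
+ 0 or 1) contribute a[n]*b + b[n]*a at position n, which is why they are
+ added into the high half {tpn,n} before the subtraction, while the
+ product a[n]*b[n] and any carry out of position 2n (B^(2n) = 1) are
+ folded back in at position 0. */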
+
+
+/* input: A^[l[k][0]] A^[l[k][1]] ... A^[l[k][K-1]]
+ output: K*A[0] K*A[K-1] ... K*A[1].
+ Assumes the Ap[] are pseudo-normalized, i.e. 0 <= Ap[][n] <= 1.
+ This condition is also fulfilled at exit.
+*/
+static void
+mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp)
+{
+ if (K == 2)
+ {
+ mp_limb_t cy;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ cy = mpn_add_n_sub_n (Ap[0], Ap[1], Ap[0], Ap[1], n + 1) & 1;
+#else
+ MPN_COPY (tp, Ap[0], n + 1);
+ mpn_add_n (Ap[0], Ap[0], Ap[1], n + 1);
+ cy = mpn_sub_n (Ap[1], tp, Ap[1], n + 1);
+#endif
+ if (Ap[0][n] > 1) /* can be 2 or 3 */
+ { /* Ap[0][n] = 1 - mpn_sub_1 (Ap[0], Ap[0], n, Ap[0][n] - 1); */
+ mp_limb_t cc = Ap[0][n] - 1;
+ Ap[0][n] = 1;
+ MPN_DECR_U (Ap[0], n + 1, cc);
+ }
+ if (cy) /* Ap[1][n] can be -1 or -2 */
+ { /* Ap[1][n] = mpn_add_1 (Ap[1], Ap[1], n, ~Ap[1][n] + 1); */
+ mp_limb_t cc = ~Ap[1][n] + 1;
+ Ap[1][n] = 0;
+ MPN_INCR_U (Ap[1], n + 1, cc);
+ }
+ }
+ else
+ {
+ mp_size_t j, K2 = K >> 1;
+
+ mpn_fft_fftinv (Ap, K2, 2 * omega, n, tp);
+ mpn_fft_fftinv (Ap + K2, K2, 2 * omega, n, tp);
+ /* A[j] <- A[j] + omega^j A[j+K/2]
+ A[j+K/2] <- A[j] + omega^(j+K/2) A[j+K/2] */
+ for (j = 0; j < K2; j++, Ap++)
+ {
+ /* Ap[K2] <- Ap[0] + Ap[K2] * 2^((j + K2) * omega)
+ Ap[0] <- Ap[0] + Ap[K2] * 2^(j * omega) */
+ mpn_fft_mul_2exp_modF (tp, Ap[K2], j * omega, n);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ mpn_fft_add_sub_modF (Ap[0], Ap[K2], tp, n);
+#else
+ mpn_fft_sub_modF (Ap[K2], Ap[0], tp, n);
+ mpn_fft_add_modF (Ap[0], Ap[0], tp, n);
+#endif
+ }
+ }
+}
+
+
+/* R <- A/2^k mod 2^(n*GMP_NUMB_BITS)+1 */
+static void
+mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t k, mp_size_t n)
+{
+ mp_bitcnt_t i;
+
+ ASSERT (r != a);
+ i = (mp_bitcnt_t) 2 * n * GMP_NUMB_BITS - k;
+ mpn_fft_mul_2exp_modF (r, a, i, n);
+ /* 1/2^k = 2^(2nL-k) mod 2^(n*GMP_NUMB_BITS)+1 */
+ /* normalize so that R < 2^(n*GMP_NUMB_BITS)+1 */
+ mpn_fft_normalize (r, n);
+}
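+
+/* The identity used above: 2^(n*GMP_NUMB_BITS) = -1, hence
+ 2^(2*n*GMP_NUMB_BITS) = 1 and 1/2^k = 2^(2*n*GMP_NUMB_BITS - k). A tiny
+ numerical instance of the same identity, with modulus 2^8+1 = 257:
+ 1/2 = 2^(16-1) = 2^15 = 129 (mod 257), and indeed 2*129 = 258 = 1
+ (mod 257). */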
+
+
+/* {rp,n} <- {ap,an} mod 2^(n*GMP_NUMB_BITS)+1, n <= an <= 3*n.
+ Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1,
+ then {rp,n}=0.
+*/
+static mp_size_t
+mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
+{
+ mp_size_t l, m, rpn;
+ mp_limb_t cc;
+
+ ASSERT ((n <= an) && (an <= 3 * n));
+ m = an - 2 * n;
+ if (m > 0)
+ {
+ l = n;
+ /* add {ap, m} and {ap+2n, m} in {rp, m} */
+ cc = mpn_add_n (rp, ap, ap + 2 * n, m);
+ /* copy {ap+m, n-m} to {rp+m, n-m} */
+ rpn = mpn_add_1 (rp + m, ap + m, n - m, cc);
+ }
+ else
+ {
+ l = an - n; /* l <= n */
+ MPN_COPY (rp, ap, n);
+ rpn = 0;
+ }
+
+ /* remains to subtract {ap+n, l} from {rp, n+1} */
+ rpn -= mpn_sub (rp, rp, n, ap + n, l);
+ if (rpn < 0) /* necessarily rpn = -1 */
+ rpn = mpn_add_1 (rp, rp, n, CNST_LIMB(1));
+ return rpn;
+}
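+
+/* The reduction above relies on B^n = -1 (mod B^n+1): splitting
+ {ap,an} = a0 + a1*B^n + a2*B^(2n), with a0 of n limbs, a1 of l limbs and
+ a2 of max(an-2n,0) limbs, the residue is a0 + a2 - a1; the code first
+ forms a0 + a2 in {rp,n} (carry in rpn) and then subtracts a1. */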
+
+/* store in A[0..nprime] the first M bits from {n, nl},
+ in A[nprime+1..] the following M bits, ...
+ Assumes M is a multiple of GMP_NUMB_BITS (M = l * GMP_NUMB_BITS).
+ T must have space for at least (nprime + 1) limbs.
+ We must have nl <= 2*K*l.
+*/
+static void
+mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
+ mp_srcptr n, mp_size_t nl, mp_size_t l, mp_size_t Mp,
+ mp_ptr T)
+{
+ mp_size_t i, j;
+ mp_ptr tmp;
+ mp_size_t Kl = K * l;
+ TMP_DECL;
+ TMP_MARK;
+
+ if (nl > Kl) /* normalize {n, nl} mod 2^(Kl*GMP_NUMB_BITS)+1 */
+ {
+ mp_size_t dif = nl - Kl;
+
+ tmp = TMP_BALLOC_LIMBS(Kl + 1);
+ tmp[Kl] = 0;
+
+#if ! WANT_OLD_FFT_FULL
+ ASSERT_ALWAYS (dif <= Kl);
+#else
+ /* The comment "We must have nl <= 2*K*l." says that
+ ((dif = nl - Kl) > Kl) should never happen. */
+ if (UNLIKELY (dif > Kl))
+ {
+ mp_limb_signed_t cy;
+ int subp = 0;
+
+ cy = mpn_sub_n (tmp, n, n + Kl, Kl);
+ n += 2 * Kl;
+ dif -= Kl;
+
+ /* now dif > 0 */
+ while (dif > Kl)
+ {
+ if (subp)
+ cy += mpn_sub_n (tmp, tmp, n, Kl);
+ else
+ cy -= mpn_add_n (tmp, tmp, n, Kl);
+ subp ^= 1;
+ n += Kl;
+ dif -= Kl;
+ }
+ /* now dif <= Kl */
+ if (subp)
+ cy += mpn_sub (tmp, tmp, Kl, n, dif);
+ else
+ cy -= mpn_add (tmp, tmp, Kl, n, dif);
+ if (cy >= 0)
+ MPN_INCR_U (tmp, Kl + 1, cy);
+ else
+ {
+ tmp[Kl] = 1;
+ MPN_DECR_U (tmp, Kl + 1, -cy - 1);
+ }
+ }
+ else /* dif <= Kl, i.e. nl <= 2 * Kl */
+#endif
+ {
+ mp_limb_t cy;
+ cy = mpn_sub (tmp, n, Kl, n + Kl, dif);
+ MPN_INCR_U (tmp, Kl + 1, cy);
+ }
+ nl = Kl + 1;
+ n = tmp;
+ }
+ for (i = 0; i < K; i++)
+ {
+ Ap[i] = A;
+ /* store the next M bits of n into A[0..nprime] */
+ if (nl > 0) /* nl is the number of remaining limbs */
+ {
+ j = (l <= nl && i < K - 1) ? l : nl; /* store j next limbs */
+ nl -= j;
+ MPN_COPY (T, n, j);
+ MPN_ZERO (T + j, nprime + 1 - j);
+ n += l;
+ mpn_fft_mul_2exp_modF (A, T, i * Mp, nprime);
+ }
+ else
+ MPN_ZERO (A, nprime + 1);
+ A += nprime + 1;
+ }
+ ASSERT_ALWAYS (nl == 0);
+ TMP_FREE;
+}
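+
+/* The multiplication by 2^(i*Mp) above is the usual Schoenhage-Strassen
+ weighting: the product mod 2^N+1 is a negacyclic convolution of the K
+ chunks, and since (2^Mp)^K = 2^Nprime = -1 in the coefficient ring
+ mod 2^Nprime+1, pre-multiplying chunk i by 2^(i*Mp) lets the code use a
+ plain cyclic transform of length K (with root 2^(2*Mp)) instead. */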
+
+/* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*GMP_NUMB_BITS
+ op is pl limbs, its high bit is returned.
+ One must have pl = mpn_fft_next_size (pl, k).
+ T must have space for 2 * (nprime + 1) limbs.
+*/
+
+static mp_limb_t
+mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
+ mp_ptr *Ap, mp_ptr *Bp, mp_ptr unusedA, mp_ptr B,
+ mp_size_t nprime, mp_size_t l, mp_size_t Mp,
+ int **fft_l, mp_ptr T, int sqr)
+{
+ mp_size_t K, i, pla, lo, sh, j;
+ mp_ptr p;
+ mp_limb_t cc;
+
+ K = (mp_size_t) 1 << k;
+
+ /* direct fft's */
+ mpn_fft_fft (Ap, K, fft_l + k, 2 * Mp, nprime, 1, T);
+ if (!sqr)
+ mpn_fft_fft (Bp, K, fft_l + k, 2 * Mp, nprime, 1, T);
+
+ /* term to term multiplications */
+ mpn_fft_mul_modF_K (Ap, sqr ? Ap : Bp, nprime, K);
+
+ /* inverse fft's */
+ mpn_fft_fftinv (Ap, K, 2 * Mp, nprime, T);
+
+ /* division of terms after inverse fft */
+ Bp[0] = T + nprime + 1;
+ mpn_fft_div_2exp_modF (Bp[0], Ap[0], k, nprime);
+ for (i = 1; i < K; i++)
+ {
+ Bp[i] = Ap[i - 1];
+ mpn_fft_div_2exp_modF (Bp[i], Ap[i], k + (K - i) * Mp, nprime);
+ }
+
+ /* addition of terms in result p */
+ MPN_ZERO (T, nprime + 1);
+ pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
+ p = B; /* B has K*(n' + 1) limbs, which is >= pla, i.e. enough */
+ MPN_ZERO (p, pla);
+ cc = 0; /* will accumulate the (signed) carry at p[pla] */
+ for (i = K - 1, lo = l * i + nprime,sh = l * i; i >= 0; i--,lo -= l,sh -= l)
+ {
+ mp_ptr n = p + sh;
+
+ j = (K - i) & (K - 1);
+
+ cc += mpn_add (n, n, pla - sh, Bp[j], nprime + 1);
+ T[2 * l] = i + 1; /* T = (i + 1)*2^(2*M) */
+ if (mpn_cmp (Bp[j], T, nprime + 1) > 0)
+ { /* subtract 2^N'+1 */
+ cc -= mpn_sub_1 (n, n, pla - sh, CNST_LIMB(1));
+ cc -= mpn_sub_1 (p + lo, p + lo, pla - lo, CNST_LIMB(1));
+ }
+ }
+ if (cc == -CNST_LIMB(1))
+ {
+ if ((cc = mpn_add_1 (p + pla - pl, p + pla - pl, pl, CNST_LIMB(1))))
+ {
+ /* p[pla-pl]...p[pla-1] are all zero */
+ mpn_sub_1 (p + pla - pl - 1, p + pla - pl - 1, pl + 1, CNST_LIMB(1));
+ mpn_sub_1 (p + pla - 1, p + pla - 1, 1, CNST_LIMB(1));
+ }
+ }
+ else if (cc == 1)
+ {
+ if (pla >= 2 * pl)
+ {
+ while ((cc = mpn_add_1 (p + pla - 2 * pl, p + pla - 2 * pl, 2 * pl, cc)))
+ ;
+ }
+ else
+ {
+ MPN_DECR_U (p + pla - pl, pl, cc);
+ }
+ }
+ else
+ ASSERT (cc == 0);
+
+ /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ]
+ < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ]
+ < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */
+ return mpn_fft_norm_modF (op, pl, p, pla);
+}
+
+/* return the lcm of a and 2^k */
+static mp_bitcnt_t
+mpn_mul_fft_lcm (mp_bitcnt_t a, int k)
+{
+ mp_bitcnt_t l = k;
+
+ while (a % 2 == 0 && k > 0)
+ {
+ a >>= 1;
+ k --;
+ }
+ return a << l;
+}
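+
+/* Writing a = m * 2^j with m odd, lcm (a, 2^k) = m * 2^max(j,k); the loop
+ strips min(j,k) factors of 2 from a and the final shift by the original
+ k restores m * 2^(j-min(j,k)+k) = m * 2^max(j,k). For instance, with
+ a = 64 and k = 4 this returns 64 = lcm (64, 16), and with a = 24, k = 4
+ it returns 48 = lcm (24, 16). */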
+
+
+mp_limb_t
+mpn_mul_fft (mp_ptr op, mp_size_t pl,
+ mp_srcptr n, mp_size_t nl,
+ mp_srcptr m, mp_size_t ml,
+ int k)
+{
+ int i;
+ mp_size_t K, maxLK;
+ mp_size_t N, Nprime, nprime, M, Mp, l;
+ mp_ptr *Ap, *Bp, A, T, B;
+ int **fft_l, *tmp;
+ int sqr = (n == m && nl == ml);
+ mp_limb_t h;
+ TMP_DECL;
+
+ TRACE (printf ("\nmpn_mul_fft pl=%ld nl=%ld ml=%ld k=%d\n", pl, nl, ml, k));
+ ASSERT_ALWAYS (mpn_fft_next_size (pl, k) == pl);
+
+ TMP_MARK;
+ N = pl * GMP_NUMB_BITS;
+ fft_l = TMP_BALLOC_TYPE (k + 1, int *);
+ tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+ for (i = 0; i <= k; i++)
+ {
+ fft_l[i] = tmp;
+ tmp += (mp_size_t) 1 << i;
+ }
+
+ mpn_fft_initl (fft_l, k);
+ K = (mp_size_t) 1 << k;
+ M = N >> k; /* N = 2^k M */
+ l = 1 + (M - 1) / GMP_NUMB_BITS;
+ maxLK = mpn_mul_fft_lcm (GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
+
+ Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
+ /* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */
+ nprime = Nprime / GMP_NUMB_BITS;
+ TRACE (printf ("N=%ld K=%ld, M=%ld, l=%ld, maxLK=%ld, Np=%ld, np=%ld\n",
+ N, K, M, l, maxLK, Nprime, nprime));
+ /* we should ensure that recursively, nprime is a multiple of the next K */
+ if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+ {
+ mp_size_t K2;
+ for (;;)
+ {
+ K2 = (mp_size_t) 1 << mpn_fft_best_k (nprime, sqr);
+ if ((nprime & (K2 - 1)) == 0)
+ break;
+ nprime = (nprime + K2 - 1) & -K2;
+ Nprime = nprime * GMP_LIMB_BITS;
+ /* warning: since nprime changed, K2 may change too! */
+ }
+ TRACE (printf ("new maxLK=%ld, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
+ }
+ ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */
+
+ T = TMP_BALLOC_LIMBS (2 * (nprime + 1));
+ Mp = Nprime >> k;
+
+ TRACE (printf ("%ldx%ld limbs -> %ld times %ldx%ld limbs (%1.2f)\n",
+ pl, pl, K, nprime, nprime, 2.0 * (double) N / Nprime / K);
+ printf (" temp space %ld\n", 2 * K * (nprime + 1)));
+
+ A = TMP_BALLOC_LIMBS (K * (nprime + 1));
+ Ap = TMP_BALLOC_MP_PTRS (K);
+ Bp = TMP_BALLOC_MP_PTRS (K);
+ mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
+ if (sqr)
+ {
+ mp_size_t pla;
+ pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
+ B = TMP_BALLOC_LIMBS (pla);
+ }
+ else
+ {
+ B = TMP_BALLOC_LIMBS (K * (nprime + 1));
+ mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
+ }
+ h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
+
+ TMP_FREE;
+ return h;
+}
+
+#if WANT_OLD_FFT_FULL
+/* multiply {n, nl} by {m, ml}, and put the result in {op, nl+ml} */
+void
+mpn_mul_fft_full (mp_ptr op,
+ mp_srcptr n, mp_size_t nl,
+ mp_srcptr m, mp_size_t ml)
+{
+ mp_ptr pad_op;
+ mp_size_t pl, pl2, pl3, l;
+ mp_size_t cc, c2, oldcc;
+ int k2, k3;
+ int sqr = (n == m && nl == ml);
+
+ pl = nl + ml; /* total number of limbs of the result */
+
+ /* perform a fft mod 2^(2N)+1 and one mod 2^(3N)+1.
+ We must have pl3 = 3/2 * pl2, with pl2 a multiple of 2^k2, and
+ pl3 a multiple of 2^k3. Since k3 >= k2, both are multiples of 2^k2,
+ and pl2 must be an even multiple of 2^k2. Thus (pl2,pl3) =
+ (2*j*2^k2,3*j*2^k2), which works for 3*j <= pl/2^k2 <= 5*j.
+ We need that consecutive intervals overlap, i.e. 5*j >= 3*(j+1),
+ which requires j>=2. Thus this scheme requires pl >= 6 * 2^FFT_FIRST_K. */
+
+ /* ASSERT_ALWAYS(pl >= 6 * (1 << FFT_FIRST_K)); */
+
+ pl2 = (2 * pl - 1) / 5; /* ceil (2pl/5) - 1 */
+ do
+ {
+ pl2++;
+ k2 = mpn_fft_best_k (pl2, sqr); /* best fft size for pl2 limbs */
+ pl2 = mpn_fft_next_size (pl2, k2);
+ pl3 = 3 * pl2 / 2; /* since k>=FFT_FIRST_K=4, pl2 is a multiple of 2^4,
+ thus pl2 / 2 is exact */
+ k3 = mpn_fft_best_k (pl3, sqr);
+ }
+ while (mpn_fft_next_size (pl3, k3) != pl3);
+
+ TRACE (printf ("mpn_mul_fft_full nl=%ld ml=%ld -> pl2=%ld pl3=%ld k=%d\n",
+ nl, ml, pl2, pl3, k2));
+
+ ASSERT_ALWAYS(pl3 <= pl);
+ cc = mpn_mul_fft (op, pl3, n, nl, m, ml, k3); /* mu */
+ ASSERT(cc == 0);
+ pad_op = __GMP_ALLOCATE_FUNC_LIMBS (pl2);
+ cc = mpn_mul_fft (pad_op, pl2, n, nl, m, ml, k2); /* lambda */
+ cc = -cc + mpn_sub_n (pad_op, pad_op, op, pl2); /* lambda - low(mu) */
+ /* 0 <= cc <= 1 */
+ ASSERT(0 <= cc && cc <= 1);
+ l = pl3 - pl2; /* l = pl2 / 2 since pl3 = 3/2 * pl2 */
+ c2 = mpn_add_n (pad_op, pad_op, op + pl2, l);
+ cc = mpn_add_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2) - cc;
+ ASSERT(-1 <= cc && cc <= 1);
+ if (cc < 0)
+ cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
+ ASSERT(0 <= cc && cc <= 1);
+ /* now lambda-mu = {pad_op, pl2} - cc mod 2^(pl2*GMP_NUMB_BITS)+1 */
+ oldcc = cc;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ c2 = mpn_add_n_sub_n (pad_op + l, pad_op, pad_op, pad_op + l, l);
+ cc += c2 >> 1; /* carry out from high <- low + high */
+ c2 = c2 & 1; /* borrow out from low <- low - high */
+#else
+ {
+ mp_ptr tmp;
+ TMP_DECL;
+
+ TMP_MARK;
+ tmp = TMP_BALLOC_LIMBS (l);
+ MPN_COPY (tmp, pad_op, l);
+ c2 = mpn_sub_n (pad_op, pad_op, pad_op + l, l);
+ cc += mpn_add_n (pad_op + l, tmp, pad_op + l, l);
+ TMP_FREE;
+ }
+#endif
+ c2 += oldcc;
+ /* first normalize {pad_op, pl2} before dividing by 2: c2 is the borrow
+ at pad_op + l, cc is the carry at pad_op + pl2 */
+ /* 0 <= cc <= 2 */
+ cc -= mpn_sub_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2);
+ /* -1 <= cc <= 2 */
+ if (cc > 0)
+ cc = -mpn_sub_1 (pad_op, pad_op, pl2, (mp_limb_t) cc);
+ /* now -1 <= cc <= 0 */
+ if (cc < 0)
+ cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
+ /* now {pad_op, pl2} is normalized, with 0 <= cc <= 1 */
+ if (pad_op[0] & 1) /* if odd, add 2^(pl2*GMP_NUMB_BITS)+1 */
+ cc += 1 + mpn_add_1 (pad_op, pad_op, pl2, CNST_LIMB(1));
+ /* now 0 <= cc <= 2, but cc=2 cannot occur since it would give a carry
+ out below */
+ mpn_rshift (pad_op, pad_op, pl2, 1); /* divide by two */
+ if (cc) /* then cc=1 */
+ pad_op [pl2 - 1] |= (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
+ /* now {pad_op,pl2}-cc = (lambda-mu)/(1-2^(l*GMP_NUMB_BITS))
+ mod 2^(pl2*GMP_NUMB_BITS) + 1 */
+ c2 = mpn_add_n (op, op, pad_op, pl2); /* no need to add cc (is 0) */
+ /* since pl2+pl3 >= pl, necessarily the extra limbs (including cc) are zero */
+ MPN_COPY (op + pl3, pad_op, pl - pl3);
+ ASSERT_MPN_ZERO_P (pad_op + pl - pl3, pl2 + pl3 - pl);
+ __GMP_FREE_FUNC_LIMBS (pad_op, pl2);
+ /* since the final result has at most pl limbs, no carry out below */
+ MPN_INCR_U (op + pl2, pl - pl2, (mp_limb_t) c2);
+}
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/mul_n.c b/vendor/gmp-6.3.0/mpn/generic/mul_n.c
new file mode 100644
index 0000000..36bd923
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mul_n.c
@@ -0,0 +1,96 @@
+/* mpn_mul_n -- multiply natural numbers.
+
+Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+ ASSERT (! MPN_OVERLAP_P (p, 2 * n, b, n));
+
+ if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ mpn_mul_basecase (p, a, n, b, n);
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))
+ {
+ /* Allocate workspace of fixed size on stack: fast! */
+ mp_limb_t ws[mpn_toom22_mul_itch (MUL_TOOM33_THRESHOLD_LIMIT-1,
+ MUL_TOOM33_THRESHOLD_LIMIT-1)];
+ ASSERT (MUL_TOOM33_THRESHOLD <= MUL_TOOM33_THRESHOLD_LIMIT);
+ mpn_toom22_mul (p, a, n, b, n, ws);
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom33_mul_itch (n, n));
+ mpn_toom33_mul (p, a, n, b, n, ws);
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (n, n));
+ mpn_toom44_mul (p, a, n, b, n, ws);
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom6_mul_n_itch (n));
+ mpn_toom6h_mul (p, a, n, b, n, ws);
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_DECL;
+ TMP_MARK;
+ ws = TMP_ALLOC_LIMBS (mpn_toom8_mul_n_itch (n));
+ mpn_toom8h_mul (p, a, n, b, n, ws);
+ TMP_FREE;
+ }
+ else
+ {
+ /* The current FFT code allocates its own space. That should probably
+ change. */
+ mpn_fft_mul (p, a, n, b, n);
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mullo_basecase.c b/vendor/gmp-6.3.0/mpn/generic/mullo_basecase.c
new file mode 100644
index 0000000..9a4cd3d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mullo_basecase.c
@@ -0,0 +1,90 @@
+/* mpn_mullo_basecase -- Internal routine to multiply two natural
+ numbers of length n and return the low part.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 2000, 2002, 2004, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* FIXME: Should optionally use mpn_mul_2/mpn_addmul_2. */
+
+#ifndef MULLO_VARIANT
+#define MULLO_VARIANT 2
+#endif
+
+
+#if MULLO_VARIANT == 1
+void
+mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_size_t i;
+
+ mpn_mul_1 (rp, up, n, vp[0]);
+
+ for (i = n - 1; i > 0; i--)
+ {
+ vp++;
+ rp++;
+ mpn_addmul_1 (rp, up, i, vp[0]);
+ }
+}
+#endif
+
+
+#if MULLO_VARIANT == 2
+void
+mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_limb_t h;
+
+ h = up[0] * vp[n - 1];
+
+ if (n != 1)
+ {
+ mp_size_t i;
+ mp_limb_t v0;
+
+ v0 = *vp++;
+ h += up[n - 1] * v0 + mpn_mul_1 (rp, up, n - 1, v0);
+ rp++;
+
+ for (i = n - 2; i > 0; i--)
+ {
+ v0 = *vp++;
+ h += up[i] * v0 + mpn_addmul_1 (rp, up, i, v0);
+ rp++;
+ }
+ }
+
+ rp[0] = h;
+}
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/mullo_n.c b/vendor/gmp-6.3.0/mpn/generic/mullo_n.c
new file mode 100644
index 0000000..6f4e7ae
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mullo_n.c
@@ -0,0 +1,243 @@
+/* mpn_mullo_n -- multiply two n-limb numbers and return the low n limbs
+ of their product.
+
+ Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+ THIS IS (FOR NOW) AN INTERNAL FUNCTION. IT IS ONLY SAFE TO REACH THIS
+ FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
+ THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005, 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_range_basecase 1
+#define MAYBE_range_toom22 1
+#else
+#define MAYBE_range_basecase \
+ ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM22_THRESHOLD*36/(36-11))
+#define MAYBE_range_toom22 \
+ ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM33_THRESHOLD*36/(36-11) )
+#endif
+
+/* THINK: The DC strategy uses different constants in different Toom's
+ ranges. Something smoother?
+*/
+
+/*
+ Compute the least significant half of the product {xp,n}*{yp,n}, or
+ formally {rp,n} = {xp,n}*{yp,n} Mod (B^n).
+
+ Above the given threshold, the Divide and Conquer strategy is used.
+ The operands are split in two, and a full product plus two mullo
+ are used to obtain the final result. The more natural strategy is to
+ split in two halves, but this is far from optimal when a
+ sub-quadratic multiplication is used.
+
+ Mulders suggests an unbalanced split in favour of the full product,
+ split n = n1 + n2, where an = n1 <= n2 = (1-a)n; i.e. 0 < a <= 1/2.
+
+ To compute the value of a, we assume that the cost of mullo for a
+ given size ML(n) is a fraction of the cost of a full product with
+ same size M(n), and the cost M(n)=n^e for some exponent 1 < e <= 2;
+ then we can write:
+
+ ML(n) = 2*ML(an) + M((1-a)n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
+
+ Given a value for e, we want to minimise the value of k, i.e. the
+ function k=(1-a)^e/(1-2*a^e).
+
+ With e=2, the exponent for schoolbook multiplication, the minimum is
+ given by the values a=1-a=1/2.
+
+ With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
+ Mulders computes (1-a) = 0.694... and we approximate a with 11/36.
+
+ Other possible approximations follow:
+ e=log(5)/log(3) [Toom-3] -> a ~= 9/40
+ e=log(7)/log(4) [Toom-4] -> a ~= 7/39
+ e=log(11)/log(6) [Toom-6] -> a ~= 1/8
+ e=log(15)/log(8) [Toom-8] -> a ~= 1/10
+
+ The values above were obtained with the following trivial commands
+ in the gp-pari shell:
+
+fun(e,a)=(1-a)^e/(1-2*a^e)
+mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
+contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
+contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
+contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
+contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
+contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
+
+ ,
+ |\
+ | \
+ +----,
+ | |
+ | |
+ | |\
+ | | \
+ +----+--`
+ ^ n2 ^n1^
+
+ For an actual implementation, the assumption that M(n)=n^e is
+ incorrect; as a consequence, the assumption that ML(n)=k*M(n)
+ with a constant k is also wrong.
+
+ But theory suggests two things:
+ - the better the full multiplication is (lower e), the more k
+ approaches 1, and a approaches 0.
+
+ - A value for a smaller than optimal is probably less bad than a
+ bigger one: e.g. let e=log(3)/log(2), a=0.3058_ the optimal
+ value, and k(a)=0.808_ the mul/mullo speed ratio. We get
+ k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
+*/
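+
+/* The same constants can be reproduced with a quick numerical search; a
+ rough C sketch (illustration only, not used by the library):
+*/
+#if 0
+#include <math.h>
+#include <stdio.h>
+
+/* k(a) = (1-a)^e / (1 - 2*a^e), the mullo/mul cost ratio from the text. */
+static double
+k_of_a (double e, double a)
+{
+ return pow (1.0 - a, e) / (1.0 - 2.0 * pow (a, e));
+}
+
+int
+main (void)
+{
+ /* exponents e for toom22, toom3, toom4, toom6, toom8 */
+ double es[5] = { log (3) / log (2), log (5) / log (3), log (7) / log (4),
+ log (11) / log (6), log (15) / log (8) };
+ int i;
+
+ for (i = 0; i < 5; i++)
+ {
+ double a, best_a = 0.5, best_k = 1e30;
+ for (a = 0.0001; a <= 0.5; a += 0.0001)
+ if (k_of_a (es[i], a) < best_k)
+ {
+ best_k = k_of_a (es[i], a);
+ best_a = a;
+ }
+ printf ("e = %.4f a ~= %.4f k(a) ~= %.4f\n", es[i], best_a, best_k);
+ }
+ return 0;
+}
+#endif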
+
+static mp_size_t
+mpn_mullo_n_itch (mp_size_t n)
+{
+ return 2*n;
+}
+
+/*
+ mpn_dc_mullo_n requires a scratch space of 2*n limbs at tp.
+ It accepts tp == rp.
+*/
+static void
+mpn_dc_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n, mp_ptr tp)
+{
+ mp_size_t n2, n1;
+ ASSERT (n >= 2);
+ ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+ ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
+
+ /* Divide-and-conquer */
+
+ /* We need fractional approximation of the value 0 < a <= 1/2
+ giving the minimum in the function k=(1-a)^e/(1-2*a^e).
+ */
+ if (MAYBE_range_basecase && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD*36/(36-11)))
+ n1 = n >> 1;
+ else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD*36/(36-11)))
+ n1 = n * 11 / (size_t) 36; /* n1 ~= n*(1-.694...) */
+ else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD*40/(40-9)))
+ n1 = n * 9 / (size_t) 40; /* n1 ~= n*(1-.775...) */
+ else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD*10/9))
+ n1 = n * 7 / (size_t) 39; /* n1 ~= n*(1-.821...) */
+ /* n1 = n * 4 / (size_t) 31; // n1 ~= n*(1-.871...) [TOOM66] */
+ else
+ n1 = n / (size_t) 10; /* n1 ~= n*(1-.899...) [TOOM88] */
+
+ n2 = n - n1;
+
+ /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0,
+ y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
+
+ /* x0 * y0 */
+ mpn_mul_n (tp, xp, yp, n2);
+ MPN_COPY (rp, tp, n2);
+
+ /* x1 * y0 * 2^(n2 GMP_NUMB_BITS) */
+ if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+ mpn_mul_basecase (tp + n, xp + n2, n1, yp, n1);
+ else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+ mpn_mullo_basecase (tp + n, xp + n2, yp, n1);
+ else
+ mpn_dc_mullo_n (tp + n, xp + n2, yp, n1, tp + n);
+ mpn_add_n (rp + n2, tp + n2, tp + n, n1);
+
+ /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
+ if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+ mpn_mul_basecase (tp + n, xp, n1, yp + n2, n1);
+ else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+ mpn_mullo_basecase (tp + n, xp, yp + n2, n1);
+ else
+ mpn_dc_mullo_n (tp + n, xp, yp + n2, n1, tp + n);
+ mpn_add_n (rp + n2, rp + n2, tp + n, n1);
+}
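+
+/* The recursion above relies on the identity (x = x1*B^n2 + x0,
+ y = y1*B^n2 + y0, n = n1 + n2, n1 <= n2):
+
+ x*y mod B^n = (x0*y0 + B^n2 * ((x1*y0 mod B^n1) + (x0*y1 mod B^n1))) mod B^n
+
+ since 2*n2 >= n makes the x1*y1 term vanish modulo B^n, and only the low
+ n1 limbs of each cross product can land below B^n. */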
+
+/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0. */
+#define MUL_BASECASE_ALLOC \
+ (MULLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLO_BASECASE_THRESHOLD_LIMIT)
+
+/* FIXME: This function should accept a temporary area; mpn_dc_mullo_n
+ accepts a pointer tp and handles the case tp == rp, so do the same here.
+ Maybe recombine the two functions.
+ THINK: If mpn_mul_basecase is always faster than mpn_mullo_basecase
+ (typically thanks to mpn_addmul_2) should we unconditionally use
+ mpn_mul_n?
+*/
+
+void
+mpn_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
+{
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+
+ if (BELOW_THRESHOLD (n, MULLO_BASECASE_THRESHOLD))
+ {
+ /* Allocate workspace of fixed size on stack: fast! */
+ mp_limb_t tp[MUL_BASECASE_ALLOC];
+ mpn_mul_basecase (tp, xp, n, yp, n);
+ MPN_COPY (rp, tp, n);
+ }
+ else if (BELOW_THRESHOLD (n, MULLO_DC_THRESHOLD))
+ {
+ mpn_mullo_basecase (rp, xp, yp, n);
+ }
+ else
+ {
+ mp_ptr tp;
+ TMP_DECL;
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS (mpn_mullo_n_itch (n));
+ if (BELOW_THRESHOLD (n, MULLO_MUL_N_THRESHOLD))
+ {
+ mpn_dc_mullo_n (rp, xp, yp, n, tp);
+ }
+ else
+ {
+ /* For really large operands, use plain mpn_mul_n but throw away upper n
+ limbs of result. */
+#if !TUNE_PROGRAM_BUILD && (MULLO_MUL_N_THRESHOLD > MUL_FFT_THRESHOLD)
+ mpn_fft_mul (tp, xp, n, yp, n);
+#else
+ mpn_mul_n (tp, xp, yp, n);
+#endif
+ MPN_COPY (rp, tp, n);
+ }
+ TMP_FREE;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mulmid.c b/vendor/gmp-6.3.0/mpn/generic/mulmid.c
new file mode 100644
index 0000000..f35c5fb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mulmid.c
@@ -0,0 +1,255 @@
+/* mpn_mulmid -- middle product
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
+
+
+void
+mpn_mulmid (mp_ptr rp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn)
+{
+ mp_size_t rn, k;
+ mp_ptr scratch, temp;
+
+ ASSERT (an >= bn);
+ ASSERT (bn >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
+ ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
+
+ if (bn < MULMID_TOOM42_THRESHOLD)
+ {
+ /* region not tall enough to make toom42 worthwhile for any portion */
+
+ if (an < CHUNK)
+ {
+ /* region not too wide either, just call basecase directly */
+ mpn_mulmid_basecase (rp, ap, an, bp, bn);
+ return;
+ }
+
+ /* Region quite wide. For better locality, use basecase on chunks:
+
+ AAABBBCC..
+ .AAABBBCC.
+ ..AAABBBCC
+ */
+
+ k = CHUNK - bn + 1; /* number of diagonals per chunk */
+
+ /* first chunk (marked A in the above diagram) */
+ mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+
+ /* remaining chunks (B, C, etc) */
+ an -= k;
+
+ while (an >= CHUNK)
+ {
+ mp_limb_t t0, t1, cy;
+ ap += k, rp += k;
+ t0 = rp[0], t1 = rp[1];
+ mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+ ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
+ MPN_INCR_U (rp + 1, k + 1, t1 + cy);
+ an -= k;
+ }
+
+ if (an >= bn)
+ {
+ /* last remaining chunk */
+ mp_limb_t t0, t1, cy;
+ ap += k, rp += k;
+ t0 = rp[0], t1 = rp[1];
+ mpn_mulmid_basecase (rp, ap, an, bp, bn);
+ ADDC_LIMB (cy, rp[0], rp[0], t0);
+ MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
+ }
+
+ return;
+ }
+
+ /* region is tall enough for toom42 */
+
+ rn = an - bn + 1;
+
+ if (rn < MULMID_TOOM42_THRESHOLD)
+ {
+ /* region not wide enough to make toom42 worthwhile for any portion */
+
+ TMP_DECL;
+
+ if (bn < CHUNK)
+ {
+ /* region not too tall either, just call basecase directly */
+ mpn_mulmid_basecase (rp, ap, an, bp, bn);
+ return;
+ }
+
+ /* Region quite tall. For better locality, use basecase on chunks:
+
+ AAAAA....
+ .AAAAA...
+ ..BBBBB..
+ ...BBBBB.
+ ....CCCCC
+ */
+
+ TMP_MARK;
+
+ temp = TMP_ALLOC_LIMBS (rn + 2);
+
+ /* first chunk (marked A in the above diagram) */
+ bp += bn - CHUNK, an -= bn - CHUNK;
+ mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
+
+ /* remaining chunks (B, C, etc) */
+ bn -= CHUNK;
+
+ while (bn >= CHUNK)
+ {
+ ap += CHUNK, bp -= CHUNK;
+ mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
+ mpn_add_n (rp, rp, temp, rn + 2);
+ bn -= CHUNK;
+ }
+
+ if (bn)
+ {
+ /* last remaining chunk */
+ ap += CHUNK, bp -= bn;
+ mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
+ mpn_add_n (rp, rp, temp, rn + 2);
+ }
+
+ TMP_FREE;
+ return;
+ }
+
+ /* we're definitely going to use toom42 somewhere */
+
+ if (bn > rn)
+ {
+ /* slice region into chunks, use toom42 on all chunks except possibly
+ the last:
+
+ AA....
+ .AA...
+ ..BB..
+ ...BB.
+ ....CC
+ */
+
+ TMP_DECL;
+ TMP_MARK;
+
+ temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
+ scratch = temp + rn + 2;
+
+ /* first chunk (marked A in the above diagram) */
+ bp += bn - rn;
+ mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
+
+ /* remaining chunks (B, C, etc) */
+ bn -= rn;
+
+ while (bn >= rn)
+ {
+ ap += rn, bp -= rn;
+ mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
+ mpn_add_n (rp, rp, temp, rn + 2);
+ bn -= rn;
+ }
+
+ if (bn)
+ {
+ /* last remaining chunk */
+ ap += rn, bp -= bn;
+ mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
+ mpn_add_n (rp, rp, temp, rn + 2);
+ }
+
+ TMP_FREE;
+ }
+ else
+ {
+ /* slice region into chunks, use toom42 on all chunks except possibly
+ the last:
+
+ AAABBBCC..
+ .AAABBBCC.
+ ..AAABBBCC
+ */
+
+ TMP_DECL;
+ TMP_MARK;
+
+ scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
+
+ /* first chunk (marked A in the above diagram) */
+ mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+
+ /* remaining chunks (B, C, etc) */
+ rn -= bn;
+
+ while (rn >= bn)
+ {
+ mp_limb_t t0, t1, cy;
+ ap += bn, rp += bn;
+ t0 = rp[0], t1 = rp[1];
+ mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+ ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
+ MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
+ rn -= bn;
+ }
+
+ TMP_FREE;
+
+ if (rn)
+ {
+ /* last remaining chunk */
+ mp_limb_t t0, t1, cy;
+ ap += bn, rp += bn;
+ t0 = rp[0], t1 = rp[1];
+ mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
+ ADDC_LIMB (cy, rp[0], rp[0], t0);
+ MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
+ }
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mulmid_basecase.c b/vendor/gmp-6.3.0/mpn/generic/mulmid_basecase.c
new file mode 100644
index 0000000..d5434ea
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mulmid_basecase.c
@@ -0,0 +1,82 @@
+/* mpn_mulmid_basecase -- classical middle product algorithm
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Middle product of {up,un} and {vp,vn}, write result to {rp,un-vn+3}.
+ Must have un >= vn >= 1.
+
+ Neither input buffer may overlap with the output buffer. */
+
+void
+mpn_mulmid_basecase (mp_ptr rp,
+ mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
+{
+ mp_limb_t lo, hi; /* last two limbs of output */
+ mp_limb_t cy;
+
+ ASSERT (un >= vn);
+ ASSERT (vn >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, up, un));
+ ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, vp, vn));
+
+ up += vn - 1;
+ un -= vn - 1;
+
+ /* multiply by first limb, store result */
+ lo = mpn_mul_1 (rp, up, un, vp[0]);
+ hi = 0;
+
+ /* accumulate remaining rows */
+ for (vn--; vn; vn--)
+ {
+ up--, vp++;
+ cy = mpn_addmul_1 (rp, up, un, vp[0]);
+ add_ssaaaa (hi, lo, hi, lo, CNST_LIMB(0), cy);
+ }
+
+ /* store final limbs */
+#if GMP_NAIL_BITS != 0
+ hi = (hi << GMP_NAIL_BITS) + (lo >> GMP_NUMB_BITS);
+ lo &= GMP_NUMB_MASK;
+#endif
+
+ rp[un] = lo;
+ rp[un + 1] = hi;
+}
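+
+/* In terms of the original arguments, the loop above computes
+
+ {rp, un-vn+3} = sum (j = 0 .. vn-1) vp[j] * {up + vn-1-j, un-vn+1}
+
+ i.e. exactly the un-vn+1 complete diagonals i+j = vn-1, ..., un-1 of the
+ full product {up,un}*{vp,vn}, with the high limbs returned by the rows
+ accumulated into the two extra limbs rp[un-vn+1] and rp[un-vn+2]. */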
diff --git a/vendor/gmp-6.3.0/mpn/generic/mulmid_n.c b/vendor/gmp-6.3.0/mpn/generic/mulmid_n.c
new file mode 100644
index 0000000..ac7e8f1
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mulmid_n.c
@@ -0,0 +1,61 @@
+/* mpn_mulmid_n -- balanced middle product
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+void
+mpn_mulmid_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+ ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+ if (n < MULMID_TOOM42_THRESHOLD)
+ {
+ mpn_mulmid_basecase (rp, ap, 2*n - 1, bp, n);
+ }
+ else
+ {
+ mp_ptr scratch;
+ TMP_DECL;
+ TMP_MARK;
+ scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (n));
+ mpn_toom42_mulmid (rp, ap, bp, n, scratch);
+ TMP_FREE;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mulmod_bknp1.c b/vendor/gmp-6.3.0/mpn/generic/mulmod_bknp1.c
new file mode 100644
index 0000000..feb10eb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mulmod_bknp1.c
@@ -0,0 +1,502 @@
+/* Multiplication mod B^n+1, for small operands.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2020-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#ifndef MOD_BKNP1_USE11
+#define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0))
+#endif
+#ifndef MOD_BKNP1_ONLY3
+#define MOD_BKNP1_ONLY3 0
+#endif
+
+/* {rp, (k - 1) * n} = {op, k * n + 1} % ((B^{k*n}+1) / (B^n+1)) */
+static void
+_mpn_modbknp1dbnp1_n (mp_ptr rp, mp_srcptr op, mp_size_t n, unsigned k)
+{
+ mp_limb_t hl;
+ mp_srcptr hp;
+ unsigned i;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ ASSERT (k > 2);
+ ASSERT (k % 2 == 1);
+
+ --k;
+
+ rp += k * n;
+ op += k * n;
+ hp = op;
+ hl = hp[n]; /* initial op[k*n]. */
+ ASSERT (hl < GMP_NUMB_MAX - 1);
+
+#if MOD_BKNP1_ONLY3 == 0
+ /* The first MPN_INCR_U (rp + n, 1, cy); in the loop should be
+ rp[n] = cy; */
+ *rp = 0;
+#endif
+
+ i = k >> 1;
+ do
+ {
+ mp_limb_t cy, bw;
+ rp -= n;
+ op -= n;
+ cy = hl + mpn_add_n (rp, op, hp, n);
+#if MOD_BKNP1_ONLY3
+ rp[n] = cy;
+#else
+ MPN_INCR_U (rp + n, (k - i * 2) * n + 1, cy);
+#endif
+ rp -= n;
+ op -= n;
+ bw = hl + mpn_sub_n (rp, op, hp, n);
+ MPN_DECR_U (rp + n, (k - i * 2 + 1) * n + 1, bw);
+ }
+ while (--i != 0);
+
+ for (; (hl = *(rp += k * n)) != 0; ) /* Should run only once... */
+ {
+ *rp = 0;
+ i = k >> 1;
+ do
+ {
+ rp -= n;
+ MPN_INCR_U (rp, (k - i * 2 + 1) * n + 1, hl);
+ rp -= n;
+ MPN_DECR_U (rp, (k - i * 2 + 2) * n + 1, hl);
+ }
+ while (--i != 0);
+ }
+}
+
+static void
+_mpn_modbnp1_pn_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+ ASSERT (r[n] == h);
+
+ /* Fully normalise */
+ MPN_DECR_U (r, n + 1, h);
+ h -= r[n];
+ r[n] = 0;
+ MPN_INCR_U (r, n + 1, h);
+}
+
+static void
+_mpn_modbnp1_neg_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+ r[n] = 0;
+ MPN_INCR_U (r, n + 1, -h);
+ if (UNLIKELY (r[n] != 0))
+ _mpn_modbnp1_pn_ip (r, n, 1);
+}
+
+static void
+_mpn_modbnp1_nc_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+ if (h & GMP_NUMB_HIGHBIT) /* This means h < 0 */
+ {
+ _mpn_modbnp1_neg_ip (r, n, h);
+ }
+ else
+ {
+ r[n] = h;
+ if (h)
+ _mpn_modbnp1_pn_ip(r, n, h);
+ }
+}
+
+/* {rp, rn + 1} = {op, on} mod (B^{rn}+1) */
+/* Used when rn < on < 2*rn. */
+static void
+_mpn_modbnp1 (mp_ptr rp, mp_size_t rn, mp_srcptr op, mp_size_t on)
+{
+ mp_limb_t bw;
+
+#if 0
+ if (UNLIKELY (on <= rn))
+ {
+ MPN_COPY (rp, op, on);
+ MPN_ZERO (rp + on, rn - on);
+ return;
+ }
+#endif
+
+ ASSERT (on > rn);
+ ASSERT (on <= 2 * rn);
+
+ bw = mpn_sub (rp, op, rn, op + rn, on - rn);
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn + 1, bw);
+}
+
+/* {rp, rn + 1} = {op, k * rn + 1} % (B^{rn}+1) */
+/* With odd k >= 3. */
+static void
+_mpn_modbnp1_kn (mp_ptr rp, mp_srcptr op, mp_size_t rn, unsigned k)
+{
+ mp_limb_t cy;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ ASSERT (k & 1);
+ k >>= 1;
+ ASSERT (0 < k && k < GMP_NUMB_HIGHBIT - 3);
+ ASSERT (op[(1 + 2 * k) * rn] < GMP_NUMB_HIGHBIT - 2 - k);
+
+ cy = - mpn_sub_n (rp, op, op + rn, rn);
+ for (;;) {
+ op += 2 * rn;
+ cy += mpn_add_n (rp, rp, op, rn);
+ if (--k == 0)
+ break;
+ cy -= mpn_sub_n (rp, rp, op + rn, rn);
+ };
+
+ cy += op[rn];
+ _mpn_modbnp1_nc_ip (rp, rn, cy);
+}
+
+/* For the various mpn_divexact_byN here, fall back to using either
+ mpn_pi1_bdiv_q_1 or mpn_divexact_1. The former has less overhead and is
+ faster if it is native. For now, since mpn_divexact_1 is native on
+ platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
+ mpn_pi1_bdiv_q_1 unconditionally. FIXME. */
+
+#ifndef mpn_divexact_by5
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_5 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 5 * 3 << 3) + 5) & GMP_NUMB_MAX)
+#define mpn_divexact_by5(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,5,BINVERT_5,0)
+#else
+#define mpn_divexact_by5(dst,src,size) mpn_divexact_1(dst,src,size,5)
+#endif
+#endif
+
+#ifndef mpn_divexact_by7
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_7 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 3)) / 7 * 3 << 4) + 7) & GMP_NUMB_MAX)
+#define mpn_divexact_by7(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,7,BINVERT_7,0)
+#else
+#define mpn_divexact_by7(dst,src,size) mpn_divexact_1(dst,src,size,7)
+#endif
+#endif
+
+#ifndef mpn_divexact_by11
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_11 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 10)) / 11 << 5) + 3) & GMP_NUMB_MAX)
+#define mpn_divexact_by11(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,11,BINVERT_11,0)
+#else
+#define mpn_divexact_by11(dst,src,size) mpn_divexact_1(dst,src,size,11)
+#endif
+#endif
+
+#ifndef mpn_divexact_by13
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_13 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 12)) / 13 * 3 << 14) + 3781) & GMP_NUMB_MAX)
+#define mpn_divexact_by13(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,13,BINVERT_13,0)
+#else
+#define mpn_divexact_by13(dst,src,size) mpn_divexact_1(dst,src,size,13)
+#endif
+#endif
+
+#ifndef mpn_divexact_by17
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_17 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 8)) / 17 * 15 << 7) + 113) & GMP_NUMB_MAX)
+#define mpn_divexact_by17(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,17,BINVERT_17,0)
+#else
+#define mpn_divexact_by17(dst,src,size) mpn_divexact_1(dst,src,size,17)
+#endif
+#endif
+
+/* Thanks to the Chinese remainder theorem, store
+ in {rp, k*n+1} the value mod (B^(k*n)+1), given
+ {ap, k*n+1} mod ((B^(k*n)+1)/(B^n+1)) and
+ {bp, n+1} mod (B^n+1).
+ {tp, n+1} is a scratch area.
+ tp == rp or rp == ap are possible.
+*/
+static void
+_mpn_crt (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
+ mp_size_t n, unsigned k, mp_ptr tp)
+{
+ mp_limb_t mod;
+ unsigned i;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ _mpn_modbnp1_kn (tp, ap, n, k);
+ if (mpn_sub_n (tp, bp, tp, n + 1))
+ _mpn_modbnp1_neg_ip (tp, n, tp[n]);
+
+#if MOD_BKNP1_USE11
+ if (UNLIKELY (k == 11))
+ {
+ ASSERT (GMP_NUMB_BITS % 2 == 0);
+ /* mod <- -Mod(B^n+1,11)^-1 */
+ mod = n * (GMP_NUMB_BITS % 5) % 5;
+ if ((mod > 2) || UNLIKELY (mod == 0))
+ mod += 5;
+
+ mod *= mpn_mod_1 (tp, n + 1, 11);
+ }
+ else
+#endif
+ {
+#if GMP_NUMB_BITS % 8 == 0
+ /* (2^6 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1) */
+ /* (2^6 - 1) = 3^2 * 7 */
+ mod = mpn_mod_34lsub1 (tp, n + 1);
+ ASSERT ((GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2)) % k == 0);
+ /* (2^12 - 1) = 3^2 * 5 * 7 * 13 */
+ /* (2^24 - 1) = 3^2 * 5 * 7 * 13 * 17 * 241 */
+ ASSERT (k == 3 || k == 5 || k == 7 || k == 13 || k == 17);
+
+#if GMP_NUMB_BITS % 3 != 0
+ if (UNLIKELY (k != 3))
+ {
+ ASSERT ((GMP_NUMB_MAX % k == 0) || (n % 3 != 0));
+ if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
+ mod <<= 1; /* k >> 1 = 1 << 1 */
+ else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7))
+ mod <<= (n << (GMP_NUMB_BITS % 3 >> 1)) % 3;
+ else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
+ mod *= ((n << (GMP_NUMB_BITS % 3 >> 1)) % 3 == 1) ? 3 : 9;
+ else /* k == 17 */
+ mod <<= 3; /* k >> 1 = 1 << 3 */
+#if 0
+ if ((GMP_NUMB_BITS == 8) /* && (k == 7) */ ||
+ (GMP_NUMB_BITS == 16) && (k == 13))
+ mod = ((mod & (GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2))) +
+ (mod >> (3 * GMP_NUMB_BITS >> 2)));
+#endif
+ }
+#else
+ ASSERT (GMP_NUMB_MAX % k == 0);
+ /* 2^{GMP_NUMB_BITS} - 1 = 0 (mod k) */
+ /* 2^{GMP_NUMB_BITS} = 1 (mod k) */
+ /* 2^{n*GMP_NUMB_BITS} + 1 = 2 (mod k) */
+ /* -2^{-1} = k >> 1 (mod k) */
+ mod *= k >> 1;
+#endif
+#else
+ ASSERT_ALWAYS (k == 0); /* Not implemented, should not be used. */
+#endif
+ }
+
+ MPN_INCR_U (tp, n + 1, mod);
+ tp[n] += mod;
+
+ if (LIKELY (k == 3))
+ ASSERT_NOCARRY (mpn_divexact_by3 (tp, tp, n + 1));
+ else if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
+ mpn_divexact_by5 (tp, tp, n + 1);
+ else if (((! MOD_BKNP1_USE11) && (GMP_NUMB_BITS % 16 != 0))
+ || LIKELY (k == 7))
+ mpn_divexact_by7 (tp, tp, n + 1);
+#if MOD_BKNP1_USE11
+ else if (k == 11)
+ mpn_divexact_by11 (tp, tp, n + 1);
+#endif
+ else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
+ mpn_divexact_by13 (tp, tp, n + 1);
+ else /* (k == 17) */
+ mpn_divexact_by17 (tp, tp, n + 1);
+
+ rp += k * n;
+ ap += k * n; /* tp - 1 */
+
+ rp -= n;
+ ap -= n;
+ ASSERT_NOCARRY (mpn_add_n (rp, ap, tp, n + 1));
+
+ i = k >> 1;
+ do
+ {
+ mp_limb_t cy, bw;
+ rp -= n;
+ ap -= n;
+ bw = mpn_sub_n (rp, ap, tp, n) + tp[n];
+ MPN_DECR_U (rp + n, (k - i * 2) * n + 1, bw);
+ rp -= n;
+ ap -= n;
+ cy = mpn_add_n (rp, ap, tp, n) + tp[n];
+ MPN_INCR_U (rp + n, (k - i * 2 + 1) * n + 1, cy);
+ }
+ while (--i != 0);
+
+ /* if (LIKELY (rp[k * n])) */
+ _mpn_modbnp1_pn_ip (rp, k * n, rp[k * n]);
+}
+
+
+static void
+_mpn_mulmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+ mp_ptr tp)
+{
+ mp_limb_t cy;
+ unsigned k;
+
+ ASSERT (0 < rn);
+ ASSERT ((ap[rn] | bp[rn]) <= 1);
+
+ if (UNLIKELY (ap[rn] | bp[rn]))
+ {
+ if (ap[rn])
+ cy = bp[rn] + mpn_neg (rp, bp, rn);
+ else /* ap[rn] == 0 */
+ cy = mpn_neg (rp, ap, rn);
+ }
+ else if (MPN_MULMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
+ {
+ rn /= k;
+ mpn_mulmod_bknp1 (rp, ap, bp, rn, k, tp);
+ return;
+ }
+ else
+ {
+ mpn_mul_n (tp, ap, bp, rn);
+ cy = mpn_sub_n (rp, tp, tp + rn, rn);
+ }
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn + 1, cy);
+}
+
+/* {rp, kn + 1} = {ap, kn + 1} * {bp, kn + 1} % (B^kn + 1) */
+/* tp must point to at least 4*(k-1)*n+1 limbs*/
+void
+mpn_mulmod_bknp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
+ mp_size_t n, unsigned k, mp_ptr tp)
+{
+ mp_ptr hp;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ ASSERT (k > 2);
+ ASSERT (k % 2 == 1);
+
+ /* a % (B^{nn}+1)/(B^{nn/k}+1) */
+ _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
+ /* b % (B^{nn}+1)/(B^{nn/k}+1) */
+ _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 3, bp, n, k);
+ mpn_mul_n (tp, tp + (k - 1) * n * 2, tp + (k - 1) * n * 3, (k - 1) * n);
+ _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
+
+ hp = tp + k * n + 1;
+ /* a % (B^{nn/k}+1) */
+ ASSERT (ap[k * n] <= 1);
+ _mpn_modbnp1_kn (hp, ap, n, k);
+ /* b % (B^{nn/k}+1) */
+ ASSERT (bp[k * n] <= 1);
+ _mpn_modbnp1_kn (hp + n + 1, bp, n, k);
+ _mpn_mulmod_bnp1_tp (hp + (n + 1) * 2, hp, hp + n + 1, n, hp + (n + 1) * 2);
+
+ _mpn_crt (rp, tp, hp + (n + 1) * 2, n, k, hp);
+}
+
+
+static void
+_mpn_sqrmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_size_t rn,
+ mp_ptr tp)
+{
+ mp_limb_t cy;
+ unsigned k;
+
+ ASSERT (0 < rn);
+
+ if (UNLIKELY (ap[rn]))
+ {
+ ASSERT (ap[rn] == 1);
+ *rp = 1;
+ MPN_FILL (rp + 1, rn, 0);
+ return;
+ }
+ else if (MPN_SQRMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
+ {
+ rn /= k;
+ mpn_sqrmod_bknp1 (rp, ap, rn, k, tp);
+ return;
+ }
+ else
+ {
+ mpn_sqr (tp, ap, rn);
+ cy = mpn_sub_n (rp, tp, tp + rn, rn);
+ }
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn + 1, cy);
+}
+
+/* {rp, kn + 1} = {ap, kn + 1}^2 % (B^kn + 1) */
+/* tp must point to at least 3*(k-1)*n+1 limbs*/
+void
+mpn_sqrmod_bknp1 (mp_ptr rp, mp_srcptr ap,
+ mp_size_t n, unsigned k, mp_ptr tp)
+{
+ mp_ptr hp;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ ASSERT (k > 2);
+ ASSERT (k % 2 == 1);
+
+ /* a % (B^{nn}+1)/(B^{nn/k}+1) */
+ _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
+ mpn_sqr (tp, tp + (k - 1) * n * 2, (k - 1) * n);
+ _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
+
+ hp = tp + k * n + 1;
+ /* a % (B^{nn/k}+1) */
+ ASSERT (ap[k * n] <= 1);
+ _mpn_modbnp1_kn (hp, ap, n, k);
+ _mpn_sqrmod_bnp1_tp (hp + (n + 1), hp, n, hp + (n + 1));
+
+ _mpn_crt (rp, tp, hp + (n + 1), n, k, hp);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/mulmod_bnm1.c b/vendor/gmp-6.3.0/mpn/generic/mulmod_bnm1.c
new file mode 100644
index 0000000..8229ede
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mulmod_bnm1.c
@@ -0,0 +1,374 @@
+/* mulmod_bnm1.c -- multiplication mod B^n-1.
+
+ Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+ Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2013, 2020, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
+ mod B^rn - 1, and values are semi-normalised; zero is represented
+ as either 0 or B^n - 1. Needs a scratch of 2rn limbs at tp.
+ tp==rp is allowed. */
+void
+mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+ mp_ptr tp)
+{
+ mp_limb_t cy;
+
+ ASSERT (0 < rn);
+
+ mpn_mul_n (tp, ap, bp, rn);
+ cy = mpn_add_n (rp, tp, tp + rn, rn);
+ /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+ * be no overflow when adding in the carry. */
+ MPN_INCR_U (rp, rn, cy);
+}
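+
+/* The wrap-around above uses B^rn = 1 (mod B^rn - 1): if a*b = h*B^rn + l
+ then a*b = l + h. A decimal analogue with "B^rn" = 100, modulus 99:
+ 57*84 = 4788 = 47*100 + 88, and 88 + 47 = 135 gives the digits 35 with a
+ carry of 1, which folded back in (100 = 1) yields 36 = 4788 mod 99. */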
+
+
+/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
+ normalised representation, computation is mod B^rn + 1. Needs
+ a scratch area of 2rn limbs at tp; tp == rp is allowed.
+ Output is normalised. */
+static void
+mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+ mp_ptr tp)
+{
+ mp_limb_t cy;
+ unsigned k;
+
+ ASSERT (0 < rn);
+
+ if (UNLIKELY (ap[rn] | bp [rn]))
+ {
+ if (ap[rn])
+ cy = bp [rn] + mpn_neg (rp, bp, rn);
+ else /* ap[rn] == 0 */
+ cy = mpn_neg (rp, ap, rn);
+ }
+ else if (MPN_MULMOD_BKNP1_USABLE (rn, k, MUL_FFT_MODF_THRESHOLD))
+ {
+ mp_size_t n_k = rn / k;
+ TMP_DECL;
+
+ TMP_MARK;
+ mpn_mulmod_bknp1 (rp, ap, bp, n_k, k,
+ TMP_ALLOC_LIMBS (mpn_mulmod_bknp1_itch (rn)));
+ TMP_FREE;
+ return;
+ }
+ else
+ {
+ mpn_mul_n (tp, ap, bp, rn);
+ cy = mpn_sub_n (rp, tp, tp + rn, rn);
+ }
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn + 1, cy);
+}
+
+
+/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if one of the operands
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
+ * combine results and obtain a natural number when one knows in
+ * advance that the final value is less than (B^rn-1).
+ * Moreover it should not be a problem if mulmod_bnm1 is used to
+ * compute the full product with an+bn <= rn, because this condition
+ * implies (B^an-1)(B^bn-1) < (B^rn-1) .
+ *
+ * Requires 0 < bn <= an <= rn and an + bn > rn/2
+ * Scratch need: rn + (need for recursive call OR rn + 4). This gives
+ *
+ * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
+ */
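+/* Typical use (cf. mpn_nussbaumer_mul later in this patch): pick
+   rn = mpn_mulmod_bnm1_next_size (an + bn) and a scratch area of
+   mpn_mulmod_bnm1_itch (rn, an, bn) limbs; since an + bn <= rn then holds,
+   the reduction mod B^rn - 1 leaves the full product in {rp, an+bn}. */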
+void
+mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
+{
+ ASSERT (0 < bn);
+ ASSERT (bn <= an);
+ ASSERT (an <= rn);
+
+ if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
+ {
+ if (UNLIKELY (bn < rn))
+ {
+ if (UNLIKELY (an + bn <= rn))
+ {
+ mpn_mul (rp, ap, an, bp, bn);
+ }
+ else
+ {
+ mp_limb_t cy;
+ mpn_mul (tp, ap, an, bp, bn);
+ cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
+ MPN_INCR_U (rp, rn, cy);
+ }
+ }
+ else
+ mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
+ }
+ else
+ {
+ mp_size_t n;
+ mp_limb_t cy;
+ mp_limb_t hi;
+
+ n = rn >> 1;
+
+ /* We need at least an + bn >= n, to be able to fit one of the
+ recursive products at rp. Requiring strict inequality makes
+ the code slightly simpler. If desired, we could avoid this
+ restriction by initially halving rn as long as rn is even and
+ an + bn <= rn/2. */
+
+ ASSERT (an + bn > n);
+
+ /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
+ and crt together as
+
+ x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
+ */
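+      /* Why the formula works: modulo B^n + 1 we have B^n = -1, so the
+	 right-hand side reduces to xp; modulo B^n - 1 we have B^n = 1, so it
+	 reduces to -xp + 2*[(xp + xm)/2] = xm.  Hence x matches both
+	 residues, as the CRT requires. */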
+
+#define a0 ap
+#define a1 (ap + n)
+#define b0 bp
+#define b1 (bp + n)
+
+#define xp tp /* 2n + 2 */
+  /* am1  may be in {xp, n} */
+  /* bm1  may be in {xp + n, n} */
+#define sp1 (tp + 2*n + 2)
+  /* ap1  may be in {sp1, n + 1} */
+  /* bp1  may be in {sp1 + n + 1, n + 1} */
+
+ {
+ mp_srcptr am1, bm1;
+ mp_size_t anm, bnm;
+ mp_ptr so;
+
+ bm1 = b0;
+ bnm = bn;
+ if (LIKELY (an > n))
+ {
+ am1 = xp;
+ cy = mpn_add (xp, a0, n, a1, an - n);
+ MPN_INCR_U (xp, n, cy);
+ anm = n;
+ so = xp + n;
+ if (LIKELY (bn > n))
+ {
+ bm1 = so;
+ cy = mpn_add (so, b0, n, b1, bn - n);
+ MPN_INCR_U (so, n, cy);
+ bnm = n;
+ so += n;
+ }
+ }
+ else
+ {
+ so = xp;
+ am1 = a0;
+ anm = an;
+ }
+
+ mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
+ }
+
+ {
+ int k;
+ mp_srcptr ap1, bp1;
+ mp_size_t anp, bnp;
+
+ bp1 = b0;
+ bnp = bn;
+ if (LIKELY (an > n)) {
+ ap1 = sp1;
+ cy = mpn_sub (sp1, a0, n, a1, an - n);
+ sp1[n] = 0;
+ MPN_INCR_U (sp1, n + 1, cy);
+ anp = n + ap1[n];
+ if (LIKELY (bn > n)) {
+ bp1 = sp1 + n + 1;
+ cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
+ sp1[2*n+1] = 0;
+ MPN_INCR_U (sp1 + n + 1, n + 1, cy);
+ bnp = n + bp1[n];
+ }
+ } else {
+ ap1 = a0;
+ anp = an;
+ }
+
+ if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+ k=0;
+ else
+ {
+ int mask;
+ k = mpn_fft_best_k (n, 0);
+ mask = (1<<k) - 1;
+ while (n & mask) {k--; mask >>=1;};
+ }
+ if (k >= FFT_FIRST_K)
+ xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
+ else if (UNLIKELY (bp1 == b0))
+ {
+ ASSERT (anp + bnp <= 2*n+1);
+ ASSERT (anp + bnp > n);
+ ASSERT (anp >= bnp);
+ mpn_mul (xp, ap1, anp, bp1, bnp);
+ anp = anp + bnp - n;
+ ASSERT (anp <= n || xp[2*n]==0);
+ anp-= anp > n;
+ cy = mpn_sub (xp, xp, n, xp + n, anp);
+ xp[n] = 0;
+ MPN_INCR_U (xp, n+1, cy);
+ }
+ else
+ mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
+ }
+
+ /* Here the CRT recomposition begins.
+
+ xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+ Division by 2 is a bitwise rotation.
+
+ Assumes xp normalised mod (B^n+1).
+
+     The residue class [0] is represented by [B^n-1], except when
+     both inputs are ZERO.
+ */
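+  /* Halving mod B^n - 1 is a one-bit right rotation: for odd t,
+     t/2 = (t + B^n - 1)/2, i.e. t shifted right with the shifted-out bit
+     reinserted at the top.  The rsh1add below shifts (xm + xp), and the
+     code that follows moves the carried-out bit back into rp[n-1]. */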
+
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+ cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+ hi = cy << (GMP_NUMB_BITS - 1);
+ cy = 0;
+ /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+ overflows, i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+ cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+ hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+ cy >>= 1;
+ /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
+ the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
+#endif
+#if GMP_NAIL_BITS == 0
+ add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+ cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+ rp[n-1] ^= hi;
+#endif
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+ cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+ cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+ cy += (rp[0]&1);
+ mpn_rshift(rp, rp, n, 1);
+ ASSERT (cy <= 2);
+ hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+ cy >>= 1;
+ /* We can have cy != 0 only if hi = 0... */
+ ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+ rp[n-1] |= hi;
+ /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
+#endif
+ ASSERT (cy <= 1);
+ /* Next increment can not overflow, read the previous comments about cy. */
+ ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+ MPN_INCR_U(rp, n, cy);
+
+ /* Compute the highest half:
+ ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
+ */
+ if (UNLIKELY (an + bn < rn))
+ {
+ /* Note that in this case, the only way the result can equal
+ zero mod B^{rn} - 1 is if one of the inputs is zero, and
+ then the output of both the recursive calls and this CRT
+ reconstruction is zero, not B^{rn} - 1. Which is good,
+ since the latter representation doesn't fit in the output
+ area.*/
+ cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
+
+ /* FIXME: This subtraction of the high parts is not really
+ necessary, we do it to get the carry out, and for sanity
+ checking. */
+ cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
+ xp + an + bn - n, rn - (an + bn), cy);
+ ASSERT (an + bn == rn - 1 ||
+ mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
+ cy = mpn_sub_1 (rp, rp, an + bn, cy);
+ ASSERT (cy == (xp + an + bn - n)[0]);
+ }
+ else
+ {
+ cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+ /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+ DECR will affect _at most_ the lowest n limbs. */
+ MPN_DECR_U (rp, 2*n, cy);
+ }
+#undef a0
+#undef a1
+#undef b0
+#undef b1
+#undef xp
+#undef sp1
+ }
+}
+
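+/* Returns the smallest size r >= n that mpn_mulmod_bnm1 handles efficiently:
+   any n below MULMOD_BNM1_THRESHOLD, then multiples of 2, 4 or 8 while the
+   recursion stays below the FFT range, and otherwise twice an FFT-friendly
+   size for the recursive half. */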
+mp_size_t
+mpn_mulmod_bnm1_next_size (mp_size_t n)
+{
+ mp_size_t nh;
+
+ if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
+ return n;
+ if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
+ return (n + (2-1)) & (-2);
+ if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
+ return (n + (4-1)) & (-4);
+
+ nh = (n + 1) >> 1;
+
+ if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
+ return (n + (8-1)) & (-8);
+
+ return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/neg.c b/vendor/gmp-6.3.0/mpn/generic/neg.c
new file mode 100644
index 0000000..bec2a32
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/neg.c
@@ -0,0 +1,33 @@
+/* mpn_neg - negate an mpn.
+
+Copyright 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __GMP_FORCE_mpn_neg 1
+
+#include "gmp-impl.h"
diff --git a/vendor/gmp-6.3.0/mpn/generic/nussbaumer_mul.c b/vendor/gmp-6.3.0/mpn/generic/nussbaumer_mul.c
new file mode 100644
index 0000000..3e0cf27
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/nussbaumer_mul.c
@@ -0,0 +1,70 @@
+/* mpn_nussbaumer_mul -- Multiply {ap,an} and {bp,bn} using
+ Nussbaumer's negacyclic convolution.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Multiply {ap,an} by {bp,bn}, and put the result in {pp, an+bn} */
+void
+mpn_nussbaumer_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn)
+{
+ mp_size_t rn;
+ mp_ptr tp;
+ TMP_DECL;
+
+ ASSERT (an >= bn);
+ ASSERT (bn > 0);
+
+ TMP_MARK;
+
+ if ((ap == bp) && (an == bn))
+ {
+ rn = mpn_sqrmod_bnm1_next_size (2*an);
+ tp = TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (rn, an));
+ mpn_sqrmod_bnm1 (pp, rn, ap, an, tp);
+ }
+ else
+ {
+ rn = mpn_mulmod_bnm1_next_size (an + bn);
+ tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (rn, an, bn));
+ mpn_mulmod_bnm1 (pp, rn, ap, an, bp, bn, tp);
+ }
+
+ TMP_FREE;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/perfpow.c b/vendor/gmp-6.3.0/mpn/generic/perfpow.c
new file mode 100644
index 0000000..9d46477
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/perfpow.c
@@ -0,0 +1,342 @@
+/* mpn_perfect_power_p -- mpn perfect power detection.
+
+ Contributed to the GNU project by Martin Boij.
+
+Copyright 2009, 2010, 2012, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define SMALL 20
+#define MEDIUM 100
+
+/* Return non-zero if {np,nn} == {xp,xn} ^ k.
+ Algorithm:
+ For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
+ {xp,xn}^k. Stop if they don't match the s least significant limbs of
+ {np,nn}.
+
+ FIXME: Low xn limbs can be expected to always match, if computed as a mod
+ B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
+ most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
+ compare to {np, nn}. Or use an even cruder approximation based on fix-point
+ base 2 logarithm. */
+static int
+pow_equals (mp_srcptr np, mp_size_t n,
+ mp_srcptr xp,mp_size_t xn,
+ mp_limb_t k, mp_bitcnt_t f,
+ mp_ptr tp)
+{
+ mp_bitcnt_t y, z;
+ mp_size_t bn;
+ mp_limb_t h, l;
+
+ ASSERT (n > 1 || (n == 1 && np[0] > 1));
+ ASSERT (np[n - 1] > 0);
+ ASSERT (xn > 0);
+
+ if (xn == 1 && xp[0] == 1)
+ return 0;
+
+ z = 1 + (n >> 1);
+ for (bn = 1; bn < z; bn <<= 1)
+ {
+ mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
+ if (mpn_cmp (tp, np, bn) != 0)
+ return 0;
+ }
+
+ /* Final check. Estimate the size of {xp,xn}^k before computing the power
+ with full precision. Optimization: It might pay off to make a more
+ accurate estimation of the logarithm of {xp,xn}, rather than using the
+ index of the MSB. */
+
+ MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
+ y -= 1; /* msb_index (xp, xn) */
+
+ umul_ppmm (h, l, k, y);
+ h -= l == 0; --l; /* two-limb decrement */
+
+ z = f - 1; /* msb_index (np, n) */
+ if (h == 0 && l <= z)
+ {
+ mp_limb_t *tp2;
+ mp_size_t i;
+ int ans;
+ mp_limb_t size;
+ TMP_DECL;
+
+ size = l + k;
+ ASSERT_ALWAYS (size >= k);
+
+ TMP_MARK;
+ y = 2 + size / GMP_LIMB_BITS;
+ tp2 = TMP_ALLOC_LIMBS (y);
+
+ i = mpn_pow_1 (tp, xp, xn, k, tp2);
+ if (i == n && mpn_cmp (tp, np, n) == 0)
+ ans = 1;
+ else
+ ans = 0;
+ TMP_FREE;
+ return ans;
+ }
+
+ return 0;
+}
+
+
+/* Return non-zero if N = {np,n} is a kth power.
+ I = {ip,n} = N^(-1) mod B^n. */
+static int
+is_kth_power (mp_ptr rp, mp_srcptr np,
+ mp_limb_t k, mp_srcptr ip,
+ mp_size_t n, mp_bitcnt_t f,
+ mp_ptr tp)
+{
+ mp_bitcnt_t b;
+ mp_size_t rn, xn;
+
+ ASSERT (n > 0);
+ ASSERT ((k & 1) != 0 || k == 2);
+ ASSERT ((np[0] & 1) != 0);
+
+ if (k == 2)
+ {
+ b = (f + 1) >> 1;
+ rn = 1 + b / GMP_LIMB_BITS;
+ if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
+ {
+ rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+ xn = rn;
+ MPN_NORMALIZE (rp, xn);
+ if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
+ return 1;
+
+ /* Check if (2^b - r)^2 == n */
+ mpn_neg (rp, rp, rn);
+ rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+ MPN_NORMALIZE (rp, rn);
+ if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
+ return 1;
+ }
+ }
+ else
+ {
+ b = 1 + (f - 1) / k;
+ rn = 1 + (b - 1) / GMP_LIMB_BITS;
+ mpn_brootinv (rp, ip, rn, k, tp);
+ if ((b % GMP_LIMB_BITS) != 0)
+ rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+ MPN_NORMALIZE (rp, rn);
+ if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
+ return 1;
+ }
+ MPN_ZERO (rp, rn); /* Untrash rp */
+ return 0;
+}
+
+static int
+perfpow (mp_srcptr np, mp_size_t n,
+ mp_limb_t ub, mp_limb_t g,
+ mp_bitcnt_t f, int neg)
+{
+ mp_ptr ip, tp, rp;
+ mp_limb_t k;
+ int ans;
+ mp_bitcnt_t b;
+ gmp_primesieve_t ps;
+ TMP_DECL;
+
+ ASSERT (n > 0);
+ ASSERT ((np[0] & 1) != 0);
+ ASSERT (ub > 0);
+
+ TMP_MARK;
+ gmp_init_primesieve (&ps);
+ b = (f + 3) >> 1;
+
+ TMP_ALLOC_LIMBS_3 (ip, n, rp, n, tp, 5 * n);
+
+ MPN_ZERO (rp, n);
+
+ /* FIXME: It seems the inverse in ninv is needed only to get non-inverted
+ roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
+ similarly for nth roots. It should be more efficient to compute n^{1/2} as
+ n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
+ similar for kth roots if we switch to an iteration converging to n^{1/k -
+ 1}, and we can then eliminate this binvert call. */
+ mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
+ if (b % GMP_LIMB_BITS)
+ ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+
+ if (neg)
+ gmp_nextprime (&ps);
+
+ ans = 0;
+ if (g > 0)
+ {
+ ub = MIN (ub, g + 1);
+ while ((k = gmp_nextprime (&ps)) < ub)
+ {
+ if ((g % k) == 0)
+ {
+ if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
+ {
+ ans = 1;
+ goto ret;
+ }
+ }
+ }
+ }
+ else
+ {
+ while ((k = gmp_nextprime (&ps)) < ub)
+ {
+ if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
+ {
+ ans = 1;
+ goto ret;
+ }
+ }
+ }
+ ret:
+ TMP_FREE;
+ return ans;
+}
+
+static const unsigned short nrtrial[] = { 100, 500, 1000 };
+
+/* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
+ number. */
+static const double logs[] =
+ { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
+
+int
+mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
+{
+ mp_limb_t *nc, factor, g;
+ mp_limb_t exp, d;
+ mp_bitcnt_t twos, count;
+ int ans, where, neg, trial;
+ TMP_DECL;
+
+ neg = n < 0;
+ if (neg)
+ {
+ n = -n;
+ }
+
+ if (n == 0 || (n == 1 && np[0] == 1)) /* Valgrind doesn't like
+ (n <= (np[0] == 1)) */
+ return 1;
+
+ TMP_MARK;
+
+ count = 0;
+
+ twos = mpn_scan1 (np, 0);
+ if (twos != 0)
+ {
+ mp_size_t s;
+ if (twos == 1)
+ {
+ return 0;
+ }
+ s = twos / GMP_LIMB_BITS;
+ if (s + 1 == n && POW2_P (np[s]))
+ {
+ return ! (neg && POW2_P (twos));
+ }
+ count = twos % GMP_LIMB_BITS;
+ n -= s;
+ np += s;
+ if (count > 0)
+ {
+ nc = TMP_ALLOC_LIMBS (n);
+ mpn_rshift (nc, np, n, count);
+ n -= (nc[n - 1] == 0);
+ np = nc;
+ }
+ }
+ g = twos;
+
+ trial = (n > SMALL) + (n > MEDIUM);
+
+ where = 0;
+ factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
+
+ if (factor != 0)
+ {
+ if (count == 0) /* We did not allocate nc yet. */
+ {
+ nc = TMP_ALLOC_LIMBS (n);
+ }
+
+      /* Remove factors found by trialdiv.  Optimization: if mpn_remove
+	 defined an _itch function, we could allocate its scratch just once. */
+
+ do
+ {
+ binvert_limb (d, factor);
+
+ /* After the first round we always have nc == np */
+ exp = mpn_remove (nc, &n, np, n, &d, 1, ~(mp_bitcnt_t)0);
+
+ if (g == 0)
+ g = exp;
+ else
+ g = mpn_gcd_1 (&g, 1, exp);
+
+ if (g == 1)
+ {
+ ans = 0;
+ goto ret;
+ }
+
+ if ((n == 1) & (nc[0] == 1))
+ {
+ ans = ! (neg && POW2_P (g));
+ goto ret;
+ }
+
+ np = nc;
+ factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
+ }
+ while (factor != 0);
+ }
+
+ MPN_SIZEINBASE_2EXP(count, np, n, 1); /* log (np) + 1 */
+ d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
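+  /* d - 1 bounds the exponent that can still occur: after trial division any
+     remaining base exceeds the largest sieved prime p, so np = x^k forces
+     k <= log_p (np) ~= count * log_p (2), which is what logs[] encodes. */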
+ ans = perfpow (np, n, d, g, count, neg);
+
+ ret:
+ TMP_FREE;
+ return ans;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/perfsqr.c b/vendor/gmp-6.3.0/mpn/generic/perfsqr.c
new file mode 100644
index 0000000..1ea5c84
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/perfsqr.c
@@ -0,0 +1,238 @@
+/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
+ zero otherwise.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "perfsqr.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+
+/* PERFSQR_MOD_* detects non-squares using residue tests.
+
+ A macro PERFSQR_MOD_TEST is setup by gen-psqr.c in perfsqr.h. It takes
+ {up,usize} modulo a selected modulus to get a remainder r. For 32-bit or
+ 64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,
+ or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP
+ used. PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this
+ case too.
+
+ PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or
+ PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table
+ data indicating residues and non-residues modulo those divisors. The
+ table data is in 1 or 2 limbs worth of bits respectively, per the size of
+ each d.
+
+ A "modexact" style remainder is taken to reduce r modulo d.
+ PERFSQR_MOD_IDX implements this, producing an index "idx" for use with
+ the table data. Notice there's just one multiplication by a constant
+ "inv", for each d.
+
+ The modexact doesn't produce a true r%d remainder, instead idx satisfies
+ "-(idx<<PERFSQR_MOD_BITS) == r mod d". Because d is odd, this factor
+ -2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is
+ accounted for by having the table data suitably permuted.
+
+ The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.
+ In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit
+ each divisor d meaning the modexact multiply can take place entirely
+ within one limb, giving the compiler the chance to optimize it, in a way
+ that say umul_ppmm would not give.
+
+ There's no need for the divisors d to be prime, in fact gen-psqr.c makes
+ a deliberate effort to combine factors so as to reduce the number of
+ separate tests done on r. But such combining is limited to d <=
+ 2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.
+
+ Alternatives:
+
+ It'd be possible to use bigger divisors d, and more than 2 limbs of table
+ data, but this doesn't look like it would be of much help to the prime
+ factors in the usual moduli 2^24-1 or 2^48-1.
+
+ The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're
+ just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime
+ factors. 2^32-1 and 2^64-1 would be equally easy to calculate, but have
+ fewer prime factors.
+
+ The nails case usually ends up using mpn_mod_1, which is a lot slower
+ than mpn_mod_34lsub1. Perhaps other such special moduli could be found
+ for the nails case. Two-term things like 2^30-2^15-1 might be
+ candidates. Or at worst some on-the-fly de-nailing would allow the plain
+ 2^24-1 to be used. Currently nails are too preliminary to be worried
+ about.
+
+*/
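+/* A concrete instance of the residue idea: squares mod 5 can only be 0, 1 or
+   4, so a remainder of 2 or 3 mod 5 immediately proves non-squareness; the
+   table data packs such square/non-square bitmaps for each divisor d.  For
+   reference, with 64-bit limbs the modulus 2^48-1 factors as
+   3^2 * 5 * 7 * 13 * 17 * 97 * 241 * 257 * 673. */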
+
+#define PERFSQR_MOD_MASK ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
+
+#define MOD34_BITS (GMP_NUMB_BITS / 4 * 3)
+#define MOD34_MASK ((CNST_LIMB(1) << MOD34_BITS) - 1)
+
+#define PERFSQR_MOD_34(r, up, usize) \
+ do { \
+ (r) = mpn_mod_34lsub1 (up, usize); \
+ (r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS); \
+ } while (0)
+
+/* FIXME: The %= here isn't good, and might destroy any savings from keeping
+ the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
+ Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
+ and a shift count, like mpn_preinv_divrem_1. But mod_34lsub1 is our
+ normal case, so lets not worry too much about mod_1. */
+#define PERFSQR_MOD_PP(r, up, usize) \
+ do { \
+ if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD)) \
+ { \
+ (r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM, \
+ PERFSQR_PP_INVERTED); \
+ (r) %= PERFSQR_PP; \
+ } \
+ else \
+ { \
+ (r) = mpn_mod_1 (up, usize, PERFSQR_PP); \
+ } \
+ } while (0)
+
+#define PERFSQR_MOD_IDX(idx, r, d, inv) \
+ do { \
+ mp_limb_t q; \
+ ASSERT ((r) <= PERFSQR_MOD_MASK); \
+ ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1); \
+ ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK); \
+ \
+ q = ((r) * (inv)) & PERFSQR_MOD_MASK; \
+ ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK)); \
+ (idx) = (q * (d)) >> PERFSQR_MOD_BITS; \
+ } while (0)
+
+#define PERFSQR_MOD_1(r, d, inv, mask) \
+ do { \
+ unsigned idx; \
+ ASSERT ((d) <= GMP_LIMB_BITS); \
+ PERFSQR_MOD_IDX(idx, r, d, inv); \
+ TRACE (printf (" PERFSQR_MOD_1 d=%u r=%lu idx=%u\n", \
+ d, r%d, idx)); \
+ if ((((mask) >> idx) & 1) == 0) \
+ { \
+ TRACE (printf (" non-square\n")); \
+ return 0; \
+ } \
+ } while (0)
+
+/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
+ sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */
+#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) \
+ do { \
+ mp_limb_t m; \
+ unsigned idx; \
+ ASSERT ((d) <= 2*GMP_LIMB_BITS); \
+ \
+ PERFSQR_MOD_IDX (idx, r, d, inv); \
+ TRACE (printf (" PERFSQR_MOD_2 d=%u r=%lu idx=%u\n", \
+ d, r%d, idx)); \
+ m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi)); \
+ idx %= GMP_LIMB_BITS; \
+ if (((m >> idx) & 1) == 0) \
+ { \
+ TRACE (printf (" non-square\n")); \
+ return 0; \
+ } \
+ } while (0)
+
+
+int
+mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
+{
+ ASSERT (usize >= 1);
+
+ TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));
+
+ /* The first test excludes 212/256 (82.8%) of the perfect square candidates
+ in O(1) time. */
+ {
+ unsigned idx = up[0] % 0x100;
+ if (((sq_res_0x100[idx / GMP_LIMB_BITS]
+ >> (idx % GMP_LIMB_BITS)) & 1) == 0)
+ return 0;
+ }
+
+#if 0
+ /* Check that we have even multiplicity of 2, and then check that the rest is
+ a possible perfect square. Leave disabled until we can determine this
+ really is an improvement. If it is, it could completely replace the
+ simple probe above, since this should throw out more non-squares, but at
+ the expense of somewhat more cycles. */
+ {
+ mp_limb_t lo;
+ int cnt;
+ lo = up[0];
+ while (lo == 0)
+ up++, lo = up[0], usize--;
+ count_trailing_zeros (cnt, lo);
+ if ((cnt & 1) != 0)
+      return 0;		/* the multiplicity of 2 is odd, hence not a square */
+ lo >>= cnt; /* shift down to align lowest non-zero bit */
+ if ((lo & 6) != 0)
+ return 0;
+ }
+#endif
+
+
+ /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares
+ according to their residues modulo small primes (or powers of
+ primes). See perfsqr.h. */
+ PERFSQR_MOD_TEST (up, usize);
+
+
+ /* For the third and last test, we finally compute the square root,
+ to make sure we've really got a perfect square. */
+ {
+ mp_ptr root_ptr;
+ int res;
+ TMP_DECL;
+
+ TMP_MARK;
+ root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);
+
+ /* Iff mpn_sqrtrem returns zero, the square is perfect. */
+ res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
+ TMP_FREE;
+
+ return res;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/popham.c b/vendor/gmp-6.3.0/mpn/generic/popham.c
new file mode 100644
index 0000000..87974d7
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/popham.c
@@ -0,0 +1,125 @@
+/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
+
+Copyright 1994, 1996, 2000-2002, 2005, 2011, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#if OPERATION_popcount
+#define FNAME mpn_popcount
+#define POPHAM(u,v) u
+#endif
+
+#if OPERATION_hamdist
+#define FNAME mpn_hamdist
+#define POPHAM(u,v) u ^ v
+#endif
+
+mp_bitcnt_t
+FNAME (mp_srcptr up,
+#if OPERATION_hamdist
+ mp_srcptr vp,
+#endif
+ mp_size_t n) __GMP_NOTHROW
+{
+ mp_bitcnt_t result = 0;
+ mp_limb_t p0, p1, p2, p3, x, p01, p23;
+ mp_size_t i;
+
+ ASSERT (n >= 1); /* Actually, this code handles any n, but some
+ assembly implementations do not. */
+
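+  /* SWAR popcount: within each limb, sum adjacent bit pairs, then nibbles,
+     then bytes; the inline comments below track the field width in bits and
+     the value range of each field at that point.  Four limbs are combined
+     before the final byte-fold to amortise the masking. */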
+ for (i = n >> 2; i != 0; i--)
+ {
+ p0 = POPHAM (up[0], vp[0]);
+ p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3; /* 2 0-2 */
+ p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5); /* 4 0-4 */
+
+ p1 = POPHAM (up[1], vp[1]);
+ p1 -= (p1 >> 1) & MP_LIMB_T_MAX/3; /* 2 0-2 */
+ p1 = ((p1 >> 2) & MP_LIMB_T_MAX/5) + (p1 & MP_LIMB_T_MAX/5); /* 4 0-4 */
+
+ p01 = p0 + p1; /* 8 0-8 */
+ p01 = ((p01 >> 4) & MP_LIMB_T_MAX/17) + (p01 & MP_LIMB_T_MAX/17); /* 8 0-16 */
+
+ p2 = POPHAM (up[2], vp[2]);
+ p2 -= (p2 >> 1) & MP_LIMB_T_MAX/3; /* 2 0-2 */
+ p2 = ((p2 >> 2) & MP_LIMB_T_MAX/5) + (p2 & MP_LIMB_T_MAX/5); /* 4 0-4 */
+
+ p3 = POPHAM (up[3], vp[3]);
+ p3 -= (p3 >> 1) & MP_LIMB_T_MAX/3; /* 2 0-2 */
+ p3 = ((p3 >> 2) & MP_LIMB_T_MAX/5) + (p3 & MP_LIMB_T_MAX/5); /* 4 0-4 */
+
+ p23 = p2 + p3; /* 8 0-8 */
+ p23 = ((p23 >> 4) & MP_LIMB_T_MAX/17) + (p23 & MP_LIMB_T_MAX/17); /* 8 0-16 */
+
+ x = p01 + p23; /* 8 0-32 */
+ x = (x >> 8) + x; /* 8 0-64 */
+ x = (x >> 16) + x; /* 8 0-128 */
+#if GMP_LIMB_BITS > 32
+ x = ((x >> 32) & 0xff) + (x & 0xff); /* 8 0-256 */
+ result += x;
+#else
+ result += x & 0xff;
+#endif
+ up += 4;
+#if OPERATION_hamdist
+ vp += 4;
+#endif
+ }
+
+ n &= 3;
+ if (n != 0)
+ {
+ x = 0;
+ do
+ {
+ p0 = POPHAM (up[0], vp[0]);
+ p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3; /* 2 0-2 */
+ p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5); /* 4 0-4 */
+ p0 = ((p0 >> 4) + p0) & MP_LIMB_T_MAX/17; /* 8 0-8 */
+
+ x += p0;
+ up += 1;
+#if OPERATION_hamdist
+ vp += 1;
+#endif
+ }
+ while (--n);
+
+ x = (x >> 8) + x;
+ x = (x >> 16) + x;
+#if GMP_LIMB_BITS > 32
+ x = (x >> 32) + x;
+#endif
+ result += x & 0xff;
+ }
+
+ return result;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/pow_1.c b/vendor/gmp-6.3.0/mpn/generic/pow_1.c
new file mode 100644
index 0000000..de11cd2
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/pow_1.c
@@ -0,0 +1,135 @@
+/* mpn_pow_1 -- Compute powers R = U^exp.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2002, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_size_t
+mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
+{
+ mp_limb_t x;
+ int cnt, i;
+ mp_size_t rn;
+ int par;
+
+ ASSERT (bn >= 1);
+ /* FIXME: Add operand overlap criteria */
+
+ if (exp <= 1)
+ {
+ if (exp == 0)
+ {
+ rp[0] = 1;
+ return 1;
+ }
+ else
+ {
+ MPN_COPY (rp, bp, bn);
+ return bn;
+ }
+ }
+
+ /* Count number of bits in exp, and compute where to put initial square in
+ order to magically get results in the entry rp. Use simple code,
+ optimized for small exp. For large exp, the bignum operations will take
+ so much time that the slowness of this code will be negligible. */
+ par = 0;
+ cnt = GMP_LIMB_BITS;
+ x = exp;
+ do
+ {
+ par ^= x;
+ cnt--;
+ x >>= 1;
+ } while (x != 0);
+ exp <<= cnt;
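+  /* Now cnt = GMP_LIMB_BITS - bitlength(exp) and exp is left-aligned; the
+     low bit of par is the parity of popcount(exp).  Together they decide
+     whether the square/multiply chain below ends with the result in rp or
+     in tp, hence the conditional MP_PTR_SWAPs before the loops. */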
+
+ if (bn == 1)
+ {
+ mp_limb_t rl, rh, bl = bp[0];
+
+ if ((cnt & 1) != 0)
+ MP_PTR_SWAP (rp, tp);
+
+ umul_ppmm (rh, rl, bl, bl << GMP_NAIL_BITS);
+ rp[0] = rl >> GMP_NAIL_BITS;
+ rp[1] = rh;
+ rn = 1 + (rh != 0);
+
+ for (i = GMP_LIMB_BITS - cnt - 1;;)
+ {
+ exp <<= 1;
+ if ((exp & GMP_LIMB_HIGHBIT) != 0)
+ {
+ rp[rn] = rh = mpn_mul_1 (rp, rp, rn, bl);
+ rn += rh != 0;
+ }
+
+ if (--i == 0)
+ break;
+
+ mpn_sqr (tp, rp, rn);
+ rn = 2 * rn; rn -= tp[rn - 1] == 0;
+ MP_PTR_SWAP (rp, tp);
+ }
+ }
+ else
+ {
+ if (((par ^ cnt) & 1) == 0)
+ MP_PTR_SWAP (rp, tp);
+
+ mpn_sqr (rp, bp, bn);
+ rn = 2 * bn; rn -= rp[rn - 1] == 0;
+
+ for (i = GMP_LIMB_BITS - cnt - 1;;)
+ {
+ exp <<= 1;
+ if ((exp & GMP_LIMB_HIGHBIT) != 0)
+ {
+ rn = rn + bn - (mpn_mul (tp, rp, rn, bp, bn) == 0);
+ MP_PTR_SWAP (rp, tp);
+ }
+
+ if (--i == 0)
+ break;
+
+ mpn_sqr (tp, rp, rn);
+ rn = 2 * rn; rn -= tp[rn - 1] == 0;
+ MP_PTR_SWAP (rp, tp);
+ }
+ }
+
+ return rn;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/powlo.c b/vendor/gmp-6.3.0/mpn/generic/powlo.c
new file mode 100644
index 0000000..c109512
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/powlo.c
@@ -0,0 +1,188 @@
+/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
+
+Copyright 2007-2009, 2012, 2015, 2016, 2018, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#define getbit(p,bi) \
+ ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, unsigned nbits)
+{
+ unsigned nbits_in_r;
+ mp_limb_t r;
+ mp_size_t i;
+
+ if (bi <= nbits)
+ {
+ return p[0] & (((mp_limb_t) 1 << bi) - 1);
+ }
+ else
+ {
+ bi -= nbits; /* bit index of low bit to extract */
+ i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
+ bi %= GMP_NUMB_BITS; /* bit index in low word */
+ r = p[i] >> bi; /* extract (low) bits */
+ nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
+ if (nbits_in_r < nbits) /* did we get enough bits? */
+ r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
+ return r & (((mp_limb_t) 1 << nbits) - 1);
+ }
+}
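+/* getbits (p, bi, nbits) returns the nbits bits of {p,..} just below bit
+   index bi, i.e. bits [bi-nbits, bi); when bi <= nbits it simply returns the
+   low bi bits.  It is used to peel exponent windows off from the top. */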
+
+static inline unsigned
+win_size (mp_bitcnt_t eb)
+{
+ unsigned k;
+ static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+ ASSERT (eb > 1);
+ for (k = 0; eb > x[k++];)
+ ;
+ return k;
+}
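+/* win_size maps the exponent bit count to the window width used below:
+   1 for eb <= 7, 2 for eb <= 25, 3 for eb <= 81, and so on following the
+   x[] table, so longer exponents get wider windows (and larger odd-power
+   tables of 2^(k-1) entries). */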
+
+/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
+ Requires that ep[en-1] is non-zero.
+ Uses scratch space tp[3n-1..0], i.e., 3n words. */
+/* We only use n words in the scratch space; we should pass tp + n to
+   mullo/sqrlo as a temporary area, as it is needed. */
+void
+mpn_powlo (mp_ptr rp, mp_srcptr bp,
+ mp_srcptr ep, mp_size_t en,
+ mp_size_t n, mp_ptr tp)
+{
+ unsigned cnt;
+ mp_bitcnt_t ebi;
+ unsigned windowsize, this_windowsize;
+ mp_limb_t expbits;
+ mp_limb_t *pp;
+ long i;
+ int flipflop;
+ TMP_DECL;
+
+ ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+
+ TMP_MARK;
+
+ MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+
+ windowsize = win_size (ebi);
+ if (windowsize > 1)
+ {
+ mp_limb_t *this_pp, *last_pp;
+ ASSERT (windowsize < ebi);
+
+ pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)));
+
+ this_pp = pp;
+
+ MPN_COPY (this_pp, bp, n);
+
+ /* Store b^2 in tp. */
+ mpn_sqrlo (tp, bp, n);
+
+ /* Precompute odd powers of b and put them in the temporary area at pp. */
+ i = (1 << (windowsize - 1)) - 1;
+ do
+ {
+ last_pp = this_pp;
+ this_pp += n;
+ mpn_mullo_n (this_pp, last_pp, tp, n);
+ } while (--i != 0);
+
+ expbits = getbits (ep, ebi, windowsize);
+ ebi -= windowsize;
+
+ /* THINK: Should we initialise the case expbits % 4 == 0 with a mullo? */
+ count_trailing_zeros (cnt, expbits);
+ ebi += cnt;
+ expbits >>= cnt;
+
+ MPN_COPY (rp, pp + n * (expbits >> 1), n);
+ }
+ else
+ {
+ pp = tp + n;
+ MPN_COPY (pp, bp, n);
+ MPN_COPY (rp, bp, n);
+ --ebi;
+ }
+
+ flipflop = 0;
+
+ do
+ {
+ while (getbit (ep, ebi) == 0)
+ {
+ mpn_sqrlo (tp, rp, n);
+ MP_PTR_SWAP (rp, tp);
+ flipflop = ! flipflop;
+ if (--ebi == 0)
+ goto done;
+ }
+
+ /* The next bit of the exponent is 1. Now extract the largest block of
+ bits <= windowsize, and such that the least significant bit is 1. */
+
+ expbits = getbits (ep, ebi, windowsize);
+ this_windowsize = MIN (windowsize, ebi);
+
+ count_trailing_zeros (cnt, expbits);
+ this_windowsize -= cnt;
+ ebi -= this_windowsize;
+ expbits >>= cnt;
+
+ while (this_windowsize > 1)
+ {
+ mpn_sqrlo (tp, rp, n);
+ mpn_sqrlo (rp, tp, n);
+ this_windowsize -= 2;
+ }
+
+ if (this_windowsize != 0)
+ mpn_sqrlo (tp, rp, n);
+ else
+ {
+ MP_PTR_SWAP (rp, tp);
+ flipflop = ! flipflop;
+ }
+
+ mpn_mullo_n (rp, tp, pp + n * (expbits >> 1), n);
+ } while (ebi != 0);
+
+ done:
+ if (flipflop)
+ MPN_COPY (tp, rp, n);
+ TMP_FREE;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/powm.c b/vendor/gmp-6.3.0/mpn/generic/powm.c
new file mode 100644
index 0000000..1e30f2f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/powm.c
@@ -0,0 +1,1003 @@
+/* mpn_powm -- Compute R = U^E mod M.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2007-2012, 2019-2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/*
+ BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+
+ 1. W <- U
+
+ 2. T <- (B^n * U) mod M Convert to REDC form
+
+ 3. Compute table U^1, U^3, U^5... of E-dependent size
+
+ 4. While there are more bits in E
+ W <- power left-to-right base-k
+
+
+ TODO:
+
+ * Make getbits a macro, thereby allowing it to update the index operand.
+ That will simplify the code using getbits. (Perhaps make getbits' sibling
+ getbit then have similar form, for symmetry.)
+
+ * Write an itch function. Or perhaps get rid of tp parameter since the huge
+ pp area is allocated locally anyway?
+
+ * Choose window size without looping. (Superoptimize or think(tm).)
+
+ * Handle small bases with initial, reduction-free exponentiation.
+
+ * Call new division functions, not mpn_tdiv_qr.
+
+ * Consider special code for one-limb M.
+
+ * How should we handle the redc1/redc2/redc_n choice?
+ - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1))
+ - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
+ - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
+ This disregards the addmul_N constant term, but we could think of
+ that as part of the respective mullo.
+
+ * When U (the base) is small, we should start the exponentiation with plain
+ operations, then convert that partial result to REDC form.
+
+ * When U is just one limb, should it be handled without the k-ary tricks?
+ We could keep a factor of B^n in W, but use U' = BU as base. After
+ multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
+ mod M.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#undef MPN_REDC_0
+#define MPN_REDC_0(r0, u1, u0, m0, invm) \
+ do { \
+ mp_limb_t _p1, _u1, _u0, _m0, _r0, _dummy; \
+ _u0 = (u0); \
+ _m0 = (m0); \
+ umul_ppmm (_p1, _dummy, _m0, (_u0 * (invm)) & GMP_NUMB_MASK); \
+ ASSERT (((_u0 - _dummy) & GMP_NUMB_MASK) == 0); \
+ _u1 = (u1); \
+ _r0 = _u1 - _p1; \
+ _r0 = _u1 < _p1 ? _r0 + _m0 : _r0; /* _u1 < _r0 */ \
+ (r0) = _r0 & GMP_NUMB_MASK; \
+ } while (0)
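+/* MPN_REDC_0: one-limb Montgomery reduction.  Assuming u1*B + u0 < m0*B and
+   invm = 1/m0 mod B, it yields r0 = (u1*B + u0) * B^(-1) mod m0, reduced to
+   [0, m0); the ASSERT checks that the chosen multiple of m0 cancels u0. */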
+
+#undef MPN_REDC_1
+#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
+#define MPN_REDC_1(rp, up, mp, n, invm) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm); \
+ if (cy != 0) \
+ mpn_sub_n (rp, up + n, mp, n); \
+ else \
+ MPN_COPY (rp, up + n, n); \
+ } while (0)
+#else
+#define MPN_REDC_1(rp, up, mp, n, invm) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_1 (rp, up, mp, n, invm); \
+ if (cy != 0) \
+ mpn_sub_n (rp, rp, mp, n); \
+ } while (0)
+#endif
+
+#undef MPN_REDC_2
+#define MPN_REDC_2(rp, up, mp, n, mip) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_2 (rp, up, mp, n, mip); \
+ if (cy != 0) \
+ mpn_sub_n (rp, rp, mp, n); \
+ } while (0)
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
+#define getbit(p,bi) \
+ ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+ int nbits_in_r;
+ mp_limb_t r;
+ mp_size_t i;
+
+ if (bi <= nbits)
+ {
+ return p[0] & (((mp_limb_t) 1 << bi) - 1);
+ }
+ else
+ {
+ bi -= nbits; /* bit index of low bit to extract */
+ i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
+ bi %= GMP_NUMB_BITS; /* bit index in low word */
+ r = p[i] >> bi; /* extract (low) bits */
+ nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
+ if (nbits_in_r < nbits) /* did we get enough bits? */
+ r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
+ return r & (((mp_limb_t) 1 << nbits) - 1);
+ }
+}
+
+static inline int
+win_size (mp_bitcnt_t eb)
+{
+ int k;
+ static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+ for (k = 0; eb > x[k++]; )
+ ;
+ return k;
+}
+
+/* Convert U to REDC form, U_r = B^n * U mod M */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
+{
+ mp_ptr tp, qp;
+ TMP_DECL;
+ TMP_MARK;
+
+ TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
+
+ MPN_ZERO (tp, n);
+ MPN_COPY (tp + n, up, un);
+ mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
+ TMP_FREE;
+}
+
+#if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2
+#undef mpn_rsblsh1_n_ip2
+#if HAVE_NATIVE_mpn_rsblsh1_n
+#define mpn_rsblsh1_n_ip2(a,b,n) mpn_rsblsh1_n(a,b,a,n)
+#else
+#define mpn_rsblsh1_n_ip2(a,b,n) \
+ do \
+ { \
+ mpn_lshift (a, a, n, 1); \
+ mpn_sub_n (a, a, b, n); \
+ } while (0)
+#endif
+#endif
+
+#define INNERLOOP2 \
+ do \
+ { \
+ MPN_SQR (tp, rp, n); \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ if (mpn_cmp (rp, mp, n) >= 0) \
+ ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n)); \
+ if (getbit (ep, ebi) != 0) \
+ { \
+ if (rp[n - 1] >> (mbi - 1) % GMP_LIMB_BITS == 0) \
+ ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1)); \
+ else \
+ mpn_rsblsh1_n_ip2 (rp, mp, n); \
+ } \
+ } while (--ebi != 0)
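+/* INNERLOOP2 is the left-to-right binary loop specialised for base 2: each
+   step squares and reduces the residue, and when the next exponent bit is
+   set it multiplies by 2 with a single shift, or computes 2*rp - m when the
+   shift would overflow the bit length of m. */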
+
+/* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0]
+ Requires that mp[n-1..0] is odd and > 1.
+ Requires that ep[en-1..0] is > 1.
+ Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
+static void
+mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en,
+ mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+ mp_limb_t ip[2], *mip;
+ mp_bitcnt_t ebi, mbi, tbi;
+ mp_size_t tn;
+ int count;
+ TMP_DECL;
+
+ ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+ ASSERT (n > 0 && (mp[0] & 1) != 0);
+
+ MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+ MPN_SIZEINBASE_2EXP(mbi, mp, n, 1);
+
+ if (LIKELY (mbi <= GMP_NUMB_MAX))
+ {
+ count_leading_zeros(count, (mp_limb_t) mbi);
+ count = GMP_NUMB_BITS - (count - GMP_NAIL_BITS);
+ }
+ else
+ {
+ mp_bitcnt_t tc = mbi;
+
+ count = 0;
+ do { ++count; } while ((tc >>= 1) != 0);
+ }
+
+ tbi = getbits (ep, ebi, count);
+ if (tbi >= mbi)
+ {
+ --count;
+ ASSERT ((tbi >> count) == 1);
+ tbi >>= 1;
+ ASSERT (tbi < mbi);
+ ASSERT (ebi > count);
+ }
+ else if (ebi <= count)
+ {
+ MPN_FILL (rp, n, 0);
+ rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
+ return;
+ }
+ ebi -= count;
+
+ if (n == 1)
+ {
+ mp_limb_t r0, m0, invm;
+ m0 = *mp;
+
+ /* redcify (rp, tp, tn + 1, mp, n); */
+ /* TODO: test direct use of udiv_qrnnd */
+ ASSERT (tbi < GMP_LIMB_BITS);
+ tp[1] = CNST_LIMB (1) << tbi;
+ tp[0] = CNST_LIMB (0);
+ r0 = mpn_mod_1 (tp, 2, m0);
+
+ binvert_limb (invm, m0);
+ do
+ {
+ mp_limb_t t0, t1, t2;
+ /* MPN_SQR (tp, rp, n); */
+ umul_ppmm (t1, t0, r0, r0);
+ /* MPN_REDUCE (rp, tp, mp, n, mip); */
+ MPN_REDC_0(r0, t1, t0, m0, invm);
+
+ t2 = r0 << 1;
+ t2 = r0 > (m0 >> 1) ? t2 - m0 : t2;
+ r0 = getbit (ep, ebi) != 0 ? t2 : r0;
+ } while (--ebi != 0);
+
+ /* tp[1] = 0; tp[0] = r0; */
+ /* MPN_REDUCE (rp, tp, mp, n, mip); */
+ MPN_REDC_0(*rp, 0, r0, m0, invm);
+
+ return;
+ }
+
+ TMP_MARK;
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ mip = ip;
+ binvert_limb (ip[0], mp[0]);
+ ip[0] = -ip[0];
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+ mip = ip;
+ mpn_binvert (ip, mp, 2, tp);
+ ip[0] = -ip[0]; ip[1] = ~ip[1];
+ }
+#else
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+ mip = ip;
+ binvert_limb (ip[0], mp[0]);
+ ip[0] = -ip[0];
+ }
+#endif
+ else
+ {
+ mip = TMP_ALLOC_LIMBS (n);
+ mpn_binvert (mip, mp, n, tp);
+ }
+
+ tn = tbi / GMP_LIMB_BITS;
+ MPN_ZERO (tp, tn);
+ tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
+
+ redcify (rp, tp, tn + 1, mp, n);
+
+#if WANT_REDC_2
+ if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
+ {
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+
+#else /* WANT_REDC_2 */
+
+ if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
+ {
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+ if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+#endif /* WANT_REDC_2 */
+
+ MPN_COPY (tp, rp, n);
+ MPN_FILL (tp + n, n, 0);
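+  /* Convert out of REDC form: reducing {rp,n} padded with n zero limbs
+     divides by B^n mod m, undoing the initial redcify. */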
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1 (rp, tp, mp, n, ip[0]);
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ MPN_REDC_1 (rp, tp, mp, n, ip[0]);
+#endif
+ else
+ mpn_redc_n (rp, tp, mp, n, mip);
+
+ if (mpn_cmp (rp, mp, n) >= 0)
+ mpn_sub_n (rp, rp, mp, n);
+
+ TMP_FREE;
+}
+
+/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
+ Requires that mp[n-1..0] is odd.
+ Requires that ep[en-1..0] is > 1.
+ Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
+void
+mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+ mp_srcptr ep, mp_size_t en,
+ mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+ mp_limb_t ip[2], *mip;
+ int cnt;
+ mp_bitcnt_t ebi;
+ int windowsize, this_windowsize;
+ mp_limb_t expbits;
+ mp_ptr pp, this_pp;
+ long i;
+ TMP_DECL;
+
+ ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+ ASSERT (n >= 1 && ((mp[0] & 1) != 0));
+
+ if (bn == 1 && bp[0] == 2)
+ {
+ mpn_2powm (rp, ep, en, mp, n, tp);
+ return;
+ }
+
+ TMP_MARK;
+
+ MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+
+#if 0
+ if (bn < n)
+ {
+ /* Do the first few exponent bits without mod reductions,
+ until the result is greater than the mod argument. */
+ for (;;)
+ {
+ mpn_sqr (tp, this_pp, tn);
+ tn = tn * 2 - 1, tn += tp[tn] != 0;
+ if (getbit (ep, ebi) != 0)
+ mpn_mul (..., tp, tn, bp, bn);
+ ebi--;
+ }
+ }
+#endif
+
+ windowsize = win_size (ebi);
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ mip = ip;
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+ mip = ip;
+ mpn_binvert (mip, mp, 2, tp);
+ mip[0] = -mip[0]; mip[1] = ~mip[1];
+ }
+#else
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+ mip = ip;
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+ }
+#endif
+ else
+ {
+ mip = TMP_ALLOC_LIMBS (n);
+ mpn_binvert (mip, mp, n, tp);
+ }
+
+ pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
+
+ this_pp = pp;
+ redcify (this_pp, bp, bn, mp, n);
+
+ /* Store b^2 at rp. */
+ mpn_sqr (tp, this_pp, n);
+#if 0
+ if (n == 1) {
+ MPN_REDC_0 (rp[0], tp[1], tp[0], mp[0], -mip[0]);
+ } else
+#endif
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+#endif
+ else
+ mpn_redc_n (rp, tp, mp, n, mip);
+
+ /* Precompute odd powers of b and put them in the temporary area at pp. */
+ for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
+#if 1
+ if (n == 1) {
+ umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
+ ++this_pp ;
+ MPN_REDC_0 (*this_pp, tp[1], tp[0], *mp, -mip[0]);
+ } else
+#endif
+ {
+ mpn_mul_n (tp, this_pp, rp, n);
+ this_pp += n;
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ MPN_REDC_2 (this_pp, tp, mp, n, mip);
+#else
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
+#endif
+ else
+ mpn_redc_n (this_pp, tp, mp, n, mip);
+ }
+
+ expbits = getbits (ep, ebi, windowsize);
+ ebi -= windowsize;
+
+ /* THINK: Should we initialise the case expbits % 4 == 0 with a mul? */
+ count_trailing_zeros (cnt, expbits);
+ ebi += cnt;
+ expbits >>= cnt;
+
+ MPN_COPY (rp, pp + n * (expbits >> 1), n);
+
+#define INNERLOOP \
+ while (ebi != 0) \
+ { \
+ while (getbit (ep, ebi) == 0) \
+ { \
+ MPN_SQR (tp, rp, n); \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ if (--ebi == 0) \
+ goto done; \
+ } \
+ \
+ /* The next bit of the exponent is 1. Now extract the largest \
+ block of bits <= windowsize, and such that the least \
+ significant bit is 1. */ \
+ \
+ expbits = getbits (ep, ebi, windowsize); \
+ this_windowsize = MIN (ebi, windowsize); \
+ \
+ count_trailing_zeros (cnt, expbits); \
+ this_windowsize -= cnt; \
+ ebi -= this_windowsize; \
+ expbits >>= cnt; \
+ \
+ do \
+ { \
+ MPN_SQR (tp, rp, n); \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ } \
+ while (--this_windowsize != 0); \
+ \
+ MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ }
+
+
+ if (n == 1)
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) umul_ppmm((r)[1], *(r), *(a), *(b))
+#define MPN_SQR(r,a,n) umul_ppmm((r)[1], *(r), *(a), *(a))
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_0(*(rp), (tp)[1], (tp)[0], *(mp), - *(mip))
+ INNERLOOP;
+ }
+ else
+#if WANT_REDC_2
+ if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
+ {
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ }
+
+#else /* WANT_REDC_2 */
+
+ if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
+ {
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+ if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ }
+#endif /* WANT_REDC_2 */
+
+ done:
+
+ MPN_COPY (tp, rp, n);
+ MPN_ZERO (tp + n, n);
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+#endif
+ else
+ mpn_redc_n (rp, tp, mp, n, mip);
+
+ if (mpn_cmp (rp, mp, n) >= 0)
+ mpn_sub_n (rp, rp, mp, n);
+
+ TMP_FREE;
+}
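
The INNERLOOP above is a sliding-window exponentiation over Montgomery residues: squarings for zero exponent bits, and one multiplication by a precomputed odd power per window, indexed as pp + n * (expbits >> 1). A minimal word-sized sketch of the same windowing idea, using plain modular arithmetic instead of REDC; the helper names are hypothetical, and GCC/Clang unsigned __int128 is assumed. Not GMP code.

  #include <stdint.h>

  static uint64_t mulmod (uint64_t a, uint64_t b, uint64_t m)
  {
    return (uint64_t) (((unsigned __int128) a * b) % m);
  }

  uint64_t pow_window (uint64_t b, uint64_t e, uint64_t m)
  {
    enum { W = 4 };                       /* window size, cf. win_size() */
    uint64_t pp[1 << (W - 1)];            /* odd powers b^1, b^3, ..., b^(2^W-1) */
    uint64_t b2, r;
    int bits, i;

    if (e == 0)
      return 1 % m;

    pp[0] = b % m;
    b2 = mulmod (pp[0], pp[0], m);        /* b^2, steps between odd powers */
    for (i = 1; i < (1 << (W - 1)); i++)
      pp[i] = mulmod (pp[i - 1], b2, m);

    bits = 64 - __builtin_clzll (e);      /* significant exponent bits */
    r = 1 % m;
    i = bits - 1;
    while (i >= 0)
      {
        if (((e >> i) & 1) == 0)
          {
            r = mulmod (r, r, m);          /* square for a zero bit */
            i--;
            continue;
          }
        /* Largest window ending in a set bit, at most W bits wide. */
        int len = (i + 1 < W) ? i + 1 : W;
        uint64_t win = (e >> (i - len + 1)) & ((1u << len) - 1);
        while ((win & 1) == 0)             /* shrink so the low bit is 1 */
          {
            win >>= 1;
            len--;
          }
        for (int j = 0; j < len; j++)
          r = mulmod (r, r, m);
        r = mulmod (r, pp[win >> 1], m);   /* same pp[expbits >> 1] indexing */
        i -= len;
      }
    return r;
  }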
diff --git a/vendor/gmp-6.3.0/mpn/generic/pre_divrem_1.c b/vendor/gmp-6.3.0/mpn/generic/pre_divrem_1.c
new file mode 100644
index 0000000..3b29d77
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/pre_divrem_1.c
@@ -0,0 +1,145 @@
+/* mpn_preinv_divrem_1 -- mpn by limb division with pre-inverted divisor.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Don't bloat a shared library with unused code. */
+#if USE_PREINV_DIVREM_1
+
+/* Same test here for skipping one divide step as in mpn_divrem_1.
+
+ The main reason for a separate shift==0 case is that not all CPUs give
+ zero for "n0 >> GMP_LIMB_BITS" which would arise in the general case
+ code used on shift==0. shift==0 is also reasonably common in mp_bases
+ big_base, for instance base==10 on a 64-bit limb.
+
+ Under shift!=0 it would be possible to call mpn_lshift to adjust the
+ dividend all in one go (into the quotient space say), rather than
+ limb-by-limb in the loop. This might help if mpn_lshift is a lot faster
+ than what the compiler can generate for EXTRACT. But this is left to CPU
+ specific implementations to consider, especially since EXTRACT isn't on
+ the dependent chain.
+
+ If size==0 then the result is simply xsize limbs of zeros, but nothing
+ special is done for that, since it wouldn't be a usual call, and
+ certainly never arises from mpn_get_str which is our main caller. */
+
+mp_limb_t
+mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t xsize,
+ mp_srcptr ap, mp_size_t size, mp_limb_t d_unnorm,
+ mp_limb_t dinv, int shift)
+{
+ mp_limb_t ahigh, qhigh, r;
+ mp_size_t i;
+ mp_limb_t n1, n0;
+ mp_limb_t d;
+
+ ASSERT (xsize >= 0);
+ ASSERT (size >= 1);
+ ASSERT (d_unnorm != 0);
+#if WANT_ASSERT
+ {
+ int want_shift;
+ mp_limb_t want_dinv;
+ count_leading_zeros (want_shift, d_unnorm);
+ ASSERT (shift == want_shift);
+ invert_limb (want_dinv, d_unnorm << shift);
+ ASSERT (dinv == want_dinv);
+ }
+#endif
+ /* FIXME: What's the correct overlap rule when xsize!=0? */
+ ASSERT (MPN_SAME_OR_SEPARATE_P (qp+xsize, ap, size));
+
+ ahigh = ap[size-1];
+ d = d_unnorm << shift;
+ qp += (size + xsize - 1); /* dest high limb */
+
+ if (shift == 0)
+ {
+ /* High quotient limb is 0 or 1, and skip a divide step. */
+ r = ahigh;
+ qhigh = (r >= d);
+ r = (qhigh ? r-d : r);
+ *qp-- = qhigh;
+ size--;
+
+ for (i = size-1; i >= 0; i--)
+ {
+ n0 = ap[i];
+ udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+ qp--;
+ }
+ }
+ else
+ {
+ r = 0;
+ if (ahigh < d_unnorm)
+ {
+ r = ahigh << shift;
+ *qp-- = 0;
+ size--;
+ if (size == 0)
+ goto done_integer;
+ }
+
+ n1 = ap[size-1];
+ r |= n1 >> (GMP_LIMB_BITS - shift);
+
+ for (i = size-2; i >= 0; i--)
+ {
+ ASSERT (r < d);
+ n0 = ap[i];
+ udiv_qrnnd_preinv (*qp, r, r,
+ ((n1 << shift) | (n0 >> (GMP_LIMB_BITS - shift))),
+ d, dinv);
+ qp--;
+ n1 = n0;
+ }
+ udiv_qrnnd_preinv (*qp, r, r, n1 << shift, d, dinv);
+ qp--;
+ }
+
+ done_integer:
+ for (i = 0; i < xsize; i++)
+ {
+ udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+ qp--;
+ }
+
+ return r >> shift;
+}
+
+#endif /* USE_PREINV_DIVREM_1 */
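
udiv_qrnnd_preinv replaces every hardware division in the loops above by one multiplication with the precomputed dinv plus at most two adjustment steps (the Möller-Granlund 2/1 division). A self-contained sketch with 32-bit limbs, so the reciprocal and the two-limb numerator fit in ordinary 64-bit arithmetic; recip32 and div_2by1_preinv are illustrative names, not GMP's interface.

  #include <stdint.h>
  #include <assert.h>

  static uint32_t recip32 (uint32_t d)       /* d normalized: top bit set */
  {
    /* analogue of invert_limb: floor((2^64 - 1) / d) - 2^32 */
    return (uint32_t) (UINT64_MAX / d - ((uint64_t) 1 << 32));
  }

  static uint32_t div_2by1_preinv (uint32_t *r, uint32_t u1, uint32_t u0,
                                   uint32_t d, uint32_t dinv)
  {
    uint64_t p;
    uint32_t q1, q0, r0;

    assert (d >> 31);                        /* normalized divisor */
    assert (u1 < d);                         /* quotient fits one limb */

    p = (uint64_t) u1 * dinv + (((uint64_t) u1 << 32) | u0);
    q1 = (uint32_t) (p >> 32) + 1;           /* quotient estimate */
    q0 = (uint32_t) p;
    r0 = u0 - q1 * d;                        /* candidate remainder, mod 2^32 */

    if (r0 > q0)                             /* first (common) adjustment */
      {
        q1--;
        r0 += d;
      }
    if (r0 >= d)                             /* second (rare) adjustment */
      {
        q1++;
        r0 -= d;
      }
    *r = r0;
    return q1;
  }

Chaining div_2by1_preinv over the limbs of a number from high to low, with the running remainder as u1, is exactly the structure of the shift==0 loop above; the remainder stays below d throughout.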
diff --git a/vendor/gmp-6.3.0/mpn/generic/pre_mod_1.c b/vendor/gmp-6.3.0/mpn/generic/pre_mod_1.c
new file mode 100644
index 0000000..78ae308
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/pre_mod_1.c
@@ -0,0 +1,61 @@
+/* mpn_preinv_mod_1 (up, un, d, dinv) -- Divide (UP,,UN) by the normalized D.
+ DINV should be 2^(2*GMP_LIMB_BITS) / D - 2^GMP_LIMB_BITS.
+ Return the single-limb remainder.
+
+Copyright 1991, 1993, 1994, 2000-2002, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This function used to be documented, but is now considered obsolete. It
+ continues to exist for binary compatibility, even when not required
+ internally. */
+
+mp_limb_t
+mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
+{
+ mp_size_t i;
+ mp_limb_t n0, r;
+
+ ASSERT (un >= 1);
+ ASSERT (d & GMP_LIMB_HIGHBIT);
+
+ r = up[un - 1];
+ if (r >= d)
+ r -= d;
+
+ for (i = un - 2; i >= 0; i--)
+ {
+ n0 = up[i];
+ udiv_rnnd_preinv (r, r, n0, d, dinv);
+ }
+ return r;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/random.c b/vendor/gmp-6.3.0/mpn/generic/random.c
new file mode 100644
index 0000000..485f9eb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/random.c
@@ -0,0 +1,50 @@
+/* mpn_random -- Generate random numbers.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+void
+mpn_random (mp_ptr ptr, mp_size_t size)
+{
+ gmp_randstate_ptr rands;
+
+ /* FIXME: Is size==0 supposed to be allowed? */
+ ASSERT (size >= 0);
+
+ if (size == 0)
+ return;
+
+ rands = RANDS;
+ _gmp_rand (ptr, rands, size * GMP_NUMB_BITS);
+
+ /* Make sure the most significant limb is non-zero. */
+ while (ptr[size-1] == 0)
+ _gmp_rand (&ptr[size-1], rands, GMP_NUMB_BITS);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/random2.c b/vendor/gmp-6.3.0/mpn/generic/random2.c
new file mode 100644
index 0000000..1eede67
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/random2.c
@@ -0,0 +1,105 @@
+/* mpn_random2 -- Generate random numbers with relatively long strings
+ of ones and zeroes. Suitable for border testing.
+
+Copyright 1992-1994, 1996, 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
+
+/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
+ Thus, we get the same random number sequence in the common cases.
+ FIXME: We should always generate the same random number sequence! */
+#if GMP_NUMB_BITS < 32
+#define BITS_PER_RANDCALL GMP_NUMB_BITS
+#else
+#define BITS_PER_RANDCALL 32
+#endif
+
+void
+mpn_random2 (mp_ptr rp, mp_size_t n)
+{
+ gmp_randstate_ptr rstate = RANDS;
+  int bit_pos;			/* bit number of the least significant bit
+			   where the next bit field is to be inserted */

+ mp_limb_t ran, ranm; /* buffer for random bits */
+
+ /* FIXME: Is n==0 supposed to be allowed? */
+ ASSERT (n >= 0);
+
+ _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+ ran = ranm;
+
+ /* Start off at a random bit position in the most significant limb. */
+ bit_pos = ran % GMP_NUMB_BITS;
+
+ gmp_rrandomb (rp, rstate, n * GMP_NUMB_BITS - bit_pos);
+}
+
+static void
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+{
+ mp_bitcnt_t bi;
+ mp_limb_t ranm; /* buffer for random bits */
+ unsigned cap_chunksize, chunksize;
+ mp_size_t i;
+
+ /* Set entire result to 111..1 */
+ i = BITS_TO_LIMBS (nbits) - 1;
+ rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;
+ for (i = i - 1; i >= 0; i--)
+ rp[i] = GMP_NUMB_MAX;
+
+ _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+ cap_chunksize = nbits / (ranm % 4 + 1);
+ cap_chunksize += cap_chunksize == 0; /* make it at least 1 */
+
+ bi = nbits;
+
+ for (;;)
+ {
+ _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+ chunksize = 1 + ranm % cap_chunksize;
+ bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+ if (bi == 0)
+ break; /* low chunk is ...1 */
+
+ rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;
+
+ _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+ chunksize = 1 + ranm % cap_chunksize;
+ bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+ mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);
+
+ if (bi == 0)
+ break; /* low chunk is ...0 */
+ }
+}
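
gmp_rrandomb first sets the whole number to all ones and then walks down the bit positions, toggling a bit at one random boundary and incrementing at the next, so the result consists of random-length runs of ones and zeros. A rough single-word analogue, with rand() standing in for _gmp_rand; not GMP code.

  #include <stdint.h>
  #include <stdlib.h>

  /* One 64-bit word built from alternating runs of ones and zeros of
     random length, the kind of boundary-case pattern mpn_random2 aims for. */
  uint64_t random2_word (void)
  {
    uint64_t w = 0;
    int bit = 63;                    /* next unfilled bit, counting down */
    int ones = 1;                    /* start with a run of ones at the top */

    while (bit >= 0)
      {
        int len = 1 + rand () % 16;  /* random run length, 1..16 */
        if (len > bit + 1)
          len = bit + 1;
        if (ones)
          w |= ((UINT64_C (1) << len) - 1) << (bit - len + 1);
        bit -= len;
        ones = !ones;
      }
    return w;
  }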
diff --git a/vendor/gmp-6.3.0/mpn/generic/redc_1.c b/vendor/gmp-6.3.0/mpn/generic/redc_1.c
new file mode 100644
index 0000000..eab128f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/redc_1.c
@@ -0,0 +1,56 @@
+/* mpn_redc_1. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
+ mp[] is n limbs; up[] is 2n limbs.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000-2002, 2004, 2008, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+{
+ mp_size_t j;
+ mp_limb_t cy;
+
+ ASSERT (n > 0);
+ ASSERT_MPN (up, 2*n);
+
+ for (j = n - 1; j >= 0; j--)
+ {
+ cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
+ ASSERT (up[0] == 0);
+ up[0] = cy;
+ up++;
+ }
+
+ cy = mpn_add_n (rp, up, up - n, n);
+ return cy;
+}
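
mpn_redc_1 is word-by-word Montgomery reduction: each pass adds the multiple of mp that clears the lowest limb of up, and the final mpn_add_n folds the saved carry limbs back in. The n == 1 case collapses to the classic single-word REDC; a sketch assuming GCC/Clang __int128, m odd, m < 2^63 and t < m * 2^64 so the 128-bit intermediate cannot overflow (the mpn code instead carries the overflow limb explicitly through up[0] = cy). Not GMP's interface.

  #include <stdint.h>

  static uint64_t neg_inverse (uint64_t m)   /* -m^{-1} mod 2^64, m odd */
  {
    uint64_t inv = m;                        /* correct to 3 bits (m odd) */
    for (int i = 0; i < 5; i++)
      inv *= 2 - m * inv;                    /* Newton step doubles the precision */
    return -inv;                             /* powm negates binvert_limb's result too */
  }

  static uint64_t redc_1_word (unsigned __int128 t, uint64_t m, uint64_t minv)
  {
    uint64_t q = (uint64_t) t * minv;        /* makes t + q*m divisible by 2^64 */
    unsigned __int128 s = t + (unsigned __int128) q * m;
    uint64_t r = (uint64_t) (s >> 64);       /* r == t / 2^64 mod m, with r < 2*m */
    return r >= m ? r - m : r;               /* canonicalise, as powm's last step does */
  }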
diff --git a/vendor/gmp-6.3.0/mpn/generic/redc_2.c b/vendor/gmp-6.3.0/mpn/generic/redc_2.c
new file mode 100644
index 0000000..8d15589
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/redc_2.c
@@ -0,0 +1,110 @@
+/* mpn_redc_2. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
+ mp[] is n limbs; up[] is 2n limbs.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000-2002, 2004, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS != 0
+you lose
+#endif
+
+/* For testing purposes, define our own mpn_addmul_2 if there is none already
+ available. */
+#ifndef HAVE_NATIVE_mpn_addmul_2
+#undef mpn_addmul_2
+static mp_limb_t
+mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
+{
+ rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
+ return mpn_addmul_1 (rp + 1, up, n, vp[1]);
+}
+#endif
+
+#if defined (__GNUC__) && ! defined (NO_ASM) \
+ && defined (__ia64) && W_TYPE_SIZE == 64
+#define umul2low(ph, pl, uh, ul, vh, vl) \
+ do { \
+ mp_limb_t _ph, _pl; \
+ __asm__ ("xma.hu %0 = %3, %5, f0\n\t" \
+ "xma.l %1 = %3, %5, f0\n\t" \
+ ";;\n\t" \
+ "xma.l %0 = %3, %4, %0\n\t" \
+ ";;\n\t" \
+ "xma.l %0 = %2, %5, %0" \
+ : "=&f" (ph), "=&f" (pl) \
+ : "f" (uh), "f" (ul), "f" (vh), "f" (vl)); \
+ } while (0)
+#endif
+
+#ifndef umul2low
+#define umul2low(ph, pl, uh, ul, vh, vl) \
+ do { \
+ mp_limb_t _ph, _pl; \
+ umul_ppmm (_ph, _pl, ul, vl); \
+ (ph) = _ph + (ul) * (vh) + (uh) * (vl); \
+ (pl) = _pl; \
+ } while (0)
+#endif
+
+mp_limb_t
+mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
+{
+ mp_limb_t q[2];
+ mp_size_t j;
+ mp_limb_t upn;
+ mp_limb_t cy;
+
+ ASSERT (n > 0);
+ ASSERT_MPN (up, 2*n);
+
+ if ((n & 1) != 0)
+ {
+ up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);
+ up++;
+ }
+
+ for (j = n - 2; j >= 0; j -= 2)
+ {
+ umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);
+ upn = up[n]; /* mpn_addmul_2 overwrites this */
+ up[1] = mpn_addmul_2 (up, mp, n, q);
+ up[0] = up[n];
+ up[n] = upn;
+ up += 2;
+ }
+
+ cy = mpn_add_n (rp, up, up - n, n);
+ return cy;
+}
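
The portable umul2low above computes only the low two limbs of the product mip * {up,2}, because the quotient q in REDC-2 is only needed mod B^2, so the uh*vh term can never matter. The same identity with 32-bit limbs and plain 64-bit arithmetic; illustrative only, not GMP's macro.

  #include <stdint.h>

  static void umul2low_32 (uint32_t *ph, uint32_t *pl,
                           uint32_t uh, uint32_t ul,
                           uint32_t vh, uint32_t vl)
  {
    uint64_t low = (uint64_t) ul * vl;       /* the only full product needed */
    *pl = (uint32_t) low;
    *ph = (uint32_t) (low >> 32) + ul * vh + uh * vl;  /* cross terms, mod 2^32 */
  }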
diff --git a/vendor/gmp-6.3.0/mpn/generic/redc_n.c b/vendor/gmp-6.3.0/mpn/generic/redc_n.c
new file mode 100644
index 0000000..0c94b7c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/redc_n.c
@@ -0,0 +1,80 @@
+/* mpn_redc_n. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
+ mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/*
+ TODO
+
+ * We assume mpn_mulmod_bnm1 is always faster than plain mpn_mul_n (or a
+ future mpn_mulhi) for the range we will be called. Follow up that
+ assumption.
+
+ * Decrease scratch usage.
+
+ * Consider removing the residue canonicalisation.
+*/
+
+void
+mpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)
+{
+ mp_ptr xp, yp, scratch;
+ mp_limb_t cy;
+ mp_size_t rn;
+ TMP_DECL;
+ TMP_MARK;
+
+ ASSERT (n > 8);
+
+ rn = mpn_mulmod_bnm1_next_size (n);
+
+ scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));
+
+ xp = scratch;
+ mpn_mullo_n (xp, up, ip, n);
+
+ yp = scratch + n;
+ mpn_mulmod_bnm1 (yp, rn, xp, n, mp, n, scratch + n + rn);
+
+ ASSERT_ALWAYS (2 * n > rn); /* could handle this */
+
+ cy = mpn_sub_n (yp + rn, yp, up, 2*n - rn); /* undo wrap around */
+ MPN_DECR_U (yp + 2*n - rn, rn, cy);
+
+ cy = mpn_sub_n (rp, up + n, yp + n, n);
+ if (cy != 0)
+ mpn_add_n (rp, rp, mp, n);
+
+ TMP_FREE;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/remove.c b/vendor/gmp-6.3.0/mpn/generic/remove.c
new file mode 100644
index 0000000..cbb0742
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/remove.c
@@ -0,0 +1,182 @@
+/* mpn_remove -- divide out all multiples of odd mpn number from another mpn
+ number.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009, 2012-2014, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#if GMP_LIMB_BITS > 50
+#define LOG 50
+#else
+#define LOG GMP_LIMB_BITS
+#endif
+
+
+/* Input: U = {up,un}, V = {vp,vn} must be odd, cap
+   Output W = {wp,*wn}; allocation need is exactly *wn
+
+ Set W = U / V^k, where k is the largest integer <= cap such that the
+ division yields an integer.
+
+ FIXME: We currently allow any operand overlap. This is quite non mpn-ish
+   and might be changed, since it costs significant temporary space.
+ * If we require W to have space for un + 1 limbs, we could save qp or qp2
+ (but we will still need to copy things into wp 50% of the time).
+ * If we allow ourselves to clobber U, we could save the other of qp and qp2,
+ and the initial COPY (but also here we would need un + 1 limbs).
+*/
+
+/* FIXME: We need to wrap mpn_bdiv_qr due to the itch interface. This need
+ indicates a flaw in the current itch mechanism: Which operands not greater
+ than un,un will incur the worst itch? We need a parallel foo_maxitch set
+ of functions. */
+static void
+mpn_bdiv_qr_wrap (mp_ptr qp, mp_ptr rp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+{
+ mp_ptr scratch_out;
+ TMP_DECL;
+
+ TMP_MARK;
+ scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (nn, dn));
+ mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch_out);
+
+ TMP_FREE;
+}
+
+mp_bitcnt_t
+mpn_remove (mp_ptr wp, mp_size_t *wn,
+ mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn,
+ mp_bitcnt_t cap)
+{
+ mp_srcptr pwpsp[LOG];
+ mp_size_t pwpsn[LOG];
+ mp_size_t npowers;
+ mp_ptr tp, qp, np, qp2;
+ mp_srcptr pp;
+ mp_size_t pn, nn, qn, i;
+ mp_bitcnt_t pwr;
+ TMP_DECL;
+
+ ASSERT (un > 0);
+ ASSERT (vn > 0);
+ ASSERT (vp[0] % 2 != 0); /* 2-adic division wants odd numbers */
+ ASSERT (vn > 1 || vp[0] > 1); /* else we would loop indefinitely */
+
+ TMP_MARK;
+
+ TMP_ALLOC_LIMBS_3 (qp, un + 1, /* quotient, alternating */
+ qp2, un + 1, /* quotient, alternating */
+ tp, (un + 1 + vn) / 2); /* remainder */
+ pp = vp;
+ pn = vn;
+
+ MPN_COPY (qp, up, un);
+ qn = un;
+
+ npowers = 0;
+ while (qn >= pn)
+ {
+ qp[qn] = 0;
+ mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
+ if (!mpn_zero_p (tp, pn))
+ {
+ if (mpn_cmp (tp, pp, pn) != 0)
+ break; /* could not divide by V^npowers */
+ }
+
+ MP_PTR_SWAP (qp, qp2);
+ qn = qn - pn;
+ mpn_neg (qp, qp, qn+1);
+
+ qn += qp[qn] != 0;
+
+ pwpsp[npowers] = pp;
+ pwpsn[npowers] = pn;
+ ++npowers;
+
+ if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
+ break;
+
+ nn = 2 * pn - 1; /* next power will be at least this large */
+ if (nn > qn)
+ break; /* next power would be overlarge */
+
+ if (npowers == 1) /* Alloc once, but only if it's needed */
+ np = TMP_ALLOC_LIMBS (qn + LOG); /* powers of V */
+ else
+ np += pn;
+
+ mpn_sqr (np, pp, pn);
+ pn = nn + (np[nn] != 0);
+ pp = np;
+ }
+
+ pwr = ((mp_bitcnt_t) 1 << npowers) - 1;
+
+ for (i = npowers; --i >= 0;)
+ {
+ pn = pwpsn[i];
+ if (qn < pn)
+ continue;
+
+ if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
+ continue; /* V^i would bring us past cap */
+
+ qp[qn] = 0;
+ mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pwpsp[i], pn);
+ if (!mpn_zero_p (tp, pn))
+ {
+ if (mpn_cmp (tp, pwpsp[i], pn) != 0)
+ continue; /* could not divide by V^i */
+ }
+
+ MP_PTR_SWAP (qp, qp2);
+ qn = qn - pn;
+ mpn_neg (qp, qp, qn+1);
+
+ qn += qp[qn] != 0;
+
+ pwr += (mp_bitcnt_t) 1 << i;
+ }
+
+ MPN_COPY (wp, qp, qn);
+ *wn = qn;
+
+ TMP_FREE;
+
+ return pwr;
+}
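
The structure above has two phases: the first divides by V, V^2, V^4, ... while each power still divides, removing an exponent of the form 2^npowers - 1; the second walks the stored powers from largest to smallest, completing the binary representation of the multiplicity, subject to cap. A single-word sketch of the same two phases; remove_factor is a hypothetical name, the overflow of the squaring is guarded by a division check, and this is not GMP code.

  #include <stdint.h>

  /* Divide u by the largest power v^e with e <= cap that divides it exactly,
     and return e.  Requires v odd, v > 1, u >= 1, like the mpn version. */
  static unsigned remove_factor (uint64_t *u, uint64_t v, unsigned cap)
  {
    uint64_t pows[8];              /* v, v^2, v^4, ... (few fit in 64 bits) */
    unsigned npowers = 0, e = 0;
    uint64_t p = v;

    /* Phase 1: divide by v, v^2, v^4, ... while possible; the removed
       exponent after this loop is 2^npowers - 1.  */
    while ((2u << npowers) - 1 <= cap && *u % p == 0)
      {
        *u /= p;
        pows[npowers++] = p;
        e = (1u << npowers) - 1;
        if (p > *u / p)            /* p^2 cannot divide the rest; also keeps
                                      the squaring below from overflowing */
          break;
        p *= p;
      }

    /* Phase 2: binary descent over the stored powers, largest first,
       mirroring the second loop of mpn_remove.  */
    for (unsigned i = npowers; i-- > 0; )
      if (e + (1u << i) <= cap && *u % pows[i] == 0)
        {
          *u /= pows[i];
          e += 1u << i;
        }
    return e;
  }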
diff --git a/vendor/gmp-6.3.0/mpn/generic/rootrem.c b/vendor/gmp-6.3.0/mpn/generic/rootrem.c
new file mode 100644
index 0000000..a79099e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/rootrem.c
@@ -0,0 +1,515 @@
+/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
+ store the truncated integer part at rootp and the remainder at remp.
+
+ Contributed by Paul Zimmermann (algorithm) and
+ Paul Zimmermann and Torbjorn Granlund (implementation).
+ Marco Bodrato wrote logbased_root to seed the loop.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES. IT'S
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* FIXME:
+ This implementation is not optimal when remp == NULL, since the complexity
+ is M(n), whereas it should be M(n/k) on average.
+*/
+
+#include <stdio.h> /* for NULL */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
+ mp_limb_t, int);
+
+#define MPN_RSHIFT(rp,up,un,cnt) \
+ do { \
+ if ((cnt) != 0) \
+ mpn_rshift (rp, up, un, cnt); \
+ else \
+ { \
+ MPN_COPY_INCR (rp, up, un); \
+ } \
+ } while (0)
+
+#define MPN_LSHIFT(cy,rp,up,un,cnt) \
+ do { \
+ if ((cnt) != 0) \
+ cy = mpn_lshift (rp, up, un, cnt); \
+ else \
+ { \
+ MPN_COPY_DECR (rp, up, un); \
+ cy = 0; \
+ } \
+ } while (0)
+
+
+/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
+ If remp <> NULL, put in {remp, un} the remainder.
+ Return the size (in limbs) of the remainder if remp <> NULL,
+ or a non-zero value iff the remainder is non-zero when remp = NULL.
+ Assumes:
+ (a) up[un-1] is not zero
+ (b) rootp has at least space for ceil(un/k) limbs
+ (c) remp has at least space for un limbs (in case remp <> NULL)
+ (d) the operands do not overlap.
+
+ The auxiliary memory usage is 3*un+2 if remp = NULL,
+ and 2*un+2 if remp <> NULL. FIXME: This is an incorrect comment.
+*/
+mp_size_t
+mpn_rootrem (mp_ptr rootp, mp_ptr remp,
+ mp_srcptr up, mp_size_t un, mp_limb_t k)
+{
+ ASSERT (un > 0);
+ ASSERT (up[un - 1] != 0);
+ ASSERT (k > 1);
+
+ if (UNLIKELY (k == 2))
+ return mpn_sqrtrem (rootp, remp, up, un);
+ /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */
+ if (remp == NULL && (un + 2) / 3 > k)
+ /* Pad {up,un} with k zero limbs. This will produce an approximate root
+ with one more limb, allowing us to compute the exact integral result. */
+ {
+ mp_ptr sp, wp;
+ mp_size_t rn, sn, wn;
+ TMP_DECL;
+ TMP_MARK;
+ wn = un + k;
+ sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
+ TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
+ sp, sn); /* approximate root of padded input */
+ MPN_COPY (wp + k, up, un);
+ MPN_FILL (wp, k, 0);
+ rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
+ /* The approximate root S = {sp,sn} is either the correct root of
+ {sp,sn}, or 1 too large. Thus unless the least significant limb of
+ S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
+ limb. (In case sp[0]=1, we can deduce the root, but not decide
+ whether it is exact or not.) */
+ MPN_COPY (rootp, sp + 1, sn - 1);
+ TMP_FREE;
+ return rn;
+ }
+ else
+ {
+ return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
+ }
+}
+
+#define LOGROOT_USED_BITS 8
+#define LOGROOT_NEEDS_TWO_CORRECTIONS 1
+#define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
+/* Puts in *rootp some bits of the kth root of the number
+ 2^bitn * 1.op ; where op represents the "fractional" bits.
+
+ The returned value is the number of bits of the root minus one;
+ i.e. an approximation of the root will be
+ (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).
+
+ Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
+ one is not counted).
+ */
+static unsigned
+logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
+{
+ /* vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
+ static const
+ unsigned char vlog[] = {1, 2, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 18, 19, 21, 22,
+ 23, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 42, 43,
+ 44, 46, 47, 48, 49, 51, 52, 53, 54, 56, 57, 58, 59, 61, 62, 63,
+ 64, 65, 67, 68, 69, 70, 71, 73, 74, 75, 76, 77, 78, 80, 81, 82,
+ 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 96, 97, 98, 99, 100,
+ 101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+ 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
+ 194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
+ 207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
+ 220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
+ 232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
+ 245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};
+
+ /* vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
+ static const
+ unsigned char vexp[] = {0, 1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 9, 10, 11,
+ 12, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 23,
+ 23, 24, 25, 26, 26, 27, 28, 29, 30, 30, 31, 32, 33, 33, 34, 35,
+ 36, 37, 37, 38, 39, 40, 41, 41, 42, 43, 44, 45, 45, 46, 47, 48,
+ 49, 50, 50, 51, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 61,
+ 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75,
+ 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 87, 88, 89, 90,
+ 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
+ 107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
+ 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
+ 139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
+ 157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
+ 175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
+ 194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
+ 214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
+ 235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
+ mp_bitcnt_t retval;
+
+ if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
+ {
+ /* In the unlikely case, we use two divisions and a modulo. */
+ retval = bitn / k;
+ bitn %= k;
+ bitn = (bitn << LOGROOT_USED_BITS |
+ vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
+ }
+ else
+ {
+ bitn = (bitn << LOGROOT_USED_BITS |
+ vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
+ retval = bitn >> LOGROOT_USED_BITS;
+ bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
+ }
+ ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
+ *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
+ | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
+ return retval;
+}
+
+/* if approx is non-zero, does not compute the final remainder */
+static mp_size_t
+mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
+ mp_limb_t k, int approx)
+{
+ mp_ptr qp, rp, sp, wp, scratch;
+ mp_size_t qn, rn, sn, wn, nl, bn;
+ mp_limb_t save, save2, cy, uh;
+ mp_bitcnt_t unb; /* number of significant bits of {up,un} */
+ mp_bitcnt_t xnb; /* number of significant bits of the result */
+ mp_bitcnt_t b, kk;
+ mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
+ int ni;
+ int perf_pow;
+ unsigned ulz, snb, c, logk;
+ TMP_DECL;
+
+ /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
+ uh = up[un - 1];
+ count_leading_zeros (ulz, uh);
+ ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
+ unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
+  /* unb is the (truncated) logarithm of the input U in base 2 */
+
+ if (unb < k) /* root is 1 */
+ {
+ rootp[0] = 1;
+ if (remp == NULL)
+      un -= (*up == CNST_LIMB (1)); /* Non-zero iff {up,un} > 1 */
+ else
+ {
+ mpn_sub_1 (remp, up, un, CNST_LIMB (1));
+ un -= (remp [un - 1] == 0); /* There should be at most one zero limb,
+ if we demand u to be normalized */
+ }
+ return un;
+ }
+ /* if (unb - k < k/2 + k/16) // root is 2 */
+
+ if (ulz == GMP_NUMB_BITS)
+ uh = up[un - 2];
+ else
+ uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
+ ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);
+
+ xnb = logbased_root (rootp, uh, unb, k);
+ snb = LOGROOT_RETURNED_BITS - 1;
+ /* xnb+1 is the number of bits of the root R */
+ /* snb+1 is the number of bits of the current approximation S */
+
+ kk = k * xnb; /* number of truncated bits in the input */
+
+ /* FIXME: Should we skip the next two loops when xnb <= snb ? */
+ for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk )
+ ;
+ /* logk = ceil(log(k)/log(2)) + 1 */
+
+ /* xnb is the number of remaining bits to determine in the kth root */
+ for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
+ {
+ /* invariant: here we want xnb+1 total bits for the kth root */
+
+ /* if c is the new value of xnb, this means that we'll go from a
+ root of c+1 bits (say s') to a root of xnb+1 bits.
+ It is proved in the book "Modern Computer Arithmetic" by Brent
+ and Zimmermann, Chapter 1, that
+ if s' >= k*beta, then at most one correction is necessary.
+ Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
+ c >= ceil((xnb + log2(k))/2). */
+ if (xnb > logk)
+ xnb = (xnb + logk) / 2;
+ else
+ --xnb; /* add just one bit at a time */
+ }
+
+ *rootp >>= snb - xnb;
+ kk -= xnb;
+
+ ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
+ /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
+ sizes[i] <= 2 * sizes[i+1].
+ Newton iteration will first compute sizes[ni-1] extra bits,
+ then sizes[ni-2], ..., then sizes[0] = b. */
+
+ TMP_MARK;
+ /* qp and wp need enough space to store S'^k where S' is an approximate
+ root. Since S' can be as large as S+2, the worst case is when S=2 and
+ S'=4. But then since we know the number of bits of S in advance, S'
+ can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+ So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+ fits in un limbs, the number of extra limbs needed is bounded by
+ ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+ /* THINK: with the use of logbased_root, maybe the constant is
+ 258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+ TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
+ qp, un + EXTRA, /* will contain quotient and remainder
+ of R/(k*S^(k-1)), and S^k */
+ wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+ and temporary for mpn_pow_1 */
+
+ if (remp == NULL)
+ rp = scratch; /* will contain the remainder */
+ else
+ rp = remp;
+ sp = rootp;
+
+ sn = 1; /* Initial approximation has one limb */
+
+ for (b = xnb; ni != 0; --ni)
+ {
+ /* 1: loop invariant:
+ {sp, sn} is the current approximation of the root, which has
+ exactly 1 + sizes[ni] bits.
+ {rp, rn} is the current remainder
+ {wp, wn} = {sp, sn}^(k-1)
+ kk = number of truncated bits of the input
+ */
+
+ /* Since each iteration treats b bits from the root and thus k*b bits
+ from the input, and we already considered b bits from the input,
+ we now have to take another (k-1)*b bits from the input. */
+ kk -= (k - 1) * b; /* remaining input bits */
+ /* {rp, rn} = floor({up, un} / 2^kk) */
+ rn = un - kk / GMP_NUMB_BITS;
+ MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
+ rn -= rp[rn - 1] == 0;
+
+ /* 9: current buffers: {sp,sn}, {rp,rn} */
+
+ for (c = 0;; c++)
+ {
+ /* Compute S^k in {qp,qn}. */
+ /* W <- S^(k-1) for the next iteration,
+ and S^k = W * S. */
+ wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
+ mpn_mul (qp, wp, wn, sp, sn);
+ qn = wn + sn;
+ qn -= qp[qn - 1] == 0;
+
+ perf_pow = 1;
+ /* if S^k > floor(U/2^kk), the root approximation was too large */
+ if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
+ MPN_DECR_U (sp, sn, 1);
+ else
+ break;
+ }
+
+ /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
+
+ /* sometimes two corrections are needed with logbased_root*/
+ ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
+ ASSERT_ALWAYS (rn >= qn);
+
+ b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
+ next iteration */
+ bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */
+
+ kk = kk - b;
+ /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
+ nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
+ /* nl = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
+ - floor(kk / GMP_NUMB_BITS)
+ <= 1 + (kk + b - 1) / GMP_NUMB_BITS
+ - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
+ = 2 + (b - 2) / GMP_NUMB_BITS
+ thus since nl is an integer:
+ nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
+
+ /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+ /* R = R - Q = floor(U/2^kk) - S^k */
+ if (perf_pow != 0)
+ {
+ mpn_sub (rp, rp, rn, qp, qn);
+ MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+ /* first multiply the remainder by 2^b */
+ MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
+ rn = rn + bn;
+ if (cy != 0)
+ {
+ rp[rn] = cy;
+ rn++;
+ }
+
+ save = rp[bn];
+ /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
+ if (nl - 1 > bn)
+ save2 = rp[bn + 1];
+ }
+ else
+ {
+ rn = bn;
+ save2 = save = 0;
+ }
+ /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+ /* Now insert bits [kk,kk+b-1] from the input U */
+ MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
+ /* set to zero high bits of rp[bn] */
+ rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
+ /* restore corresponding bits */
+ rp[bn] |= save;
+ if (nl - 1 > bn)
+ rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
+ they start by bit 0 in rp[0], so they use
+ at most ceil(b/GMP_NUMB_BITS) limbs */
+ /* FIXME: Should we normalise {rp,rn} here ?*/
+
+ /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+ /* compute {wp, wn} = k * {sp, sn}^(k-1) */
+ cy = mpn_mul_1 (wp, wp, wn, k);
+ wp[wn] = cy;
+ wn += cy != 0;
+
+ /* 6: current buffers: {sp,sn}, {qp,qn} */
+
+ /* multiply the root approximation by 2^b */
+ MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
+ sn = sn + b / GMP_NUMB_BITS;
+ if (cy != 0)
+ {
+ sp[sn] = cy;
+ sn++;
+ }
+
+ save = sp[b / GMP_NUMB_BITS];
+
+ /* Number of limbs used by b bits, when least significant bit is
+ aligned to least limb */
+ bn = (b - 1) / GMP_NUMB_BITS + 1;
+
+ /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+ /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
+ if (UNLIKELY (rn < wn))
+ {
+ MPN_FILL (sp, bn, 0);
+ }
+ else
+ {
+ qn = rn - wn; /* expected quotient size */
+ if (qn <= bn) { /* Divide only if result is not too big. */
+ mpn_div_q (qp, rp, rn, wp, wn, scratch);
+ qn += qp[qn] != 0;
+ }
+
+ /* 5: current buffers: {sp,sn}, {qp,qn}.
+ Note: {rp,rn} is not needed any more since we'll compute it from
+ scratch at the end of the loop.
+ */
+
+ /* the quotient should be smaller than 2^b, since the previous
+ approximation was correctly rounded toward zero */
+ if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
+ qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
+ {
+ for (qn = 1; qn < bn; ++qn)
+ sp[qn - 1] = GMP_NUMB_MAX;
+ sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
+ }
+ else
+ {
+ /* 7: current buffers: {sp,sn}, {qp,qn} */
+
+ /* Combine sB and q to form sB + q. */
+ MPN_COPY (sp, qp, qn);
+ MPN_ZERO (sp + qn, bn - qn);
+ }
+ }
+ sp[b / GMP_NUMB_BITS] |= save;
+
+ /* 8: current buffer: {sp,sn} */
+
+ }
+
+ /* otherwise we have rn > 0, thus the return value is ok */
+ if (!approx || sp[0] <= CNST_LIMB (1))
+ {
+ for (c = 0;; c++)
+ {
+ /* Compute S^k in {qp,qn}. */
+ /* Last iteration: we don't need W anymore. */
+ /* mpn_pow_1 requires that both qp and wp have enough
+ space to store the result {sp,sn}^k + 1 limb */
+ qn = mpn_pow_1 (qp, sp, sn, k, wp);
+
+ perf_pow = 1;
+ if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
+ MPN_DECR_U (sp, sn, 1);
+ else
+ break;
+ };
+
+ /* sometimes two corrections are needed with logbased_root*/
+ ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
+
+ rn = perf_pow != 0;
+ if (rn != 0 && remp != NULL)
+ {
+ mpn_sub (remp, up, un, qp, qn);
+ rn = un;
+ MPN_NORMALIZE_NOT_ZERO (remp, rn);
+ }
+ }
+
+ TMP_FREE;
+ return rn;
+}
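
Each pass of the loop above roughly doubles the number of known root bits: it divides the newly shifted-in part of U by k*S^(k-1), appends the quotient to S, and corrects by at most two units. A much simpler scalar variant of the same Newton-with-correction idea for a single word; iroot is a hypothetical name, GCC/Clang builtins and __int128 are assumed, and this is not GMP code.

  #include <stdint.h>

  static uint64_t ipow_sat (uint64_t x, unsigned e)   /* x^e, saturating */
  {
    uint64_t r = 1;
    while (e--)
      {
        if (x != 0 && r > UINT64_MAX / x)
          return UINT64_MAX;                          /* would overflow */
        r *= x;
      }
    return r;
  }

  /* floor(u^(1/k)) for k >= 2 */
  uint64_t iroot (uint64_t u, unsigned k)
  {
    if (u == 0)
      return 0;

    /* Initial over-estimate: 2^ceil(bits(u)/k) > u^(1/k), cf. logbased_root. */
    unsigned bits = 64 - __builtin_clzll (u);
    uint64_t x = (uint64_t) 1 << ((bits + k - 1) / k);

    for (;;)
      {
        /* Newton step t = ((k-1)*x + u/x^(k-1)) / k, the sum done in 128
           bits so it cannot overflow; the saturating power only matters
           when x^(k-1) > u, where the quotient is 0 or 1 anyway.  */
        uint64_t q = u / ipow_sat (x, k - 1);
        uint64_t t = (uint64_t) (((unsigned __int128) (k - 1) * x + q) / k);
        if (t >= x)
          return x;                                   /* x == floor(u^(1/k)) */
        x = t;
      }
  }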
diff --git a/vendor/gmp-6.3.0/mpn/generic/rshift.c b/vendor/gmp-6.3.0/mpn/generic/rshift.c
new file mode 100644
index 0000000..15d427d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/rshift.c
@@ -0,0 +1,69 @@
+/* mpn_rshift -- Shift right low level.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and N limbs long) cnt bits to the right
+ and store the n least significant limbs of the result at rp.
+ The bits shifted out to the right are returned.
+
+ Argument constraints:
+ 1. 0 < cnt < GMP_NUMB_BITS.
+ 2. If the result is to be written over the input, rp must be <= up.
+*/
+
+mp_limb_t
+mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+ mp_limb_t high_limb, low_limb;
+ unsigned int tnc;
+ mp_size_t i;
+ mp_limb_t retval;
+
+ ASSERT (n >= 1);
+ ASSERT (cnt >= 1);
+ ASSERT (cnt < GMP_NUMB_BITS);
+ ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+
+ tnc = GMP_NUMB_BITS - cnt;
+ high_limb = *up++;
+ retval = (high_limb << tnc) & GMP_NUMB_MASK;
+ low_limb = high_limb >> cnt;
+
+ for (i = n - 1; i != 0; i--)
+ {
+ high_limb = *up++;
+ *rp++ = low_limb | ((high_limb << tnc) & GMP_NUMB_MASK);
+ low_limb = high_limb >> cnt;
+ }
+ *rp = low_limb;
+
+ return retval;
+}
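
The return value carries the cnt bits shifted out, already left-aligned in a limb, so a caller can recover U mod 2^cnt without a second pass. A small sketch of that convention on a fixed two-limb operand, assuming a 64-bit limb with no nail bits; not GMP code.

  #include <stdint.h>
  #include <stdio.h>

  void rshift_demo (void)
  {
    uint64_t up[2] = { 0x123456789abcdef0u, 0x0fedcba987654321u };
    uint64_t rp[2];
    unsigned cnt = 12;

    /* same loop structure as mpn_rshift, specialised to n == 2 */
    uint64_t retval = up[0] << (64 - cnt);               /* bits shifted out */
    rp[0] = (up[0] >> cnt) | (up[1] << (64 - cnt));      /* floor(U / 2^cnt) */
    rp[1] = up[1] >> cnt;

    /* retval holds the low cnt bits of U in its top cnt positions */
    printf ("U mod 2^cnt = 0x%llx\n",
            (unsigned long long) (retval >> (64 - cnt)));
  }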
diff --git a/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_q.c b/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_q.c
new file mode 100644
index 0000000..850e593
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_q.c
@@ -0,0 +1,96 @@
+/* mpn_sbpi1_bdiv_q -- schoolbook Hensel division with precomputed inverse,
+ returning quotient only.
+
+ Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+ IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Computes Q = - U / D mod B^un, destroys U.
+
+ D must be odd. dinv is (-D)^-1 mod B.
+
+*/
+
+void
+mpn_sbpi1_bdiv_q (mp_ptr qp,
+ mp_ptr up, mp_size_t un,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
+{
+ mp_size_t i;
+ mp_limb_t q;
+
+ ASSERT (dn > 0);
+ ASSERT (un >= dn);
+ ASSERT ((dp[0] & 1) != 0);
+ ASSERT (-(dp[0] * dinv) == 1);
+ ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
+
+ if (un > dn)
+ {
+ mp_limb_t cy, hi;
+ for (i = un - dn - 1, cy = 0; i > 0; i--)
+ {
+ q = dinv * up[0];
+ hi = mpn_addmul_1 (up, dp, dn, q);
+
+ ASSERT (up[0] == 0);
+ *qp++ = q;
+ hi += cy;
+ cy = hi < cy;
+ hi += up[dn];
+ cy += hi < up[dn];
+ up[dn] = hi;
+ up++;
+ }
+ q = dinv * up[0];
+ hi = cy + mpn_addmul_1 (up, dp, dn, q);
+ ASSERT (up[0] == 0);
+ *qp++ = q;
+ up[dn] += hi;
+ up++;
+ }
+ for (i = dn; i > 1; i--)
+ {
+ mp_limb_t q = dinv * up[0];
+ mpn_addmul_1 (up, dp, i, q);
+ ASSERT (up[0] == 0);
+ *qp++ = q;
+ up++;
+ }
+
+ /* Final limb */
+ *qp = dinv * up[0];
+}
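
Each step above forms one quotient limb as q = dinv * up[0]; adding q*D then clears the low limb, which is what makes the division exact 2-adically. A word-sized sketch of the underlying identity in its plain positive form (the function itself computes -U/D with dinv = (-D)^{-1} mod B); binv mirrors binvert_limb, the names are illustrative, and this is not GMP's interface.

  #include <stdint.h>
  #include <assert.h>

  static uint64_t binv (uint64_t d)            /* d^{-1} mod 2^64, d odd */
  {
    uint64_t inv = d;                          /* correct to 3 bits */
    for (int i = 0; i < 5; i++)
      inv *= 2 - d * inv;                      /* Newton step doubles the precision */
    return inv;
  }

  uint64_t exact_div_word (uint64_t u, uint64_t d)
  {
    assert (d & 1);
    return u * binv (d);                       /* exact quotient when d divides u */
  }

  /* e.g. exact_div_word (35 * 12345, 35) == 12345, with no hardware division */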
diff --git a/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_qr.c b/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_qr.c
new file mode 100644
index 0000000..6146c45
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_qr.c
@@ -0,0 +1,82 @@
+/* mpn_sbpi1_bdiv_qr -- schoolbook Hensel division with precomputed inverse,
+ returning quotient and remainder.
+
+ Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+ IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+/* Computes a binary quotient of size qn = un - dn.
+ Output:
+
+ Q = -U * D^{-1} mod B^qn,
+
+ R = (U + Q * D) * B^(-qn)
+
+ Stores the dn least significant limbs of R at {up + un - dn, dn},
+ and returns the carry from the addition N + Q*D.
+
+ D must be odd. dinv is (-D)^-1 mod B. */
+
+mp_limb_t
+mpn_sbpi1_bdiv_qr (mp_ptr qp,
+ mp_ptr up, mp_size_t un,
+ mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+ mp_size_t i;
+ mp_limb_t cy;
+
+ ASSERT (dn > 0);
+ ASSERT (un > dn);
+ ASSERT ((dp[0] & 1) != 0);
+ ASSERT (-(dp[0] * dinv) == 1);
+ ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
+
+ for (i = un - dn, cy = 0; i != 0; i--)
+ {
+ mp_limb_t q = dinv * up[0];
+ mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
+ *qp++ = q;
+
+ hi += cy;
+ cy = hi < cy;
+ hi += up[dn];
+ cy += hi < up[dn];
+ up[dn] = hi;
+ up++;
+ }
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_r.c b/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_r.c
new file mode 100644
index 0000000..a609951
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sbpi1_bdiv_r.c
@@ -0,0 +1,79 @@
+/* mpn_sbpi1_bdiv_r -- schoolbook Hensel division with precomputed inverse,
+ returning remainder.
+
+ Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+ IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+/* Computes a binary quotient of size qn = un - dn.
+ Output:
+
+ Q = -U * D^{-1} mod B^qn,
+
+ R = (U + Q * D) * B^(-qn)
+
+ Stores the dn least significant limbs of R at {up + un - dn, dn},
+ and returns the carry from the addition U + Q*D.
+
+ D must be odd. dinv is (-D)^-1 mod B. */
+
+mp_limb_t
+mpn_sbpi1_bdiv_r (mp_ptr up, mp_size_t un,
+ mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+ mp_size_t i;
+ mp_limb_t cy;
+
+ ASSERT (dn > 0);
+ ASSERT (un > dn);
+ ASSERT ((dp[0] & 1) != 0);
+ ASSERT (-(dp[0] * dinv) == 1);
+
+ for (i = un - dn, cy = 0; i != 0; i--)
+ {
+ mp_limb_t q = dinv * up[0];
+ mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
+
+ hi += cy;
+ cy = hi < cy;
+ hi += up[dn];
+ cy += hi < up[dn];
+ up[dn] = hi;
+ up++;
+ }
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sbpi1_div_q.c b/vendor/gmp-6.3.0/mpn/generic/sbpi1_div_q.c
new file mode 100644
index 0000000..a9975eb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sbpi1_div_q.c
@@ -0,0 +1,302 @@
+/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
+ division algorithm.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_div_q (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
+{
+ mp_limb_t qh;
+ mp_size_t qn, i;
+ mp_limb_t n1, n0;
+ mp_limb_t d1, d0;
+ mp_limb_t cy, cy1;
+ mp_limb_t q;
+ mp_limb_t flag;
+
+ mp_size_t dn_orig = dn;
+ mp_srcptr dp_orig = dp;
+ mp_ptr np_orig = np;
+
+ ASSERT (dn > 2);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+ np += nn;
+
+ qn = nn - dn;
+ if (qn + 1 < dn)
+ {
+ dp += dn - (qn + 1);
+ dn = qn + 1;
+ }
+
+ qh = mpn_cmp (np - dn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+
+ qp += qn;
+
+ dn -= 2; /* offset dn by 2 for main division loops,
+ saving two iterations in mpn_submul_1. */
+ d1 = dp[dn + 1];
+ d0 = dp[dn + 0];
+
+ np -= 2;
+
+ n1 = np[1];
+
+ for (i = qn - (dn + 2); i >= 0; i--)
+ {
+ np--;
+ if (UNLIKELY (n1 == d1) && np[1] == d0)
+ {
+ q = GMP_NUMB_MASK;
+ mpn_submul_1 (np - dn, dp, dn + 2, q);
+ n1 = np[1]; /* update n1, last loop's value will now be invalid */
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ cy1 = n0 < cy;
+ n0 = (n0 - cy) & GMP_NUMB_MASK;
+ cy = n1 < cy1;
+ n1 -= cy1;
+ np[0] = n0;
+
+ if (UNLIKELY (cy != 0))
+ {
+ n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+ q--;
+ }
+ }
+
+ *--qp = q;
+ }
+
+ flag = ~CNST_LIMB(0);
+
+ if (dn >= 0)
+ {
+ for (i = dn; i > 0; i--)
+ {
+ np--;
+ if (UNLIKELY (n1 >= (d1 & flag)))
+ {
+ q = GMP_NUMB_MASK;
+ cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
+
+ if (UNLIKELY (n1 != cy))
+ {
+ if (n1 < (cy & flag))
+ {
+ q--;
+ mpn_add_n (np - dn, np - dn, dp, dn + 2);
+ }
+ else
+ flag = 0;
+ }
+ n1 = np[1];
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ cy1 = n0 < cy;
+ n0 = (n0 - cy) & GMP_NUMB_MASK;
+ cy = n1 < cy1;
+ n1 -= cy1;
+ np[0] = n0;
+
+ if (UNLIKELY (cy != 0))
+ {
+ n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+ q--;
+ }
+ }
+
+ *--qp = q;
+
+ /* Truncate operands. */
+ dn--;
+ dp++;
+ }
+
+ np--;
+ if (UNLIKELY (n1 >= (d1 & flag)))
+ {
+ q = GMP_NUMB_MASK;
+ cy = mpn_submul_1 (np, dp, 2, q);
+
+ if (UNLIKELY (n1 != cy))
+ {
+ if (n1 < (cy & flag))
+ {
+ q--;
+ add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
+ }
+ else
+ flag = 0;
+ }
+ n1 = np[1];
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+ np[0] = n0;
+ np[1] = n1;
+ }
+
+ *--qp = q;
+ }
+ ASSERT_ALWAYS (np[1] == n1);
+ np += 2;
+
+
+ dn = dn_orig;
+ if (UNLIKELY (n1 < (dn & flag)))
+ {
+ mp_limb_t q, x;
+
+ /* The quotient may be too large if the remainder is small. Recompute
+ for above ignored operand parts, until the remainder spills.
+
+ FIXME: The quality of this code isn't the same as the code above.
+ 1. We don't compute things in an optimal order, high-to-low, in order
+ to terminate as quickly as possible.
+ 2. We mess with pointers and sizes, adding and subtracting and
+ adjusting to get things right. It surely could be streamlined.
+ 3. The only termination criteria are that we determine that the
+ quotient needs to be adjusted, or that we have recomputed
+ everything. We should stop when the remainder is so large
+ that no additional subtracting could make it spill.
+ 4. If nothing else, we should not do two loops of submul_1 over the
+ data, instead handle both the triangularization and chopping at
+ once. */
+
+ x = n1;
+
+ if (dn > 2)
+ {
+ /* Compensate for triangularization. */
+ mp_limb_t y;
+
+ dp = dp_orig;
+ if (qn + 1 < dn)
+ {
+ dp += dn - (qn + 1);
+ dn = qn + 1;
+ }
+
+ y = np[-2];
+
+ for (i = dn - 3; i >= 0; i--)
+ {
+ q = qp[i];
+ cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
+
+ if (y < cy)
+ {
+ if (x == 0)
+ {
+ cy = mpn_sub_1 (qp, qp, qn, 1);
+ ASSERT_ALWAYS (cy == 0);
+ return qh - cy;
+ }
+ x--;
+ }
+ y -= cy;
+ }
+ np[-2] = y;
+ }
+
+ dn = dn_orig;
+ if (qn + 1 < dn)
+ {
+ /* Compensate for ignored dividend and divisor tails. */
+
+ dp = dp_orig;
+ np = np_orig;
+
+ if (qh != 0)
+ {
+ cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
+ if (cy != 0)
+ {
+ if (x == 0)
+ {
+ if (qn != 0)
+ cy = mpn_sub_1 (qp, qp, qn, 1);
+ return qh - cy;
+ }
+ x--;
+ }
+ }
+
+ if (qn == 0)
+ return qh;
+
+ for (i = dn - qn - 2; i >= 0; i--)
+ {
+ cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
+ cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
+ if (cy != 0)
+ {
+ if (x == 0)
+ {
+ cy = mpn_sub_1 (qp, qp, qn, 1);
+ return qh;
+ }
+ x--;
+ }
+ }
+ }
+ }
+
+ return qh;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sbpi1_div_qr.c b/vendor/gmp-6.3.0/mpn/generic/sbpi1_div_qr.c
new file mode 100644
index 0000000..7330a77
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sbpi1_div_qr.c
@@ -0,0 +1,109 @@
+/* mpn_sbpi1_div_qr -- Schoolbook division using the Möller-Granlund 3/2
+ division algorithm.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_div_qr (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
+{
+ mp_limb_t qh;
+ mp_size_t i;
+ mp_limb_t n1, n0;
+ mp_limb_t d1, d0;
+ mp_limb_t cy, cy1;
+ mp_limb_t q;
+
+ ASSERT (dn > 2);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+ np += nn;
+
+ qh = mpn_cmp (np - dn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+
+ qp += nn - dn;
+
+ dn -= 2; /* offset dn by 2 for main division loops,
+ saving two iterations in mpn_submul_1. */
+ d1 = dp[dn + 1];
+ d0 = dp[dn + 0];
+
+ np -= 2;
+
+ n1 = np[1];
+
+ for (i = nn - (dn + 2); i > 0; i--)
+ {
+ np--;
+ if (UNLIKELY (n1 == d1) && np[1] == d0)
+ {
+ q = GMP_NUMB_MASK;
+ mpn_submul_1 (np - dn, dp, dn + 2, q);
+ n1 = np[1]; /* update n1, last loop's value will now be invalid */
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ cy1 = n0 < cy;
+ n0 = (n0 - cy) & GMP_NUMB_MASK;
+ cy = n1 < cy1;
+ n1 = (n1 - cy1) & GMP_NUMB_MASK;
+ np[0] = n0;
+
+ if (UNLIKELY (cy != 0))
+ {
+ n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+ q--;
+ }
+ }
+
+ *--qp = q;
+ }
+ np[1] = n1;
+
+ return qh;
+}
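+
+/* A minimal usage sketch, assuming the operands are already normalized the
+   way schoolbook callers prepare them: dn > 2, nn >= dn, and the high bit of
+   dp[dn-1] set.  The helper name example_div_qr is hypothetical.  On return
+   {np, dn} holds the remainder, {qp, nn-dn} the low quotient limbs, and the
+   high quotient limb is the return value.  */
+#if 0
+static mp_limb_t
+example_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
+                mp_srcptr dp, mp_size_t dn)
+{
+  gmp_pi1_t dinv;
+
+  ASSERT (dn > 2 && nn >= dn);
+  ASSERT ((dp[dn - 1] & GMP_NUMB_HIGHBIT) != 0);
+
+  invert_pi1 (dinv, dp[dn - 1], dp[dn - 2]);    /* 3/2 inverse of the two top limbs */
+  return mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv.inv32);
+}
+#endif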
diff --git a/vendor/gmp-6.3.0/mpn/generic/sbpi1_divappr_q.c b/vendor/gmp-6.3.0/mpn/generic/sbpi1_divappr_q.c
new file mode 100644
index 0000000..ef7ca26
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sbpi1_divappr_q.c
@@ -0,0 +1,198 @@
+/* mpn_sbpi1_divappr_q -- Schoolbook division using the Möller-Granlund 3/2
+ division algorithm, returning approximate quotient. The quotient returned
+ is either correct, or one too large.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_divappr_q (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
+{
+ mp_limb_t qh;
+ mp_size_t qn, i;
+ mp_limb_t n1, n0;
+ mp_limb_t d1, d0;
+ mp_limb_t cy, cy1;
+ mp_limb_t q;
+ mp_limb_t flag;
+
+ ASSERT (dn > 2);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+ np += nn;
+
+ qn = nn - dn;
+ if (qn + 1 < dn)
+ {
+ dp += dn - (qn + 1);
+ dn = qn + 1;
+ }
+
+ qh = mpn_cmp (np - dn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+
+ qp += qn;
+
+ dn -= 2; /* offset dn by 2 for main division loops,
+ saving two iterations in mpn_submul_1. */
+ d1 = dp[dn + 1];
+ d0 = dp[dn + 0];
+
+ np -= 2;
+
+ n1 = np[1];
+
+ for (i = qn - (dn + 2); i >= 0; i--)
+ {
+ np--;
+ if (UNLIKELY (n1 == d1) && np[1] == d0)
+ {
+ q = GMP_NUMB_MASK;
+ mpn_submul_1 (np - dn, dp, dn + 2, q);
+ n1 = np[1]; /* update n1, last loop's value will now be invalid */
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ cy1 = n0 < cy;
+ n0 = (n0 - cy) & GMP_NUMB_MASK;
+ cy = n1 < cy1;
+ n1 -= cy1;
+ np[0] = n0;
+
+ if (UNLIKELY (cy != 0))
+ {
+ n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+ q--;
+ }
+ }
+
+ *--qp = q;
+ }
+
+ flag = ~CNST_LIMB(0);
+
+ if (dn >= 0)
+ {
+ for (i = dn; i > 0; i--)
+ {
+ np--;
+ if (UNLIKELY (n1 >= (d1 & flag)))
+ {
+ q = GMP_NUMB_MASK;
+ cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
+
+ if (UNLIKELY (n1 != cy))
+ {
+ if (n1 < (cy & flag))
+ {
+ q--;
+ mpn_add_n (np - dn, np - dn, dp, dn + 2);
+ }
+ else
+ flag = 0;
+ }
+ n1 = np[1];
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ cy1 = n0 < cy;
+ n0 = (n0 - cy) & GMP_NUMB_MASK;
+ cy = n1 < cy1;
+ n1 -= cy1;
+ np[0] = n0;
+
+ if (UNLIKELY (cy != 0))
+ {
+ n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+ q--;
+ }
+ }
+
+ *--qp = q;
+
+ /* Truncate operands. */
+ dn--;
+ dp++;
+ }
+
+ np--;
+ if (UNLIKELY (n1 >= (d1 & flag)))
+ {
+ q = GMP_NUMB_MASK;
+ cy = mpn_submul_1 (np, dp, 2, q);
+
+ if (UNLIKELY (n1 != cy))
+ {
+ if (n1 < (cy & flag))
+ {
+ q--;
+ add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
+ }
+ else
+ flag = 0;
+ }
+ n1 = np[1];
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+ np[1] = n1;
+ np[0] = n0;
+ }
+
+ *--qp = q;
+ }
+
+ ASSERT_ALWAYS (np[1] == n1);
+
+ return qh;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/scan0.c b/vendor/gmp-6.3.0/mpn/generic/scan0.c
new file mode 100644
index 0000000..d71832e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/scan0.c
@@ -0,0 +1,59 @@
+/* mpn_scan0 -- Scan from a given bit position for the next clear bit.
+
+Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Argument constraints:
+ 1. U must sooner or later have a limb with a clear bit.
+ */
+
+mp_bitcnt_t
+mpn_scan0 (mp_srcptr up, mp_bitcnt_t starting_bit)
+{
+ mp_size_t starting_word;
+ mp_limb_t alimb;
+ int cnt;
+ mp_srcptr p;
+
+ /* Start at the word implied by STARTING_BIT. */
+ starting_word = starting_bit / GMP_NUMB_BITS;
+ p = up + starting_word;
+ alimb = *p++ ^ GMP_NUMB_MASK;
+
+ /* Mask off any bits before STARTING_BIT in the first limb. */
+ alimb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);
+
+ while (alimb == 0)
+ alimb = *p++ ^ GMP_NUMB_MASK;
+
+ count_trailing_zeros (cnt, alimb);
+ return (p - up - 1) * GMP_NUMB_BITS + cnt;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/scan1.c b/vendor/gmp-6.3.0/mpn/generic/scan1.c
new file mode 100644
index 0000000..09e8060
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/scan1.c
@@ -0,0 +1,59 @@
+/* mpn_scan1 -- Scan from a given bit position for the next set bit.
+
+Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Argument constraints:
+ 1. U must sooner or later have a limb != 0.
+ */
+
+mp_bitcnt_t
+mpn_scan1 (mp_srcptr up, mp_bitcnt_t starting_bit)
+{
+ mp_size_t starting_word;
+ mp_limb_t alimb;
+ int cnt;
+ mp_srcptr p;
+
+ /* Start at the word implied by STARTING_BIT. */
+ starting_word = starting_bit / GMP_NUMB_BITS;
+ p = up + starting_word;
+ alimb = *p++;
+
+ /* Mask off any bits before STARTING_BIT in the first limb. */
+ alimb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);
+
+ while (alimb == 0)
+ alimb = *p++;
+
+ count_trailing_zeros (cnt, alimb);
+ return (p - up - 1) * GMP_NUMB_BITS + cnt;
+}
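+
+/* A small worked example, assuming GMP_NUMB_BITS == 64 and no nails: for
+   up[] = {0, 0x10} the first set bit at or above either starting position is
+   bit 4 of limb 1, so both calls return 68.  */
+#if 0
+static void
+example_scan1 (void)
+{
+  mp_limb_t up[2] = {0, CNST_LIMB (0x10)};
+  ASSERT (mpn_scan1 (up, 0) == 68);
+  ASSERT (mpn_scan1 (up, 64) == 68);
+}
+#endif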
diff --git a/vendor/gmp-6.3.0/mpn/generic/sec_aors_1.c b/vendor/gmp-6.3.0/mpn/generic/sec_aors_1.c
new file mode 100644
index 0000000..6480fa1
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sec_aors_1.c
@@ -0,0 +1,59 @@
+/* mpn_sec_add_1, mpn_sec_sub_1
+
+ Contributed to the GNU project by Niels Möller
+
+Copyright 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#if OPERATION_sec_add_1
+#define FNAME mpn_sec_add_1
+#define FNAME_itch mpn_sec_add_1_itch
+#define OP_N mpn_add_n
+#endif
+#if OPERATION_sec_sub_1
+#define FNAME mpn_sec_sub_1
+#define FNAME_itch mpn_sec_sub_1_itch
+#define OP_N mpn_sub_n
+#endif
+
+/* It's annoying that we need scratch space */
+mp_size_t
+FNAME_itch (mp_size_t n)
+{
+ return n;
+}
+
+mp_limb_t
+FNAME (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_ptr scratch)
+{
+ scratch[0] = b;
+ MPN_ZERO (scratch + 1, n-1);
+ return OP_N (rp, ap, scratch, n);
+}
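+
+/* A minimal usage sketch for the mpn_sec_add_1 instance, assuming n is at
+   most 4 so the required mpn_sec_add_1_itch (n) == n scratch limbs can live
+   on the stack; the fixed bound 4 is an illustrative assumption.  */
+#if 0
+static mp_limb_t
+example_sec_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_limb_t scratch[4];
+  ASSERT (n >= 1 && n <= 4);
+  return mpn_sec_add_1 (rp, ap, n, b, scratch);
+}
+#endif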
diff --git a/vendor/gmp-6.3.0/mpn/generic/sec_div.c b/vendor/gmp-6.3.0/mpn/generic/sec_div.c
new file mode 100644
index 0000000..1f08649
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sec_div.c
@@ -0,0 +1,131 @@
+/* mpn_sec_div_qr, mpn_sec_div_r -- Compute Q = floor(U / V), U = U mod V.
+ Side-channel silent under the assumption that the used instructions are
+ side-channel silent.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2011-2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if OPERATION_sec_div_qr
+#define FNAME mpn_sec_div_qr
+#define FNAME_itch mpn_sec_div_qr_itch
+#define Q(q) q,
+#define RETTYPE mp_limb_t
+#endif
+#if OPERATION_sec_div_r
+#define FNAME mpn_sec_div_r
+#define FNAME_itch mpn_sec_div_r_itch
+#define Q(q)
+#define RETTYPE void
+#endif
+
+mp_size_t
+FNAME_itch (mp_size_t nn, mp_size_t dn)
+{
+#if OPERATION_sec_div_qr
+/* Needs (nn + dn + 1) + mpn_sec_pi1_div_qr's needs of (2nn' - dn + 1) for a
+ total of 3nn + 4 limbs at tp. Note that mpn_sec_pi1_div_qr's nn is one
+ greater than ours, therefore +4 and not just +2. */
+ return 3 * nn + 4;
+#endif
+#if OPERATION_sec_div_r
+/* Needs (nn + dn + 1) + mpn_sec_pi1_div_r's needs of (dn + 1) for a total of
+ nn + 2dn + 2 limbs at tp. */
+ return nn + 2 * dn + 2;
+#endif
+}
+
+RETTYPE
+FNAME (Q(mp_ptr qp)
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr tp)
+{
+ mp_limb_t d1, d0;
+ unsigned int cnt;
+ mp_limb_t inv32;
+
+ ASSERT (dn >= 1);
+ ASSERT (nn >= dn);
+ ASSERT (dp[dn - 1] != 0);
+
+ d1 = dp[dn - 1];
+ count_leading_zeros (cnt, d1);
+
+ if (cnt != 0)
+ {
+ mp_limb_t qh, cy;
+ mp_ptr np2, dp2;
+ dp2 = tp; /* dn limbs */
+ mpn_lshift (dp2, dp, dn, cnt);
+
+ np2 = tp + dn; /* (nn + 1) limbs */
+ cy = mpn_lshift (np2, np, nn, cnt);
+ np2[nn++] = cy;
+
+ d0 = dp2[dn - 1];
+ d0 += (~d0 != 0);
+ invert_limb (inv32, d0);
+
+ /* We add nn + dn to tp here, not nn + 1 + dn, as expected. This is
+ because nn here will have been incremented. */
+#if OPERATION_sec_div_qr
+ qh = mpn_sec_pi1_div_qr (np2 + dn, np2, nn, dp2, dn, inv32, tp + nn + dn);
+ ASSERT (qh == 0); /* FIXME: this indicates inefficiency! */
+ MPN_COPY (qp, np2 + dn, nn - dn - 1);
+ qh = np2[nn - 1];
+#else
+ mpn_sec_pi1_div_r (np2, nn, dp2, dn, inv32, tp + nn + dn);
+#endif
+
+ mpn_rshift (np, np2, dn, cnt);
+
+#if OPERATION_sec_div_qr
+ return qh;
+#endif
+ }
+ else
+ {
+ /* FIXME: Consider copying np => np2 here, adding a 0-limb at the top.
+ That would simplify the underlying pi1 function, since then it could
+ assume nn > dn. */
+ d0 = dp[dn - 1];
+ d0 += (~d0 != 0);
+ invert_limb (inv32, d0);
+
+#if OPERATION_sec_div_qr
+ return mpn_sec_pi1_div_qr (qp, np, nn, dp, dn, inv32, tp);
+#else
+ mpn_sec_pi1_div_r (np, nn, dp, dn, inv32, tp);
+#endif
+ }
+}
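+
+/* A minimal usage sketch for the quotient-and-remainder instance, with the
+   scratch sized by the matching _itch function; the helper name
+   example_sec_div_qr is hypothetical.  On return the quotient is {qp, nn-dn}
+   plus the returned high limb, and {np, dn} holds the remainder.  */
+#if 0
+static mp_limb_t
+example_sec_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
+                    mp_srcptr dp, mp_size_t dn)
+{
+  mp_limb_t qh;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (mpn_sec_div_qr_itch (nn, dn));
+  qh = mpn_sec_div_qr (qp, np, nn, dp, dn, tp);
+  TMP_FREE;
+  return qh;
+}
+#endif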
diff --git a/vendor/gmp-6.3.0/mpn/generic/sec_invert.c b/vendor/gmp-6.3.0/mpn/generic/sec_invert.c
new file mode 100644
index 0000000..07665d1
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sec_invert.c
@@ -0,0 +1,177 @@
+/* mpn_sec_invert
+
+ Contributed to the GNU project by Niels Möller
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#if 0
+/* Currently unused. Should be resurrected once mpn_cnd_neg is
+ advertised. */
+static mp_size_t
+mpn_cnd_neg_itch (mp_size_t n)
+{
+ return n;
+}
+#endif
+
+/* FIXME: Ought to return carry */
+static void
+mpn_cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n,
+ mp_ptr scratch)
+{
+ mpn_lshift (scratch, ap, n, 1);
+ mpn_cnd_sub_n (cnd, rp, ap, scratch, n);
+}
+
+static int
+mpn_sec_eq_ui (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+ mp_limb_t d;
+ ASSERT (n > 0);
+
+ d = ap[0] ^ b;
+
+ while (--n > 0)
+ d |= ap[n];
+
+ return d == 0;
+}
+
+mp_size_t
+mpn_sec_invert_itch (mp_size_t n)
+{
+ return 4*n;
+}
+
+/* Compute V <-- A^{-1} (mod M), in data-independent time. M must be
+ odd. Returns 1 on success, and 0 on failure (i.e., if gcd (A, m) !=
+ 1). Inputs and outputs of size n, and no overlap allowed. The {ap,
+ n} area is destroyed. For arbitrary inputs, bit_size should be
+ 2*n*GMP_NUMB_BITS, but if A or M are known to be smaller, e.g., if
+ M = 2^521 - 1 and A < M, bit_size can be any bound on the sum of
+ the bit sizes of A and M. */
+int
+mpn_sec_invert (mp_ptr vp, mp_ptr ap, mp_srcptr mp,
+ mp_size_t n, mp_bitcnt_t bit_size,
+ mp_ptr scratch)
+{
+ ASSERT (n > 0);
+ ASSERT (bit_size > 0);
+ ASSERT (mp[0] & 1);
+ ASSERT (! MPN_OVERLAP_P (ap, n, vp, n));
+#define bp (scratch + n)
+#define up (scratch + 2*n)
+#define m1hp (scratch + 3*n)
+
+ /* Maintain
+
+ a = u * orig_a (mod m)
+ b = v * orig_a (mod m)
+
+ and b odd at all times. Initially,
+
+ a = a_orig, u = 1
+ b = m, v = 0
+ */
+
+
+ up[0] = 1;
+ mpn_zero (up+1, n - 1);
+ mpn_copyi (bp, mp, n);
+ mpn_zero (vp, n);
+
+ ASSERT_CARRY (mpn_rshift (m1hp, mp, n, 1));
+ ASSERT_NOCARRY (mpn_sec_add_1 (m1hp, m1hp, n, 1, scratch));
+
+ while (bit_size-- > 0)
+ {
+ mp_limb_t odd, swap, cy;
+
+ /* Always maintain b odd. The logic of the iteration is as
+ follows. For a, b:
+
+ odd = a & 1
+ a -= odd * b
+ if (underflow from a-b)
+ {
+ b += a, assigns old a
+ a = B^n-a
+ }
+
+ a /= 2
+
+ For u, v:
+
+ if (underflow from a - b)
+ swap u, v
+ u -= odd * v
+ if (underflow from u - v)
+ u += m
+
+ u /= 2
+ if (a one bit was shifted out)
+ u += (m+1)/2
+
+ As long as a > 0, the quantity
+
+ (bitsize of a) + (bitsize of b)
+
+ is reduced by at least one bit per iteration, hence after (bit_size of
+ orig_a) + (bit_size of m) - 1 iterations we surely have a = 0. Then b
+ = gcd(orig_a, m) and if b = 1 then also v = orig_a^{-1} (mod m).
+ */
+
+ ASSERT (bp[0] & 1);
+ odd = ap[0] & 1;
+
+ swap = mpn_cnd_sub_n (odd, ap, ap, bp, n);
+ mpn_cnd_add_n (swap, bp, bp, ap, n);
+ mpn_cnd_neg (swap, ap, ap, n, scratch);
+
+ mpn_cnd_swap (swap, up, vp, n);
+ cy = mpn_cnd_sub_n (odd, up, up, vp, n);
+ cy -= mpn_cnd_add_n (cy, up, up, mp, n);
+ ASSERT (cy == 0);
+
+ cy = mpn_rshift (ap, ap, n, 1);
+ ASSERT (cy == 0);
+ cy = mpn_rshift (up, up, n, 1);
+ cy = mpn_cnd_add_n (cy, up, up, m1hp, n);
+ ASSERT (cy == 0);
+ }
+ /* Should be all zeros, but check only extreme limbs */
+ ASSERT ( (ap[0] | ap[n-1]) == 0);
+ /* Check if indeed gcd == 1. */
+ return mpn_sec_eq_ui (bp, n, 1);
+#undef bp
+#undef up
+#undef m1hp
+}
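+
+/* A minimal usage sketch: scratch sized by mpn_sec_invert_itch and the
+   generic bound 2*n*GMP_NUMB_BITS for bit_size, as described above.  The
+   helper name example_sec_invert is hypothetical; note that {ap, n} is
+   clobbered.  */
+#if 0
+static int
+example_sec_invert (mp_ptr vp, mp_ptr ap, mp_srcptr mp, mp_size_t n)
+{
+  int ok;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (mpn_sec_invert_itch (n));
+  ok = mpn_sec_invert (vp, ap, mp, n, 2 * (mp_bitcnt_t) n * GMP_NUMB_BITS, tp);
+  TMP_FREE;
+  return ok;    /* 1 iff gcd (A, M) == 1 */
+}
+#endif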
diff --git a/vendor/gmp-6.3.0/mpn/generic/sec_mul.c b/vendor/gmp-6.3.0/mpn/generic/sec_mul.c
new file mode 100644
index 0000000..4bbfa61
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sec_mul.c
@@ -0,0 +1,48 @@
+/* mpn_sec_mul.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+void
+mpn_sec_mul (mp_ptr rp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn,
+ mp_ptr tp)
+{
+ mpn_mul_basecase (rp, ap, an, bp, bn);
+}
+
+mp_size_t
+mpn_sec_mul_itch (mp_size_t an, mp_size_t bn)
+{
+ return 0;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sec_pi1_div.c b/vendor/gmp-6.3.0/mpn/generic/sec_pi1_div.c
new file mode 100644
index 0000000..29d01e7
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sec_pi1_div.c
@@ -0,0 +1,172 @@
+/* mpn_sec_pi1_div_qr, mpn_sec_pi1_div_r -- Compute Q = floor(U / V), U = U
+ mod V. Side-channel silent under the assumption that the used instructions
+ are side-channel silent.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011-2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This side-channel silent division algorithm reduces the partial remainder by
+ GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
+ division algorithm. We actually do not insist on reducing by exactly
+ GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
+ too large (B is the limb base, D is the divisor, and i is the induction
+ variable); the subsequent step will handle the extra partial remainder bits.
+
+ With that partial remainder reduction, each step generates a quotient "half
+ limb". The outer loop generates two quotient half limbs, an upper (q1h) and
+ a lower (q0h) which are stored sparsely in separate limb arrays. These
+ arrays are added at the end; using separate arrays avoids data-dependent
+ carry propagation which could else pose a side-channel leakage problem.
+
+ The quotient half limbs may be between -3 and 0 from the accurate value
+ ("accurate" being the one which corresponds to a reduction to a principal
+ partial remainder). Too small quotient half limbs correspond to too large
+ remainders, which we reduce later, as described above.
+
+ In order to keep quotients from getting too big, corresponding to a negative
+ partial remainder, we use an inverse which is slightly smaller than usual.
+*/
+
+#if OPERATION_sec_pi1_div_qr
+/* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
+#define FNAME mpn_sec_pi1_div_qr
+#define Q(q) q,
+#define RETTYPE mp_limb_t
+#endif
+#if OPERATION_sec_pi1_div_r
+/* Needs (dn + 1) limbs at tp. */
+#define FNAME mpn_sec_pi1_div_r
+#define Q(q)
+#define RETTYPE void
+#endif
+
+RETTYPE
+FNAME (Q(mp_ptr qp)
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv,
+ mp_ptr tp)
+{
+ mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
+ mp_size_t i;
+ mp_ptr hp;
+#if OPERATION_sec_pi1_div_qr
+ mp_limb_t qh;
+ mp_ptr qlp, qhp;
+#endif
+
+ ASSERT (dn >= 1);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn - 1] & GMP_NUMB_HIGHBIT) != 0);
+
+ if (nn == dn)
+ {
+ cy = mpn_sub_n (np, np, dp, dn);
+ mpn_cnd_add_n (cy, np, np, dp, dn);
+#if OPERATION_sec_pi1_div_qr
+ return 1 - cy;
+#else
+ return;
+#endif
+ }
+
+ /* Create a divisor copy shifted half a limb. */
+ hp = tp; /* (dn + 1) limbs */
+ hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
+
+#if OPERATION_sec_pi1_div_qr
+ qlp = tp + (dn + 1); /* (nn - dn) limbs */
+ qhp = tp + (nn + 1); /* (nn - dn) limbs */
+#endif
+
+ np += nn - dn;
+ nh = 0;
+
+ for (i = nn - dn - 1; i >= 0; i--)
+ {
+ np--;
+
+ nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
+ umul_ppmm (q1h, dummy, nh, dinv);
+ q1h += nh;
+#if OPERATION_sec_pi1_div_qr
+ qhp[i] = q1h;
+#endif
+ mpn_submul_1 (np, hp, dn + 1, q1h);
+
+ nh = np[dn];
+ umul_ppmm (q0h, dummy, nh, dinv);
+ q0h += nh;
+#if OPERATION_sec_pi1_div_qr
+ qlp[i] = q0h;
+#endif
+ nh -= mpn_submul_1 (np, dp, dn, q0h);
+ }
+
+ /* 1st adjustment depends on extra high remainder limb. */
+ cnd = nh != 0; /* FIXME: cmp-to-int */
+#if OPERATION_sec_pi1_div_qr
+ qlp[0] += cnd;
+#endif
+ nh -= mpn_cnd_sub_n (cnd, np, np, dp, dn);
+
+ /* 2nd adjustment depends on remainder/divisor comparison as well as whether
+ extra remainder limb was nullified by previous subtract. */
+ cy = mpn_sub_n (np, np, dp, dn);
+ cy = cy - nh;
+#if OPERATION_sec_pi1_div_qr
+ qlp[0] += 1 - cy;
+#endif
+ mpn_cnd_add_n (cy, np, np, dp, dn);
+
+ /* 3rd adjustment depends on remainder/divisor comparison. */
+ cy = mpn_sub_n (np, np, dp, dn);
+#if OPERATION_sec_pi1_div_qr
+ qlp[0] += 1 - cy;
+#endif
+ mpn_cnd_add_n (cy, np, np, dp, dn);
+
+#if OPERATION_sec_pi1_div_qr
+ /* Combine quotient halves into final quotient. */
+ qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
+ qh += mpn_add_n (qp, qhp, qlp, nn - dn);
+
+ return qh;
+#else
+ return;
+#endif
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sec_powm.c b/vendor/gmp-6.3.0/mpn/generic/sec_powm.c
new file mode 100644
index 0000000..bba11cf
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sec_powm.c
@@ -0,0 +1,430 @@
+/* mpn_sec_powm -- Compute R = U^E mod M. Secure variant, side-channel silent
+ under the assumption that the multiply instruction is side-channel silent.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2007-2009, 2011-2014, 2018-2019, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/*
+ BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+
+ 1. T <- (B^n * U) mod M; convert to REDC form
+
+ 2. Compute table U^0, U^1, U^2... of floor(log(E))-dependent size
+
+ 3. While there are more bits in E
+ W <- power left-to-right base-k
+
+ The article "Defeating modexp side-channel attacks with data-independent
+ execution traces", https://gmplib.org/~tege/modexp-silent.pdf, has details.
+
+
+ TODO:
+
+ * Make getbits a macro, thereby allowing it to update the index operand.
+ That will simplify the code using getbits. (Perhaps make getbits' sibling
+ getbit then have similar form, for symmetry.)
+
+ * Choose window size without looping. (Superoptimize or think(tm).)
+
+ * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
+ redc_1 and redc_n. On such systems, we will switch to redc_2 causing
+ slowdown.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#undef MPN_REDC_1_SEC
+#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm); \
+ mpn_cnd_sub_n (cy, rp, up + n, mp, n); \
+ } while (0)
+#else
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_1 (rp, up, mp, n, invm); \
+ mpn_cnd_sub_n (cy, rp, rp, mp, n); \
+ } while (0)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#undef MPN_REDC_2_SEC
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_2 (rp, up, mp, n, mip); \
+ mpn_cnd_sub_n (cy, rp, rp, mp, n); \
+ } while (0)
+#else
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip) /* empty */
+#undef REDC_1_TO_REDC_2_THRESHOLD
+#define REDC_1_TO_REDC_2_THRESHOLD MP_SIZE_T_MAX
+#endif
+
+/* Define our own mpn squaring function. We do this since we cannot use a
+ native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
+ SQR_TOOM2_THRESHOLD. This is so because of fixed size stack allocations
+ made inside mpn_sqr_basecase. */
+
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+/* The limit of the generic code is SQR_TOOM2_THRESHOLD. */
+#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_basecase
+#ifdef TUNE_SQR_TOOM2_MAX
+/* We slightly abuse TUNE_SQR_TOOM2_MAX here. If it is set for an assembly
+ mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
+ file. An assembly mpn_sqr_basecase that does not define it should allow
+ any size. */
+#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
+#endif
+#endif
+
+#ifdef WANT_FAT_BINARY
+/* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
+ __gmpn_cpuvec. Perhaps any possible sqr_basecase.asm allows any size, and we
+ limit its use unnecessarily. We cannot tell, so play it safe. FIXME. */
+#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
+#endif
+
+#ifndef SQR_BASECASE_LIM
+/* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
+ size. */
+#define SQR_BASECASE_LIM MP_SIZE_T_MAX
+#endif
+
+#define mpn_local_sqr(rp,up,n) \
+ do { \
+ if (ABOVE_THRESHOLD (n, SQR_BASECASE_THRESHOLD) \
+ && BELOW_THRESHOLD (n, SQR_BASECASE_LIM)) \
+ mpn_sqr_basecase (rp, up, n); \
+ else \
+ mpn_mul_basecase(rp, up, n, up, n); \
+ } while (0)
+
+#define getbit(p,bi) \
+ ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
+
+/* FIXME: Maybe some things would get simpler if all callers ensure
+ that bi >= nbits. As far as I understand, with the current code bi
+ < nbits can happen only for the final iteration. */
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+ int nbits_in_r;
+ mp_limb_t r;
+ mp_size_t i;
+
+ if (bi < nbits)
+ {
+ return p[0] & (((mp_limb_t) 1 << bi) - 1);
+ }
+ else
+ {
+ bi -= nbits; /* bit index of low bit to extract */
+ i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
+ bi %= GMP_NUMB_BITS; /* bit index in low word */
+ r = p[i] >> bi; /* extract (low) bits */
+ nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
+ if (nbits_in_r < nbits) /* did we get enough bits? */
+ r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
+ return r & (((mp_limb_t ) 1 << nbits) - 1);
+ }
+}
+
+#ifndef POWM_SEC_TABLE
+#if GMP_NUMB_BITS < 50
+#define POWM_SEC_TABLE 2,33,96,780,2741
+#else
+#define POWM_SEC_TABLE 2,130,524,2578
+#endif
+#endif
+
+#if TUNE_PROGRAM_BUILD
+extern int win_size (mp_bitcnt_t);
+#else
+static inline int
+win_size (mp_bitcnt_t enb)
+{
+ int k;
+ /* Find k, such that x[k-1] < enb <= x[k].
+
+ We require that x[k] >= k, then it follows that enb > x[k-1] >=
+ k-1, which implies k <= enb.
+ */
+ static const mp_bitcnt_t x[] = {POWM_SEC_TABLE,~(mp_bitcnt_t)0};
+ for (k = 0; enb > x[k++]; )
+ ;
+ ASSERT (k <= enb);
+ return k;
+}
+#endif
+
+/* Convert U to REDC form, U_r = B^n * U mod M.
+ Uses scratch space at tp of size 2un + n + 1. */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+ MPN_ZERO (tp, n);
+ MPN_COPY (tp + n, up, un);
+
+ mpn_sec_div_r (tp, un + n, mp, n, tp + un + n);
+ MPN_COPY (rp, tp, n);
+}
+
+static mp_limb_t
+sec_binvert_limb (mp_limb_t n)
+{
+ mp_limb_t inv, t;
+ ASSERT ((n & 1) == 1);
+ /* 3 + 2 -> 5 */
+ inv = n + (((n + 1) << 1) & 0x18);
+
+ t = n * inv;
+#if GMP_NUMB_BITS <= 10
+ /* 5 x 2 -> 10 */
+ inv = 2 * inv - inv * t;
+#else /* GMP_NUMB_BITS > 10 */
+ /* 5 x 2 + 2 -> 12 */
+ inv = 2 * inv - inv * t + ((inv<<10)&-(t&(1<<5)));
+#endif /* GMP_NUMB_BITS <= 10 */
+
+ if (GMP_NUMB_BITS > 12)
+ {
+ t = n * inv - 1;
+ if (GMP_NUMB_BITS <= 36)
+ {
+ /* 12 x 3 -> 36 */
+ inv += inv * t * (t - 1);
+ }
+ else /* GMP_NUMB_BITS > 36 */
+ {
+ mp_limb_t t2 = t * t;
+#if GMP_NUMB_BITS <= 60
+ /* 12 x 5 -> 60 */
+ inv += inv * (t2 + 1) * (t2 - t);
+#else /* GMP_NUMB_BITS > 60 */
+ /* 12 x 5 + 4 -> 64 */
+ inv *= (t2 + 1) * (t2 - t) + 1 - ((t<<48)&-(t&(1<<12)));
+
+ /* 64 -> 128 -> 256 -> ... */
+ for (int todo = (GMP_NUMB_BITS - 1) >> 6; todo != 0; todo >>= 1)
+ inv = 2 * inv - inv * inv * n;
+#endif /* GMP_NUMB_BITS <= 60 */
+ }
+ }
+
+ ASSERT ((inv * n & GMP_NUMB_MASK) == 1);
+ return inv & GMP_NUMB_MASK;
+}
+
+/* {rp, n} <-- {bp, bn} ^ {ep, en} mod {mp, n},
+ where en = ceil (enb / GMP_NUMB_BITS)
+ Requires that {mp, n} is odd (and hence also mp[0] odd).
+ Uses scratch space at tp as defined by mpn_sec_powm_itch. */
+void
+mpn_sec_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+ mp_srcptr ep, mp_bitcnt_t enb,
+ mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+ mp_limb_t ip[2], *mip;
+ int windowsize, this_windowsize;
+ mp_limb_t expbits;
+ mp_ptr pp, this_pp, ps;
+ long i;
+ int cnd;
+
+ ASSERT (enb > 0);
+ ASSERT (n > 0);
+ /* The code works for bn = 0, but the defined scratch space is 2 limbs
+ greater than we supply, when converting 1 to redc form. */
+ ASSERT (bn > 0);
+ ASSERT ((mp[0] & 1) != 0);
+
+ windowsize = win_size (enb);
+
+ mip = ip;
+ mip[0] = sec_binvert_limb (mp[0]);
+ if (ABOVE_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ mp_limb_t t, dummy, mip0 = mip[0];
+
+ umul_ppmm (t, dummy, mip0, mp[0]);
+ ASSERT (dummy == 1);
+ t += mip0 * mp[1]; /* t = (mp * mip0)[1] */
+
+ mip[1] = t * mip0 - 1; /* ~( - t * mip0) */
+ }
+ mip[0] = -mip[0];
+
+ pp = tp;
+ tp += (n << windowsize); /* put tp after power table */
+
+ /* Compute pp[0] table entry */
+ /* scratch: | n | 1 | n+2 | */
+ /* | pp[0] | 1 | redcify | */
+ this_pp = pp;
+ this_pp[n] = 1;
+ redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
+ this_pp += n;
+
+ /* Compute pp[1] table entry. To avoid excessive scratch usage in the
+ degenerate situation where B >> M, we let redcify use scratch space which
+ will later be used by the pp table (element 2 and up). */
+ /* scratch: | n | n | bn + n + 1 | */
+ /* | pp[0] | pp[1] | redcify | */
+ redcify (this_pp, bp, bn, mp, n, this_pp + n);
+
+ /* Precompute powers of b and put them in the temporary area at pp. */
+ /* scratch: | n | n | ... | | 2n | */
+ /* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | product | */
+ ps = pp + n; /* initially B^1 */
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ for (i = (1 << windowsize) - 2; i > 0; i -= 2)
+ {
+ mpn_local_sqr (tp, ps, n);
+ ps += n;
+ this_pp += n;
+ MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+
+ mpn_mul_basecase (tp, this_pp, n, pp + n, n);
+ this_pp += n;
+ MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+ }
+ }
+ else
+ {
+ for (i = (1 << windowsize) - 2; i > 0; i -= 2)
+ {
+ mpn_local_sqr (tp, ps, n);
+ ps += n;
+ this_pp += n;
+ MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+
+ mpn_mul_basecase (tp, this_pp, n, pp + n, n);
+ this_pp += n;
+ MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+ }
+ }
+
+ expbits = getbits (ep, enb, windowsize);
+ ASSERT_ALWAYS (enb >= windowsize);
+ enb -= windowsize;
+
+ mpn_sec_tabselect (rp, pp, n, 1 << windowsize, expbits);
+
+ /* Main exponentiation loop. */
+ /* scratch: | n | n | ... | | 3n-4n | */
+ /* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | loop scratch | */
+
+#define INNERLOOP \
+ while (enb != 0) \
+ { \
+ expbits = getbits (ep, enb, windowsize); \
+ this_windowsize = windowsize; \
+ if (enb < windowsize) \
+ { \
+ this_windowsize -= windowsize - enb; \
+ enb = 0; \
+ } \
+ else \
+ enb -= windowsize; \
+ \
+ do \
+ { \
+ mpn_local_sqr (tp, rp, n); \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ this_windowsize--; \
+ } \
+ while (this_windowsize != 0); \
+ \
+ mpn_sec_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits); \
+ mpn_mul_basecase (tp, rp, n, tp + 2*n, n); \
+ \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ }
+
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+#undef MPN_REDUCE
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_REDUCE
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2_SEC (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+
+ MPN_COPY (tp, rp, n);
+ MPN_ZERO (tp + n, n);
+
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+ else
+ MPN_REDC_2_SEC (rp, tp, mp, n, mip);
+
+ cnd = mpn_sub_n (tp, rp, mp, n); /* we need just retval */
+ mpn_cnd_sub_n (!cnd, rp, rp, mp, n);
+}
+
+mp_size_t
+mpn_sec_powm_itch (mp_size_t bn, mp_bitcnt_t enb, mp_size_t n)
+{
+ int windowsize;
+ mp_size_t redcify_itch, itch;
+
+ /* FIXME: no more _local/_basecase difference. */
+ /* The top scratch usage will either be when reducing B in the 2nd redcify
+ call, or more typically n*2^windowsize + 3n or 4n, in the main loop. (It
+ is 3n or 4n depending on if we use mpn_local_sqr or a native
+ mpn_sqr_basecase. We assume 4n always for now.) */
+
+ windowsize = win_size (enb);
+
+ /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
+ the (bn + n) term is due to redcify's own usage, and the rest is due to
+ mpn_sec_div_r's usage when called from redcify. */
+ redcify_itch = (2 * n) + (bn + n) + ((bn + n) + 2 * n + 2);
+
+ /* The n * 2^windowsize term is due to the power table, the 4n term is due to
+ scratch needs of squaring/multiplication in the exponentiation loop. */
+ itch = (n << windowsize) + (4 * n);
+
+ return MAX (itch, redcify_itch);
+}
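+
+/* A minimal usage sketch: the exponent is given as a bit count enb together
+   with its limbs at ep, the modulus {mp, n} must be odd, and the scratch is
+   sized with mpn_sec_powm_itch.  The helper name example_sec_powm is
+   hypothetical.  */
+#if 0
+static void
+example_sec_powm (mp_ptr rp,
+                  mp_srcptr bp, mp_size_t bn,
+                  mp_srcptr ep, mp_bitcnt_t enb,
+                  mp_srcptr mp, mp_size_t n)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (mpn_sec_powm_itch (bn, enb, n));
+  mpn_sec_powm (rp, bp, bn, ep, enb, mp, n, tp);        /* {rp, n} = B^E mod M */
+  TMP_FREE;
+}
+#endif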
diff --git a/vendor/gmp-6.3.0/mpn/generic/sec_sqr.c b/vendor/gmp-6.3.0/mpn/generic/sec_sqr.c
new file mode 100644
index 0000000..83fc7d9
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sec_sqr.c
@@ -0,0 +1,76 @@
+/* mpn_sec_sqr.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+/* The limit of the generic code is SQR_TOOM2_THRESHOLD. */
+#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_basecase
+#ifdef TUNE_SQR_TOOM2_MAX
+/* We slightly abuse TUNE_SQR_TOOM2_MAX here. If it is set for an assembly
+ mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
+ file. An assembly mpn_sqr_basecase that does not define it should allow
+ any size. */
+#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
+#endif
+#endif
+
+#ifdef WANT_FAT_BINARY
+/* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
+ __gmpn_cpuvec. Perhaps any possible sqr_basecase.asm allows any size, and we
+ limit its use unnecessarily. We cannot tell, so play it safe. FIXME. */
+#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
+#endif
+
+void
+mpn_sec_sqr (mp_ptr rp,
+ mp_srcptr ap, mp_size_t an,
+ mp_ptr tp)
+{
+#ifndef SQR_BASECASE_LIM
+/* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
+ size. */
+ mpn_sqr_basecase (rp, ap, an);
+#else
+/* Else use mpn_mul_basecase. */
+ mpn_mul_basecase (rp, ap, an, ap, an);
+#endif
+}
+
+mp_size_t
+mpn_sec_sqr_itch (mp_size_t an)
+{
+ return 0;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sec_tabselect.c b/vendor/gmp-6.3.0/mpn/generic/sec_tabselect.c
new file mode 100644
index 0000000..f50bdac
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sec_tabselect.c
@@ -0,0 +1,134 @@
+/* mpn_sec_tabselect.
+
+Copyright 2007-2009, 2011, 2013, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+#ifndef SEC_TABSELECT_METHOD
+#define SEC_TABSELECT_METHOD 1
+#endif
+
+/* Select entry `which' from table `tab', which has nents entries, each `n'
+ limbs. Store the selected entry at rp. Reads entire table to avoid
+ side-channel information leaks. O(n*nents). */
+
+#if SEC_TABSELECT_METHOD == 1
+void
+mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
+ mp_size_t n, mp_size_t nents, mp_size_t which)
+{
+ mp_size_t k, i;
+ mp_limb_t mask;
+ volatile const mp_limb_t *tp;
+
+ tp = tab;
+
+ /* Place first entry into result area. */
+ for (i = 0; i < n; i++)
+ rp[i] = tp[i];
+
+ /* Conditionally replace entry in result area by entry 1...(nents-1) using
+ masking trickery. */
+ for (k = 1; k < nents; k++)
+ {
+ /* Generate a mask using an expression which all compilers should compile
+ into branch-free code. The convoluted expression is designed to allow
+ mp_limb_t to be either wider or narrower than mp_size_t. */
+ mask = -(mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1));
+ tp += n;
+ for (i = 0; i < n; i++)
+ rp[i] = (rp[i] & mask) | (tp[i] & ~mask);
+ }
+}
+#endif
+
+#if SEC_TABSELECT_METHOD == 2
+void
+mpn_sec_tabselect (volatile mp_limb_t * restrict rp,
+ volatile const mp_limb_t * restrict tab,
+ mp_size_t n, mp_size_t nents, mp_size_t which)
+{
+ mp_size_t k, i;
+ mp_limb_t mask, r0, r1, r2, r3;
+ volatile const mp_limb_t * restrict tp;
+
+ if (n & 1)
+ {
+ tp = tab;
+ r0 = 0;
+ for (k = 0; k < nents; k++)
+ {
+ mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
+ r0 += tp[0] & mask;
+ tp += n;
+ }
+ rp[0] = r0;
+ rp += 1;
+ tab += 1;
+ }
+
+ if (n & 2)
+ {
+ tp = tab;
+ r0 = r1 = 0;
+ for (k = 0; k < nents; k++)
+ {
+ mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
+ r0 += tp[0] & mask;
+ r1 += tp[1] & mask;
+ tp += n;
+ }
+ rp[0] = r0;
+ rp[1] = r1;
+ rp += 2;
+ tab += 2;
+ }
+
+ for (i = 0; i <= n - 4; i += 4)
+ {
+ tp = tab + i;
+ r0 = r1 = r2 = r3 = 0;
+ for (k = 0; k < nents; k++)
+ {
+ mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
+ r0 += tp[0] & mask;
+ r1 += tp[1] & mask;
+ r2 += tp[2] & mask;
+ r3 += tp[3] & mask;
+ tp += n;
+ }
+ rp[0] = r0;
+ rp[1] = r1;
+ rp[2] = r2;
+ rp[3] = r3;
+ rp += 4;
+ }
+}
+#endif
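+
+/* A small worked example: with nents = 4 entries of n = 2 limbs each,
+   selecting which = 2 copies the third entry to rp while still reading every
+   entry; the table values are illustrative.  */
+#if 0
+static void
+example_sec_tabselect (void)
+{
+  mp_limb_t tab[8] = {10, 11, 20, 21, 30, 31, 40, 41};
+  mp_limb_t rp[2];
+
+  mpn_sec_tabselect (rp, tab, 2, 4, 2);
+  ASSERT (rp[0] == 30 && rp[1] == 31);
+}
+#endif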
diff --git a/vendor/gmp-6.3.0/mpn/generic/set_str.c b/vendor/gmp-6.3.0/mpn/generic/set_str.c
new file mode 100644
index 0000000..2bd584c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/set_str.c
@@ -0,0 +1,290 @@
+/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base) --
+ Convert a STR_LEN long base BASE byte string pointed to by STR to a limb
+ vector pointed to by RES_PTR. Return the number of limbs in RES_PTR.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE, EXCEPT mpn_set_str, ARE INTERNAL WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+ FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* TODO:
+
+ Perhaps do not compute the highest power?
+ Instead, multiply twice by the 2nd highest power:
+
+ _______
+ |_______| hp
+ |_______| pow
+ _______________
+ |_______________| final result
+
+
+ _______
+ |_______| hp
+ |___| pow[-1]
+ ___________
+ |___________| intermediate result
+ |___| pow[-1]
+ _______________
+ |_______________| final result
+
+ Generalizing that idea, perhaps we should make powtab contain successive
+ cubes, not squares.
+*/
+
+#include "gmp-impl.h"
+
+mp_size_t
+mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
+{
+ if (POW2_P (base))
+ {
+ /* The base is a power of 2. Read the input string from least to most
+ significant character/digit. */
+
+ const unsigned char *s;
+ int next_bitpos;
+ mp_limb_t res_digit;
+ mp_size_t size;
+ int bits_per_indigit = mp_bases[base].big_base;
+
+ size = 0;
+ res_digit = 0;
+ next_bitpos = 0;
+
+ for (s = str + str_len - 1; s >= str; s--)
+ {
+ int inp_digit = *s;
+
+ res_digit |= ((mp_limb_t) inp_digit << next_bitpos) & GMP_NUMB_MASK;
+ next_bitpos += bits_per_indigit;
+ if (next_bitpos >= GMP_NUMB_BITS)
+ {
+ rp[size++] = res_digit;
+ next_bitpos -= GMP_NUMB_BITS;
+ res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
+ }
+ }
+
+ if (res_digit != 0)
+ rp[size++] = res_digit;
+ return size;
+ }
+
+ if (BELOW_THRESHOLD (str_len, SET_STR_PRECOMPUTE_THRESHOLD))
+ return mpn_bc_set_str (rp, str, str_len, base);
+ else
+ {
+ mp_ptr powtab_mem, tp;
+ powers_t powtab[GMP_LIMB_BITS];
+ int chars_per_limb;
+ mp_size_t size;
+ mp_size_t un;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ chars_per_limb = mp_bases[base].chars_per_limb;
+
+ un = str_len / chars_per_limb + 1; /* FIXME: scalar integer division */
+
+ /* Allocate one large block for the powers of big_base. */
+ powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
+
+ size_t n_pows = mpn_compute_powtab (powtab, powtab_mem, un, base);
+ powers_t *pt = powtab + n_pows;
+
+ tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
+ size = mpn_dc_set_str (rp, str, str_len, pt, tp);
+
+ TMP_FREE;
+ return size;
+ }
+}
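
A minimal usage sketch of the mpn_set_str entry point above (editorial illustration, not taken from the GMP sources; the digit values, buffer sizes and printf casts are assumptions). The input bytes are digit values in 0..base-1, not ASCII, most significant digit first, and rp must provide room for the largest str_len-digit number in the chosen base:

    #include <stdio.h>
    #include <gmp.h>

    int main (void)
    {
      /* digit values of 2024 in base 10, most significant digit first */
      unsigned char digits[4] = { 2, 0, 2, 4 };
      mp_limb_t rp[2];                  /* ample room for 4 decimal digits */
      mp_size_t rn = mpn_set_str (rp, digits, 4, 10);

      /* expect rn == 1 and rp[0] == 2024 */
      printf ("limbs = %ld, rp[0] = %lu\n", (long) rn, (unsigned long) rp[0]);
      return 0;
    }
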
+
+mp_size_t
+mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
+ const powers_t *powtab, mp_ptr tp)
+{
+ size_t len_lo, len_hi;
+ mp_limb_t cy;
+ mp_size_t ln, hn, n, sn;
+
+ len_lo = powtab->digits_in_base;
+
+ if (str_len <= len_lo)
+ {
+ if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD))
+ return mpn_bc_set_str (rp, str, str_len, powtab->base);
+ else
+ return mpn_dc_set_str (rp, str, str_len, powtab - 1, tp);
+ }
+
+ len_hi = str_len - len_lo;
+ ASSERT (len_lo >= len_hi);
+
+ if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD))
+ hn = mpn_bc_set_str (tp, str, len_hi, powtab->base);
+ else
+ hn = mpn_dc_set_str (tp, str, len_hi, powtab - 1, rp);
+
+ sn = powtab->shift;
+
+ if (hn == 0)
+ {
+ /* Zero +1 limb here, to avoid reading an allocated but uninitialised
+ limb in mpn_incr_u below. */
+ MPN_ZERO (rp, powtab->n + sn + 1);
+ }
+ else
+ {
+ if (powtab->n > hn)
+ mpn_mul (rp + sn, powtab->p, powtab->n, tp, hn);
+ else
+ mpn_mul (rp + sn, tp, hn, powtab->p, powtab->n);
+ MPN_ZERO (rp, sn);
+ }
+
+ str = str + str_len - len_lo;
+ if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD))
+ ln = mpn_bc_set_str (tp, str, len_lo, powtab->base);
+ else
+ ln = mpn_dc_set_str (tp, str, len_lo, powtab - 1, tp + powtab->n + sn + 1);
+
+ if (ln != 0)
+ {
+ cy = mpn_add_n (rp, rp, tp, ln);
+ mpn_incr_u (rp + ln, cy);
+ }
+ n = hn + powtab->n + sn;
+ return n - (rp[n - 1] == 0);
+}
+
+mp_size_t
+mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
+{
+ mp_size_t size;
+ size_t i;
+ long j;
+ mp_limb_t cy_limb;
+
+ mp_limb_t big_base;
+ int chars_per_limb;
+ mp_limb_t res_digit;
+
+ ASSERT (base >= 2);
+ ASSERT (base < numberof (mp_bases));
+ ASSERT (str_len >= 1);
+
+ big_base = mp_bases[base].big_base;
+ chars_per_limb = mp_bases[base].chars_per_limb;
+
+ size = 0;
+ for (i = chars_per_limb; i < str_len; i += chars_per_limb)
+ {
+ res_digit = *str++;
+ if (base == 10)
+ { /* This is a common case.
+ Help the compiler to avoid multiplication. */
+ for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
+ res_digit = res_digit * 10 + *str++;
+ }
+ else
+ {
+ for (j = chars_per_limb - 1; j != 0; j--)
+ res_digit = res_digit * base + *str++;
+ }
+
+ if (size == 0)
+ {
+ if (res_digit != 0)
+ {
+ rp[0] = res_digit;
+ size = 1;
+ }
+ }
+ else
+ {
+#if HAVE_NATIVE_mpn_mul_1c
+ cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+ cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+ cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+ if (cy_limb != 0)
+ rp[size++] = cy_limb;
+ }
+ }
+
+ big_base = base;
+ res_digit = *str++;
+ if (base == 10)
+ { /* This is a common case.
+ Help the compiler to avoid multiplication. */
+ for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
+ {
+ res_digit = res_digit * 10 + *str++;
+ big_base *= 10;
+ }
+ }
+ else
+ {
+ for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
+ {
+ res_digit = res_digit * base + *str++;
+ big_base *= base;
+ }
+ }
+
+ if (size == 0)
+ {
+ if (res_digit != 0)
+ {
+ rp[0] = res_digit;
+ size = 1;
+ }
+ }
+ else
+ {
+#if HAVE_NATIVE_mpn_mul_1c
+ cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+ cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+ cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+ if (cy_limb != 0)
+ rp[size++] = cy_limb;
+ }
+ return size;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sizeinbase.c b/vendor/gmp-6.3.0/mpn/generic/sizeinbase.c
new file mode 100644
index 0000000..faee947
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sizeinbase.c
@@ -0,0 +1,49 @@
+/* mpn_sizeinbase -- approximation to chars required for an mpn.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993-1995, 2001, 2002, 2011, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Same as mpz_sizeinbase, meaning exact for power-of-2 bases, and either
+ exact or 1 too big for other bases. */
+
+size_t
+mpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)
+{
+ size_t result;
+ MPN_SIZEINBASE (result, xp, xsize, base);
+ return result;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sqr.c b/vendor/gmp-6.3.0/mpn/generic/sqr.c
new file mode 100644
index 0000000..74fbff0
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sqr.c
@@ -0,0 +1,98 @@
+/* mpn_sqr -- square natural numbers.
+
+Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
+{
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+
+ if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
+ mpn_mul_basecase (p, a, n, a, n);
+ }
+ else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
+ {
+ mpn_sqr_basecase (p, a, n);
+ }
+ else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
+ {
+ /* Allocate workspace of fixed size on stack: fast! */
+ mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)];
+ ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
+ mpn_toom2_sqr (p, a, n, ws);
+ }
+ else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));
+ mpn_toom3_sqr (p, a, n, ws);
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
+ mpn_toom4_sqr (p, a, n, ws);
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
+ mpn_toom6_sqr (p, a, n, ws);
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_DECL;
+ TMP_MARK;
+ ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
+ mpn_toom8_sqr (p, a, n, ws);
+ TMP_FREE;
+ }
+ else
+ {
+ /* The current FFT code allocates its own space. That should probably
+ change. */
+ mpn_fft_mul (p, a, n, a, n);
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sqr_basecase.c b/vendor/gmp-6.3.0/mpn/generic/sqr_basecase.c
new file mode 100644
index 0000000..2645bad
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sqr_basecase.c
@@ -0,0 +1,361 @@
+/* mpn_sqr_basecase -- Internal routine to square a natural number
+ of length n.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if HAVE_NATIVE_mpn_sqr_diagonal
+#define MPN_SQR_DIAGONAL(rp, up, n) \
+ mpn_sqr_diagonal (rp, up, n)
+#else
+#define MPN_SQR_DIAGONAL(rp, up, n) \
+ do { \
+ mp_size_t _i; \
+ for (_i = 0; _i < (n); _i++) \
+ { \
+ mp_limb_t ul, lpl; \
+ ul = (up)[_i]; \
+ umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
+ (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
+ } \
+ } while (0)
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ mpn_sqr_diag_addlsh1 (rp, tp, up, n)
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ do { \
+ mp_limb_t cy; \
+ MPN_SQR_DIAGONAL (rp, up, n); \
+ cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
+ rp[2 * n - 1] += cy; \
+ } while (0)
+#else
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ do { \
+ mp_limb_t cy; \
+ MPN_SQR_DIAGONAL (rp, up, n); \
+ cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
+ cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
+ rp[2 * n - 1] += cy; \
+ } while (0)
+#endif
+#endif
+
+
+#undef READY_WITH_mpn_sqr_basecase
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+ mp_size_t i;
+ mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+ mp_ptr tp = tarr;
+ mp_limb_t cy;
+
+ /* must fit 2*n limbs in tarr */
+ ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+ if ((n & 1) != 0)
+ {
+ if (n == 1)
+ {
+ mp_limb_t ul, lpl;
+ ul = up[0];
+ umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+ rp[0] = lpl >> GMP_NAIL_BITS;
+ return;
+ }
+
+ MPN_ZERO (tp, n);
+
+ for (i = 0; i <= n - 2; i += 2)
+ {
+ cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+ tp[n + i] = cy;
+ }
+ }
+ else
+ {
+ if (n == 2)
+ {
+#if HAVE_NATIVE_mpn_mul_2
+ rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
+ rp[0] = 0;
+ rp[1] = 0;
+ rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
+ return;
+ }
+
+ MPN_ZERO (tp, n);
+
+ for (i = 0; i <= n - 4; i += 2)
+ {
+ cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+ tp[n + i] = cy;
+ }
+ cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
+ tp[2 * n - 3] = cy;
+ }
+
+ MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
+
+/* mpn_sqr_basecase using plain mpn_addmul_2.
+
+ This is tricky, since we have to let mpn_addmul_2 make some undesirable
+ multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
+ This forces us to conditionally add or subtract the mpn_sqr_diagonal
+ results. Examples of the product we form:
+
+ n = 4 n = 5 n = 6
+ u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
+ u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3
+ u4 * u5
+ add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5
+ sub: u1 sub: u1 u3 sub: u1 u3
+*/
+
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+ mp_size_t i;
+ mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+ mp_ptr tp = tarr;
+ mp_limb_t cy;
+
+ /* must fit 2*n limbs in tarr */
+ ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+ if ((n & 1) != 0)
+ {
+ mp_limb_t x0, x1;
+
+ if (n == 1)
+ {
+ mp_limb_t ul, lpl;
+ ul = up[0];
+ umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+ rp[0] = lpl >> GMP_NAIL_BITS;
+ return;
+ }
+
+ /* The code below doesn't like unnormalized operands. Since such
+ operands are unusual, handle them with a dumb recursion. */
+ if (up[n - 1] == 0)
+ {
+ rp[2 * n - 2] = 0;
+ rp[2 * n - 1] = 0;
+ mpn_sqr_basecase (rp, up, n - 1);
+ return;
+ }
+
+ MPN_ZERO (tp, n);
+
+ for (i = 0; i <= n - 2; i += 2)
+ {
+ cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+ tp[n + i] = cy;
+ }
+
+ MPN_SQR_DIAGONAL (rp, up, n);
+
+ for (i = 2;; i += 4)
+ {
+ x0 = rp[i + 0];
+ rp[i + 0] = (-x0) & GMP_NUMB_MASK;
+ x1 = rp[i + 1];
+ rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
+ __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
+ if (i + 4 >= 2 * n)
+ break;
+ mpn_incr_u (rp + i + 4, cy);
+ }
+ }
+ else
+ {
+ mp_limb_t x0, x1;
+
+ if (n == 2)
+ {
+#if HAVE_NATIVE_mpn_mul_2
+ rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
+ rp[0] = 0;
+ rp[1] = 0;
+ rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
+ return;
+ }
+
+ /* The code below doesn't like unnormalized operands. Since such
+ operands are unusual, handle them with a dumb recursion. */
+ if (up[n - 1] == 0)
+ {
+ rp[2 * n - 2] = 0;
+ rp[2 * n - 1] = 0;
+ mpn_sqr_basecase (rp, up, n - 1);
+ return;
+ }
+
+ MPN_ZERO (tp, n);
+
+ for (i = 0; i <= n - 4; i += 2)
+ {
+ cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+ tp[n + i] = cy;
+ }
+ cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
+ tp[2 * n - 3] = cy;
+
+ MPN_SQR_DIAGONAL (rp, up, n);
+
+ for (i = 2;; i += 4)
+ {
+ x0 = rp[i + 0];
+ rp[i + 0] = (-x0) & GMP_NUMB_MASK;
+ x1 = rp[i + 1];
+ rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
+ if (i + 6 >= 2 * n)
+ break;
+ __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
+ mpn_incr_u (rp + i + 4, cy);
+ }
+ mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
+ }
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+ cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+ cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+ rp[2 * n - 1] += cy;
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1
+
+/* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack
+ allocation. */
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+ if (n == 1)
+ {
+ mp_limb_t ul, lpl;
+ ul = up[0];
+ umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+ rp[0] = lpl >> GMP_NAIL_BITS;
+ }
+ else
+ {
+ mp_size_t i;
+ mp_ptr xp;
+
+ rp += 1;
+ rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]);
+ for (i = n - 2; i != 0; i--)
+ {
+ up += 1;
+ rp += 2;
+ rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]);
+ }
+
+ xp = rp - 2 * n + 3;
+ mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n);
+ }
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase)
+
+/* Default mpn_sqr_basecase using mpn_addmul_1. */
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+ mp_size_t i;
+
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
+
+ if (n == 1)
+ {
+ mp_limb_t ul, lpl;
+ ul = up[0];
+ umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+ rp[0] = lpl >> GMP_NAIL_BITS;
+ }
+ else
+ {
+ mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+ mp_ptr tp = tarr;
+ mp_limb_t cy;
+
+ /* must fit 2*n limbs in tarr */
+ ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+ cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
+ tp[n - 1] = cy;
+ for (i = 2; i < n; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
+ tp[n + i - 2] = cy;
+ }
+
+ MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+ }
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/sqrlo.c b/vendor/gmp-6.3.0/mpn/generic/sqrlo.c
new file mode 100644
index 0000000..71530b6
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sqrlo.c
@@ -0,0 +1,239 @@
+/* mpn_sqrlo -- squares an n-limb number and returns the low n limbs
+ of the result.
+
+ Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+ THIS IS (FOR NOW) AN INTERNAL FUNCTION. IT IS ONLY SAFE TO REACH THIS
+ FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
+ THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005, 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_range_basecase 1
+#define MAYBE_range_toom22 1
+#else
+#define MAYBE_range_basecase \
+ ((SQRLO_DC_THRESHOLD == 0 ? SQRLO_BASECASE_THRESHOLD : SQRLO_DC_THRESHOLD) < SQR_TOOM2_THRESHOLD*36/(36-11))
+#define MAYBE_range_toom22 \
+ ((SQRLO_DC_THRESHOLD == 0 ? SQRLO_BASECASE_THRESHOLD : SQRLO_DC_THRESHOLD) < SQR_TOOM3_THRESHOLD*36/(36-11) )
+#endif
+
+/* THINK: The DC strategy uses different constants in different Toom's
+ ranges. Something smoother?
+*/
+
+/*
+  Compute the least significant half of the square {xp,n}^2, or
+  formally {rp,n} = {xp,n}^2 Mod (B^n).
+
+ Above the given threshold, the Divide and Conquer strategy is used.
+ The operand is split in two, and a full square plus a mullo
+ is used to obtain the final result. The more natural strategy is to
+ split in two halves, but this is far from optimal when a
+ sub-quadratic multiplication is used.
+
+ Mulders suggests an unbalanced split in favour of the full product,
+ split n = n1 + n2, where an = n1 <= n2 = (1-a)n; i.e. 0 < a <= 1/2.
+
+ To compute the value of a, we assume that the cost of mullo for a
+ given size ML(n) is a fraction of the cost of a full product with
+ same size M(n), and the cost M(n)=n^e for some exponent 1 < e <= 2;
+ then we can write:
+
+ ML(n) = 2*ML(an) + M((1-a)n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
+
+  Given a value for e, we want to minimise the value of k, i.e. the
+  function k=(1-a)^e/(1-2*a^e).
+
+ With e=2, the exponent for schoolbook multiplication, the minimum is
+ given by the values a=1-a=1/2.
+
+ With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
+  Mulders computes (1-a) = 0.694... and we approximate a with 11/36.
+
+ Other possible approximations follow:
+ e=log(5)/log(3) [Toom-3] -> a ~= 9/40
+ e=log(7)/log(4) [Toom-4] -> a ~= 7/39
+ e=log(11)/log(6) [Toom-6] -> a ~= 1/8
+ e=log(15)/log(8) [Toom-8] -> a ~= 1/10
+
+  The values above were obtained with the following trivial commands
+ in the gp-pari shell:
+
+fun(e,a)=(1-a)^e/(1-2*a^e)
+mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
+contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
+contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
+contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
+contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
+contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
+
+ ,
+ |\
+ | \
+ +----,
+ | |
+ | |
+ | |\
+ | | \
+ +----+--`
+ ^ n2 ^n1^
+
+  For an actual implementation, the assumption that M(n)=n^e is
+  incorrect; as a consequence, the assumption that ML(n)=k*M(n)
+  with a constant k is also wrong.
+
+  But theory suggests two things:
+  - the better the multiplication algorithm is (lower e), the closer k
+    approaches 1, and the closer a approaches 0.
+
+ - A value for a smaller than optimal is probably less bad than a
+ bigger one: e.g. let e=log(3)/log(2), a=0.3058_ the optimal
+ value, and k(a)=0.808_ the mul/mullo speed ratio. We get
+ k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
+*/
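
The analysis above can be cross-checked numerically; the following standalone sketch (editorial, not part of GMP, using only the C standard library) scans a over (0, 1/2] for the Karatsuba exponent e = log(3)/log(2) and finds the minimum of k(a) = (1-a)^e/(1-2*a^e) near a = 0.306, consistent with the 11/36 approximation used in mpn_dc_sqrlo below:

    #include <stdio.h>
    #include <math.h>

    int main (void)
    {
      double e = log (3.0) / log (2.0);   /* Karatsuba / toom22 exponent */
      double a, best_a = 0.0, best_k = 1e30;

      for (a = 0.001; a <= 0.5; a += 0.001)
        {
          double k = pow (1.0 - a, e) / (1.0 - 2.0 * pow (a, e));
          if (k < best_k)
            {
              best_k = k;
              best_a = a;
            }
        }
      printf ("a* = %.3f  k(a*) = %.3f  (11/36 = %.3f)\n",
              best_a, best_k, 11.0 / 36.0);
      return 0;
    }
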
+
+static mp_size_t
+mpn_sqrlo_itch (mp_size_t n)
+{
+ return 2*n;
+}
+
+/*
+ mpn_dc_sqrlo requires a scratch space of 2*n limbs at tp.
+ It accepts tp == rp.
+*/
+static void
+mpn_dc_sqrlo (mp_ptr rp, mp_srcptr xp, mp_size_t n, mp_ptr tp)
+{
+ mp_size_t n2, n1;
+ ASSERT (n >= 2);
+ ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+ ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
+
+ /* Divide-and-conquer */
+
+ /* We need fractional approximation of the value 0 < a <= 1/2
+ giving the minimum in the function k=(1-a)^e/(1-2*a^e).
+ */
+ if (MAYBE_range_basecase && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD*36/(36-11)))
+ n1 = n >> 1;
+ else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD*36/(36-11)))
+ n1 = n * 11 / (size_t) 36; /* n1 ~= n*(1-.694...) */
+ else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD*40/(40-9)))
+ n1 = n * 9 / (size_t) 40; /* n1 ~= n*(1-.775...) */
+ else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD*10/9))
+ n1 = n * 7 / (size_t) 39; /* n1 ~= n*(1-.821...) */
+ /* n1 = n * 4 / (size_t) 31; // n1 ~= n*(1-.871...) [TOOM66] */
+ else
+ n1 = n / (size_t) 10; /* n1 ~= n*(1-.899...) [TOOM88] */
+
+ n2 = n - n1;
+
+ /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0 */
+
+ /* x0 ^ 2 */
+ mpn_sqr (tp, xp, n2);
+ MPN_COPY (rp, tp, n2);
+
+ /* x1 * x0 * 2^(n2 GMP_NUMB_BITS) */
+ if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+ mpn_mul_basecase (tp + n, xp + n2, n1, xp, n1);
+ else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+ mpn_mullo_basecase (tp + n, xp + n2, xp, n1);
+ else
+ mpn_mullo_n (tp + n, xp + n2, xp, n1);
+ /* mpn_dc_mullo_n (tp + n, xp + n2, xp, n1, tp + n); */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ mpn_addlsh1_n (rp + n2, tp + n2, tp + n, n1);
+#else
+ mpn_lshift (rp + n2, tp + n, n1, 1);
+ mpn_add_n (rp + n2, rp + n2, tp + n2, n1);
+#endif
+}
+
+/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0. */
+#define SQR_BASECASE_ALLOC \
+ (SQRLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*SQRLO_BASECASE_THRESHOLD_LIMIT)
+
+/* FIXME: This function should accept a temporary area and handle the
+   case tp == rp, as dc_sqrlo already does with its pointer tp.
+*/
+
+void
+mpn_sqrlo (mp_ptr rp, mp_srcptr xp, mp_size_t n)
+{
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+
+ if (BELOW_THRESHOLD (n, SQRLO_BASECASE_THRESHOLD))
+ {
+ /* FIXME: smarter criteria? */
+#if HAVE_NATIVE_mpn_mullo_basecase || ! HAVE_NATIVE_mpn_sqr_basecase
+ /* mullo computes as many products as sqr, but directly writes
+ on the result area. */
+ mpn_mullo_basecase (rp, xp, xp, n);
+#else
+ /* Allocate workspace of fixed size on stack: fast! */
+ mp_limb_t tp[SQR_BASECASE_ALLOC];
+ mpn_sqr_basecase (tp, xp, n);
+ MPN_COPY (rp, tp, n);
+#endif
+ }
+ else if (BELOW_THRESHOLD (n, SQRLO_DC_THRESHOLD))
+ {
+ mpn_sqrlo_basecase (rp, xp, n);
+ }
+ else
+ {
+ mp_ptr tp;
+ TMP_DECL;
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS (mpn_sqrlo_itch (n));
+ if (BELOW_THRESHOLD (n, SQRLO_SQR_THRESHOLD))
+ {
+ mpn_dc_sqrlo (rp, xp, n, tp);
+ }
+ else
+ {
+ /* For really large operands, use plain mpn_mul_n but throw away upper n
+ limbs of result. */
+#if !TUNE_PROGRAM_BUILD && (SQRLO_SQR_THRESHOLD > SQR_FFT_THRESHOLD)
+ mpn_fft_mul (tp, xp, n, xp, n);
+#else
+ mpn_sqr (tp, xp, n);
+#endif
+ MPN_COPY (rp, tp, n);
+ }
+ TMP_FREE;
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sqrlo_basecase.c b/vendor/gmp-6.3.0/mpn/generic/sqrlo_basecase.c
new file mode 100644
index 0000000..3148609
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sqrlo_basecase.c
@@ -0,0 +1,194 @@
+/* mpn_sqrlo_basecase -- Internal routine to square a natural number
+ of length n.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015,
+2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
+#if HAVE_NATIVE_mpn_addmul_1
+#define SQRLO_SHORTCUT_MULTIPLICATIONS 0
+#else
+#define SQRLO_SHORTCUT_MULTIPLICATIONS 1
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_diagonal
+#define MPN_SQR_DIAGONAL(rp, up, n) \
+ mpn_sqr_diagonal (rp, up, n)
+#else
+#define MPN_SQR_DIAGONAL(rp, up, n) \
+ do { \
+ mp_size_t _i; \
+ for (_i = 0; _i < (n); _i++) \
+ { \
+ mp_limb_t ul, lpl; \
+ ul = (up)[_i]; \
+ umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
+ (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
+ } \
+ } while (0)
+#endif
+
+#define MPN_SQRLO_DIAGONAL(rp, up, n) \
+ do { \
+ mp_size_t nhalf; \
+ nhalf = (n) >> 1; \
+ MPN_SQR_DIAGONAL ((rp), (up), nhalf); \
+ if (((n) & 1) != 0) \
+ { \
+ mp_limb_t op; \
+ op = (up)[nhalf]; \
+ (rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK; \
+ } \
+ } while (0)
+
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \
+ do { \
+ MPN_SQRLO_DIAGONAL((rp), (up), (n)); \
+ mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1); \
+ } while (0)
+#else
+#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \
+ do { \
+ MPN_SQRLO_DIAGONAL((rp), (up), (n)); \
+ mpn_lshift ((tp), (tp), (n) - 1, 1); \
+ mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1); \
+ } while (0)
+#endif
+
+/* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
+#define SQRLO_BASECASE_ALLOC \
+ (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
+
+/* Default mpn_sqrlo_basecase using mpn_addmul_1. */
+#ifndef SQRLO_SPECIAL_CASES
+#define SQRLO_SPECIAL_CASES 2
+#endif
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_special_cases 1
+#else
+#define MAYBE_special_cases \
+ ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0))
+#endif
+
+void
+mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+ mp_limb_t ul;
+
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
+
+ ul = up[0];
+
+ if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES)
+ {
+#if SQRLO_SPECIAL_CASES == 1
+ rp[0] = (ul * ul) & GMP_NUMB_MASK;
+#else
+ if (n == 1)
+ rp[0] = (ul * ul) & GMP_NUMB_MASK;
+ else
+ {
+ mp_limb_t hi, lo, ul1;
+ umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
+ rp[0] = lo >> GMP_NAIL_BITS;
+ ul1 = up[1];
+#if SQRLO_SPECIAL_CASES == 2
+ rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
+#else
+ if (n == 2)
+ rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
+ else
+ {
+ mp_limb_t hi1;
+#if GMP_NAIL_BITS != 0
+ ul <<= 1;
+#endif
+ umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
+ hi1 += ul * up[2];
+#if GMP_NAIL_BITS == 0
+ hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
+ add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
+#else
+ hi += lo >> GMP_NAIL_BITS;
+ rp[1] = hi & GMP_NUMB_MASK;
+ rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
+#endif
+ }
+#endif
+ }
+#endif
+ }
+ else
+ {
+ mp_limb_t tp[SQRLO_BASECASE_ALLOC];
+ mp_size_t i;
+
+ /* must fit n-1 limbs in tp */
+ ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
+
+ --n;
+#if SQRLO_SHORTCUT_MULTIPLICATIONS
+ {
+ mp_limb_t cy;
+
+ cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
+ for (i = 1; 2 * i + 1 < n; ++i)
+ {
+ ul = up[i];
+ cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
+ }
+ tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
+ }
+#else
+ mpn_mul_1 (tp, up + 1, n, ul);
+ for (i = 1; 2 * i < n; ++i)
+ mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
+#endif
+
+ MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
+ }
+}
+#undef SQRLO_SPECIAL_CASES
+#undef MAYBE_special_cases
+#undef SQRLO_BASECASE_ALLOC
+#undef SQRLO_SHORTCUT_MULTIPLICATIONS
+#undef MPN_SQR_DIAGONAL
+#undef MPN_SQRLO_DIAGONAL
+#undef MPN_SQRLO_DIAG_ADDLSH1
diff --git a/vendor/gmp-6.3.0/mpn/generic/sqrmod_bnm1.c b/vendor/gmp-6.3.0/mpn/generic/sqrmod_bnm1.c
new file mode 100644
index 0000000..0acbe12
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sqrmod_bnm1.c
@@ -0,0 +1,328 @@
+/* sqrmod_bnm1.c -- squaring mod B^n-1.
+
+ Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+ Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2020, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Input is {ap,rn}; output is {rp,rn}, computation is
+ mod B^rn - 1, and values are semi-normalised; zero is represented
+ as either 0 or B^n - 1. Needs a scratch of 2rn limbs at tp.
+ tp==rp is allowed. */
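
A worked toy instance of this wrap-around reduction (editorial, not from the GMP sources), using a hypothetical 4-bit limb so that B = 16 and the modulus is B^rn - 1 = 15 for rn = 1:

    a = 14:  a^2 = 196 = 12*B + 4, so {tp,2} = {4, 12};
             mpn_add_n gives 4 + 12 = 16, i.e. limb 0 with carry 1,
             and MPN_INCR_U yields 1, matching 196 mod 15 = 1.
    a = 15:  a^2 = 225 = 14*B + 1, so {tp,2} = {1, 14};
             1 + 14 = 15 with no carry, and the result 15 = B^rn - 1 is the
             semi-normalised representation of zero mentioned above.
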
+static void
+mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+ mp_limb_t cy;
+
+ ASSERT (0 < rn);
+
+ mpn_sqr (tp, ap, rn);
+ cy = mpn_add_n (rp, tp, tp + rn, rn);
+ /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+ * be no overflow when adding in the carry. */
+ MPN_INCR_U (rp, rn, cy);
+}
+
+
+/* Input is {ap,rn+1}; output is {rp,rn+1}, in
+ normalised representation, computation is mod B^rn + 1. Needs
+ a scratch area of 2rn limbs at tp; tp == rp is allowed.
+ Output is normalised. */
+static void
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+ mp_limb_t cy;
+ unsigned k;
+
+ ASSERT (0 < rn);
+
+ if (UNLIKELY (ap[rn]))
+ {
+ *rp = 1;
+ MPN_FILL (rp + 1, rn, 0);
+ return;
+ }
+ else if (MPN_SQRMOD_BKNP1_USABLE (rn, k, MUL_FFT_MODF_THRESHOLD))
+ {
+ mp_size_t n_k = rn / k;
+ TMP_DECL;
+
+ TMP_MARK;
+ mpn_sqrmod_bknp1 (rp, ap, n_k, k,
+ TMP_ALLOC_LIMBS (mpn_sqrmod_bknp1_itch (rn)));
+ TMP_FREE;
+ return;
+ }
+ mpn_sqr (tp, ap, rn);
+ cy = mpn_sub_n (rp, tp, tp + rn, rn);
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn + 1, cy);
+}
+
+
+/* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if the operand
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1.
+ * It should not be a problem if sqrmod_bnm1 is used to
+ * compute the full square with 2*an <= rn, because this condition
+ * implies (B^an-1)^2 < (B^rn-1).
+ *
+ * Requires rn/4 < an <= rn
+ * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
+ *
+ * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
+ */
+void
+mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
+{
+ ASSERT (0 < an);
+ ASSERT (an <= rn);
+
+ if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
+ {
+ if (UNLIKELY (an < rn))
+ {
+ if (UNLIKELY (2*an <= rn))
+ {
+ mpn_sqr (rp, ap, an);
+ }
+ else
+ {
+ mp_limb_t cy;
+ mpn_sqr (tp, ap, an);
+ cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
+ MPN_INCR_U (rp, rn, cy);
+ }
+ }
+ else
+ mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
+ }
+ else
+ {
+ mp_size_t n;
+ mp_limb_t cy;
+ mp_limb_t hi;
+
+ n = rn >> 1;
+
+ ASSERT (2*an > n);
+
+ /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
+ and crt together as
+
+ x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
+ */
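
A worked instance of this CRT identity (editorial, not from the GMP sources), again with a hypothetical 4-bit limb so that B^n = 16:

    a = 11, a^2 = 121:
      xm = 121 mod (B^n - 1) = 121 mod 15 = 1
      xp = 121 mod (B^n + 1) = 121 mod 17 = 2
      s  = (xp + xm)/2 mod 15 = 3 * 8 mod 15 = 9     (since 2^-1 mod 15 = 8)
      x  = -xp * B^n + (B^n + 1) * s = -32 + 153 = 121
    which recovers a^2 exactly, as it must, since 121 < B^rn - 1 = 255.
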
+
+#define a0 ap
+#define a1 (ap + n)
+
+#define xp tp /* 2n + 2 */
+ /* am1 maybe in {xp, n} */
+#define sp1 (tp + 2*n + 2)
+ /* ap1 maybe in {sp1, n + 1} */
+
+ {
+ mp_srcptr am1;
+ mp_size_t anm;
+ mp_ptr so;
+
+ if (LIKELY (an > n))
+ {
+ so = xp + n;
+ am1 = xp;
+ cy = mpn_add (xp, a0, n, a1, an - n);
+ MPN_INCR_U (xp, n, cy);
+ anm = n;
+ }
+ else
+ {
+ so = xp;
+ am1 = a0;
+ anm = an;
+ }
+
+ mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
+ }
+
+ {
+ int k;
+ mp_srcptr ap1;
+ mp_size_t anp;
+
+ if (LIKELY (an > n)) {
+ ap1 = sp1;
+ cy = mpn_sub (sp1, a0, n, a1, an - n);
+ sp1[n] = 0;
+ MPN_INCR_U (sp1, n + 1, cy);
+ anp = n + ap1[n];
+ } else {
+ ap1 = a0;
+ anp = an;
+ }
+
+ if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+ k=0;
+ else
+ {
+ int mask;
+ k = mpn_fft_best_k (n, 1);
+ mask = (1<<k) -1;
+ while (n & mask) {k--; mask >>=1;};
+ }
+ if (k >= FFT_FIRST_K)
+ xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
+ else if (UNLIKELY (ap1 == a0))
+ {
+ ASSERT (anp <= n);
+ ASSERT (2*anp > n);
+ mpn_sqr (xp, a0, an);
+ anp = 2*an - n;
+ cy = mpn_sub (xp, xp, n, xp + n, anp);
+ xp[n] = 0;
+ MPN_INCR_U (xp, n+1, cy);
+ }
+ else
+ mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
+ }
+
+ /* Here the CRT recomposition begins.
+
+ xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+ Division by 2 is a bitwise rotation.
+
+ Assumes xp normalised mod (B^n+1).
+
+ The residue class [0] is represented by [B^n-1]; except when
+ both input are ZERO.
+ */
+
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+ cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+ hi = cy << (GMP_NUMB_BITS - 1);
+ cy = 0;
+ /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+ overflows, i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+ cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+ hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+ cy >>= 1;
+ /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
+ the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
+#endif
+#if GMP_NAIL_BITS == 0
+ add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
+#else
+ cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+ rp[n-1] ^= hi;
+#endif
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+ cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+ cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+ cy += (rp[0]&1);
+ mpn_rshift(rp, rp, n, 1);
+ ASSERT (cy <= 2);
+ hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+ cy >>= 1;
+ /* We can have cy != 0 only if hi = 0... */
+ ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+ rp[n-1] |= hi;
+ /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
+#endif
+ ASSERT (cy <= 1);
+ /* Next increment can not overflow, read the previous comments about cy. */
+ ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+ MPN_INCR_U(rp, n, cy);
+
+ /* Compute the highest half:
+ ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
+ */
+ if (UNLIKELY (2*an < rn))
+ {
+ /* Note that in this case, the only way the result can equal
+ zero mod B^{rn} - 1 is if the input is zero, and
+ then the output of both the recursive calls and this CRT
+ reconstruction is zero, not B^{rn} - 1. */
+ cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
+
+ /* FIXME: This subtraction of the high parts is not really
+ necessary, we do it to get the carry out, and for sanity
+ checking. */
+ cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
+ xp + 2*an - n, rn - 2*an, cy);
+ ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
+ cy = mpn_sub_1 (rp, rp, 2*an, cy);
+ ASSERT (cy == (xp + 2*an - n)[0]);
+ }
+ else
+ {
+ cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+ /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+ DECR will affect _at most_ the lowest n limbs. */
+ MPN_DECR_U (rp, 2*n, cy);
+ }
+#undef a0
+#undef a1
+#undef xp
+#undef sp1
+ }
+}
+
+mp_size_t
+mpn_sqrmod_bnm1_next_size (mp_size_t n)
+{
+ mp_size_t nh;
+
+ if (BELOW_THRESHOLD (n, SQRMOD_BNM1_THRESHOLD))
+ return n;
+ if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
+ return (n + (2-1)) & (-2);
+ if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
+ return (n + (4-1)) & (-4);
+
+ nh = (n + 1) >> 1;
+
+ if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
+ return (n + (8-1)) & (-8);
+
+ return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sqrtrem.c b/vendor/gmp-6.3.0/mpn/generic/sqrtrem.c
new file mode 100644
index 0000000..cc6dd9c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sqrtrem.c
@@ -0,0 +1,555 @@
+/* mpn_sqrtrem -- square root and remainder
+
+ Contributed to the GNU project by Paul Zimmermann (most code),
+ Torbjorn Granlund (mpn_sqrtrem1) and Marco Bodrato (mpn_dc_sqrt).
+
+ THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+ FUTURE GMP RELEASE.
+
+Copyright 1999-2002, 2004, 2005, 2008, 2010, 2012, 2015, 2017 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* See "Karatsuba Square Root", reference in gmp.texi. */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#define USE_DIVAPPR_Q 1
+#define TRACE(x)
+
+static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
+{
+ 0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
+ 0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
+ 0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
+ 0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
+ 0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
+ 0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
+ 0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
+ 0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
+ 0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
+ 0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
+ 0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
+ 0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
+ 0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
+ 0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
+ 0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
+ 0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
+ 0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
+ 0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
+ 0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
+ 0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
+ 0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
+ 0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
+ 0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
+ 0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
+ 0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
+ 0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
+ 0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
+ 0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
+ 0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
+ 0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
+ 0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
+ 0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
+ 0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
+ 0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
+ 0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
+ 0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
+ 0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
+ 0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
+ 0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
+ 0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
+ 0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
+ 0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
+ 0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
+ 0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
+ 0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
+ 0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
+ 0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
+ 0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00 /* sqrt(1/1f8)..sqrt(1/1ff) */
+};
+
+/* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2. */
+
+#if GMP_NUMB_BITS > 32
+#define MAGIC CNST_LIMB(0x10000000000) /* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
+#else
+#define MAGIC CNST_LIMB(0x100000) /* 0xfee6f < MAGIC < 0x29cbc8 */
+#endif
+
+static mp_limb_t
+mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
+{
+#if GMP_NUMB_BITS > 32
+ mp_limb_t a1;
+#endif
+ mp_limb_t x0, t2, t, x2;
+ unsigned abits;
+
+ ASSERT_ALWAYS (GMP_NAIL_BITS == 0);
+ ASSERT_ALWAYS (GMP_LIMB_BITS == 32 || GMP_LIMB_BITS == 64);
+ ASSERT (a0 >= GMP_NUMB_HIGHBIT / 2);
+
+ /* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),
+ since we can do the former without division. As part of the last
+ iteration convert from 1/sqrt(a) to sqrt(a). */
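
For reference (editorial note, not from the GMP sources), the classical Newton step for y ~ 1/sqrt(a) is

    y' = y * (3 - a*y^2) / 2,

which roughly doubles the number of correct bits per iteration. The fixed-point code below starts from the roughly 8-bit table value and applies scaled variants of this step, folding the final multiplication by a (sqrt(a) = a * (1/sqrt(a))) into the last iteration.
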
+
+ abits = a0 >> (GMP_LIMB_BITS - 1 - 8); /* extract bits for table lookup */
+ x0 = 0x100 | invsqrttab[abits - 0x80]; /* initial 1/sqrt(a) */
+
+ /* x0 is now an 8 bits approximation of 1/sqrt(a0) */
+
+#if GMP_NUMB_BITS > 32
+ a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
+ t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000 - a1 * x0 * x0) >> 16;
+ x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
+
+ /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
+
+ t2 = x0 * (a0 >> (32-8));
+ t = t2 >> 25;
+ t = ((mp_limb_signed_t) ((a0 << 14) - t * t - MAGIC) >> (32-8));
+ x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 15);
+ x0 >>= 32;
+#else
+ t2 = x0 * (a0 >> (16-8));
+ t = t2 >> 13;
+ t = ((mp_limb_signed_t) ((a0 << 6) - t * t - MAGIC) >> (16-8));
+ x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 7);
+ x0 >>= 16;
+#endif
+
+ /* x0 is now a full limb approximation of sqrt(a0) */
+
+ x2 = x0 * x0;
+ if (x2 + 2*x0 <= a0 - 1)
+ {
+ x2 += 2*x0 + 1;
+ x0++;
+ }
+
+ *rp = a0 - x2;
+ return x0;
+}
+
+
+#define Prec (GMP_NUMB_BITS >> 1)
+#if ! defined(SQRTREM2_INPLACE)
+#define SQRTREM2_INPLACE 0
+#endif
+
+/* Same as mpn_sqrtrem, but for size=2 and {np, 2} normalized;
+   returns cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0]. */
+#if SQRTREM2_INPLACE
+#define CALL_SQRTREM2_INPLACE(sp,rp) mpn_sqrtrem2 (sp, rp)
+static mp_limb_t
+mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp)
+{
+ mp_srcptr np = rp;
+#else
+#define CALL_SQRTREM2_INPLACE(sp,rp) mpn_sqrtrem2 (sp, rp, rp)
+static mp_limb_t
+mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
+{
+#endif
+ mp_limb_t q, u, np0, sp0, rp0, q2;
+ int cc;
+
+ ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);
+
+ np0 = np[0];
+ sp0 = mpn_sqrtrem1 (rp, np[1]);
+ rp0 = rp[0];
+ /* rp0 <= 2*sp0 < 2^(Prec + 1) */
+ rp0 = (rp0 << (Prec - 1)) + (np0 >> (Prec + 1));
+ q = rp0 / sp0;
+ /* q <= 2^Prec, if q = 2^Prec, reduce the overestimate. */
+ q -= q >> Prec;
+ /* now we have q < 2^Prec */
+ u = rp0 - q * sp0;
+ /* now we have (rp[0]<<Prec + np0>>Prec)/2 = q * sp0 + u */
+ sp0 = (sp0 << Prec) | q;
+ cc = u >> (Prec - 1);
+ rp0 = ((u << (Prec + 1)) & GMP_NUMB_MASK) + (np0 & ((CNST_LIMB (1) << (Prec + 1)) - 1));
+ /* subtract q * q from rp */
+ q2 = q * q;
+ cc -= rp0 < q2;
+ rp0 -= q2;
+ if (cc < 0)
+ {
+ rp0 += sp0;
+ cc += rp0 < sp0;
+ --sp0;
+ rp0 += sp0;
+ cc += rp0 < sp0;
+ }
+
+ rp[0] = rp0;
+ sp[0] = sp0;
+ return cc;
+}
+
+/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
+ and in {np, n} the low n limbs of the remainder, returns the high
+ limb of the remainder (which is 0 or 1).
+ Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
+ where B=2^GMP_NUMB_BITS.
+ Needs a scratch of n/2+1 limbs. */
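
A compact editorial summary of the recursion implemented below (Zimmermann's "Karatsuba Square Root", cited at the top of this file); the names A2, A1, A0, S1, R1 are for exposition only and do not appear in the code:

    write {np,2n} as A = A2*b^2 + A1*b + A0, with b = B^l and A1, A0 of l limbs each
    (S1, R1) = sqrtrem (A2)               -- the recursive call on the top 2h limbs
    (Q,  u ) = divmod (R1*b + A1, 2*S1)   -- mpn_tdiv_qr followed by the right shift
    S = S1*b + Q
    R = u*b + A0 - Q*Q                    -- mpn_sqr and mpn_sub_n
    if R < 0:  S -= 1, then R += 2*S + 1  -- the final (c < 0) adjustment
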
+static mp_limb_t
+mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n, mp_limb_t approx, mp_ptr scratch)
+{
+ mp_limb_t q; /* carry out of {sp, n} */
+ int c, b; /* carry out of remainder */
+ mp_size_t l, h;
+
+ ASSERT (n > 1);
+ ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);
+
+ l = n / 2;
+ h = n - l;
+ if (h == 1)
+ q = CALL_SQRTREM2_INPLACE (sp + l, np + 2 * l);
+ else
+ q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h, 0, scratch);
+ if (q != 0)
+ ASSERT_CARRY (mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h));
+ TRACE(printf("tdiv_qr(,,,,%u,,%u) -> %u\n", (unsigned) n, (unsigned) h, (unsigned) (n - h + 1)));
+ mpn_tdiv_qr (scratch, np + l, 0, np + l, n, sp + l, h);
+ q += scratch[l];
+ c = scratch[0] & 1;
+ mpn_rshift (sp, scratch, l, 1);
+ sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
+ if (UNLIKELY ((sp[0] & approx) != 0)) /* (sp[0] & mask) > 1 */
+ return 1; /* Remainder is non-zero */
+ q >>= 1;
+ if (c != 0)
+ c = mpn_add_n (np + l, np + l, sp + l, h);
+ TRACE(printf("sqr(,,%u)\n", (unsigned) l));
+ mpn_sqr (np + n, sp, l);
+ b = q + mpn_sub_n (np, np, np + n, 2 * l);
+ c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
+
+ if (c < 0)
+ {
+ q = mpn_add_1 (sp + l, sp + l, h, q);
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
+ c += mpn_addlsh1_n_ip1 (np, sp, n) + 2 * q;
+#else
+ c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
+#endif
+ c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
+ q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
+ }
+
+ return c;
+}
+
+#if USE_DIVAPPR_Q
+static void
+mpn_divappr_q (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
+{
+ gmp_pi1_t inv;
+ mp_limb_t qh;
+ ASSERT (dn > 2);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+ MPN_COPY (scratch, np, nn);
+ invert_pi1 (inv, dp[dn-1], dp[dn-2]);
+ if (BELOW_THRESHOLD (dn, DC_DIVAPPR_Q_THRESHOLD))
+ qh = mpn_sbpi1_divappr_q (qp, scratch, nn, dp, dn, inv.inv32);
+ else if (BELOW_THRESHOLD (dn, MU_DIVAPPR_Q_THRESHOLD))
+ qh = mpn_dcpi1_divappr_q (qp, scratch, nn, dp, dn, &inv);
+ else
+ {
+ mp_size_t itch = mpn_mu_divappr_q_itch (nn, dn, 0);
+ TMP_DECL;
+ TMP_MARK;
+ /* Sadly, scratch is too small. */
+ qh = mpn_mu_divappr_q (qp, np, nn, dp, dn, TMP_ALLOC_LIMBS (itch));
+ TMP_FREE;
+ }
+ qp [nn - dn] = qh;
+}
+#endif
+
+/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n-odd},
+ returns zero if the operand was a perfect square, one otherwise.
+ Assumes {np, 2n-odd}*4^nsh is normalized, i.e. B > np[2n-1-odd]*4^nsh >= B/4
+ where B=2^GMP_NUMB_BITS.
+ THINK: In the odd case, three more (dummy) limbs are taken into account,
+ when nsh is maximal, two limbs are discarded from the result of the
+ division. Too much? Is a single dummy limb enough? */
+static int
+mpn_dc_sqrt (mp_ptr sp, mp_srcptr np, mp_size_t n, unsigned nsh, unsigned odd)
+{
+ mp_limb_t q; /* carry out of {sp, n} */
+ int c; /* carry out of remainder */
+ mp_size_t l, h;
+ mp_ptr qp, tp, scratch;
+ TMP_DECL;
+ TMP_MARK;
+
+ ASSERT (np[2 * n - 1 - odd] != 0);
+ ASSERT (n > 4);
+ ASSERT (nsh < GMP_NUMB_BITS / 2);
+
+ l = (n - 1) / 2;
+ h = n - l;
+ ASSERT (n >= l + 2 && l + 2 >= h && h > l && l >= 1 + odd);
+ scratch = TMP_ALLOC_LIMBS (l + 2 * n + 5 - USE_DIVAPPR_Q); /* n + 2-USE_DIVAPPR_Q */
+ tp = scratch + n + 2 - USE_DIVAPPR_Q; /* n + h + 1, but tp [-1] is writable */
+ if (nsh != 0)
+ {
+ /* o is used to exactly set the lowest bits of the dividend, is it needed? */
+ int o = l > (1 + odd);
+ ASSERT_NOCARRY (mpn_lshift (tp - o, np + l - 1 - o - odd, n + h + 1 + o, 2 * nsh));
+ }
+ else
+ MPN_COPY (tp, np + l - 1 - odd, n + h + 1);
+ q = mpn_dc_sqrtrem (sp + l, tp + l + 1, h, 0, scratch);
+ if (q != 0)
+ ASSERT_CARRY (mpn_sub_n (tp + l + 1, tp + l + 1, sp + l, h));
+ qp = tp + n + 1; /* l + 2 */
+ TRACE(printf("div(appr)_q(,,%u,,%u) -> %u \n", (unsigned) n+1, (unsigned) h, (unsigned) (n + 1 - h + 1)));
+#if USE_DIVAPPR_Q
+ mpn_divappr_q (qp, tp, n + 1, sp + l, h, scratch);
+#else
+ mpn_div_q (qp, tp, n + 1, sp + l, h, scratch);
+#endif
+ q += qp [l + 1];
+ c = 1;
+ if (q > 1)
+ {
+ /* FIXME: if s!=0 we will shift later, a noop on this area. */
+ MPN_FILL (sp, l, GMP_NUMB_MAX);
+ }
+ else
+ {
+ /* FIXME: if s!=0 we will shift again later, shift just once. */
+ mpn_rshift (sp, qp + 1, l, 1);
+ sp[l - 1] |= q << (GMP_NUMB_BITS - 1);
+ if (((qp[0] >> (2 + USE_DIVAPPR_Q)) | /* < 3 + 4*USE_DIVAPPR_Q */
+ (qp[1] & (GMP_NUMB_MASK >> ((GMP_NUMB_BITS >> odd)- nsh - 1)))) == 0)
+ {
+ mp_limb_t cy;
+ /* Approximation is not good enough, the extra limb(+ nsh bits)
+ is smaller than needed to absorb the possible error. */
+ /* {qp + 1, l + 1} equals 2*{sp, l} */
+ /* FIXME: use mullo or wrap-around, or directly evaluate
+ remainder with a single sqrmod_bnm1. */
+ TRACE(printf("mul(,,%u,,%u)\n", (unsigned) h, (unsigned) (l+1)));
+ ASSERT_NOCARRY (mpn_mul (scratch, sp + l, h, qp + 1, l + 1));
+ /* Compute the remainder of the previous mpn_div(appr)_q. */
+ cy = mpn_sub_n (tp + 1, tp + 1, scratch, h);
+#if USE_DIVAPPR_Q || WANT_ASSERT
+ MPN_DECR_U (tp + 1 + h, l, cy);
+#if USE_DIVAPPR_Q
+ ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) <= 0);
+ if (mpn_cmp (tp + 1 + h, scratch + h, l) < 0)
+ {
+ /* May happen only if div result was not exact. */
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n_ip1 (tp + 1, sp + l, h);
+#else
+ cy = mpn_addmul_1 (tp + 1, sp + l, h, CNST_LIMB(2));
+#endif
+ ASSERT_NOCARRY (mpn_add_1 (tp + 1 + h, tp + 1 + h, l, cy));
+ MPN_DECR_U (sp, l, 1);
+ }
+ /* Can the root be exact when a correction was needed? We
+ did not find an example, but it depends on divappr
+ internals, and we can not assume it true in general...*/
+ /* else */
+#else /* WANT_ASSERT */
+ ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) == 0);
+#endif
+#endif
+ if (mpn_zero_p (tp + l + 1, h - l))
+ {
+ TRACE(printf("sqr(,,%u)\n", (unsigned) l));
+ mpn_sqr (scratch, sp, l);
+ c = mpn_cmp (tp + 1, scratch + l, l);
+ if (c == 0)
+ {
+ if (nsh != 0)
+ {
+ mpn_lshift (tp, np, l, 2 * nsh);
+ np = tp;
+ }
+ c = mpn_cmp (np, scratch + odd, l - odd);
+ }
+ if (c < 0)
+ {
+ MPN_DECR_U (sp, l, 1);
+ c = 1;
+ }
+ }
+ }
+ }
+ TMP_FREE;
+
+ if ((odd | nsh) != 0)
+ mpn_rshift (sp, sp, n, nsh + (odd ? GMP_NUMB_BITS / 2 : 0));
+ return c;
+}
+
+
+mp_size_t
+mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
+{
+ mp_limb_t cc, high, rl;
+ int c;
+ mp_size_t rn, tn;
+ TMP_DECL;
+
+ ASSERT (nn > 0);
+ ASSERT_MPN (np, nn);
+
+ ASSERT (np[nn - 1] != 0);
+ ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
+ ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
+ ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));
+
+ high = np[nn - 1];
+ if (high & (GMP_NUMB_HIGHBIT | (GMP_NUMB_HIGHBIT / 2)))
+ c = 0;
+ else
+ {
+ count_leading_zeros (c, high);
+ c -= GMP_NAIL_BITS;
+
+ c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
+ }
+ if (nn == 1) {
+ if (c == 0)
+ {
+ sp[0] = mpn_sqrtrem1 (&rl, high);
+ if (rp != NULL)
+ rp[0] = rl;
+ }
+ else
+ {
+ cc = mpn_sqrtrem1 (&rl, high << (2*c)) >> c;
+ sp[0] = cc;
+ if (rp != NULL)
+ rp[0] = rl = high - cc*cc;
+ }
+ return rl != 0;
+ }
+ if (nn == 2) {
+ mp_limb_t tp [2];
+ if (rp == NULL) rp = tp;
+ if (c == 0)
+ {
+#if SQRTREM2_INPLACE
+ rp[1] = high;
+ rp[0] = np[0];
+ cc = CALL_SQRTREM2_INPLACE (sp, rp);
+#else
+ cc = mpn_sqrtrem2 (sp, rp, np);
+#endif
+ rp[1] = cc;
+ return ((rp[0] | cc) != 0) + cc;
+ }
+ else
+ {
+ rl = np[0];
+ rp[1] = (high << (2*c)) | (rl >> (GMP_NUMB_BITS - 2*c));
+ rp[0] = rl << (2*c);
+ CALL_SQRTREM2_INPLACE (sp, rp);
+ cc = sp[0] >>= c; /* c != 0, the highest bit of the root cc is 0. */
+ rp[0] = rl -= cc*cc; /* Computed modulo 2^GMP_LIMB_BITS, because it's smaller. */
+ return rl != 0;
+ }
+ }
+ tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */
+
+ if ((rp == NULL) && (nn > 8))
+ return mpn_dc_sqrt (sp, np, tn, c, nn & 1);
+ TMP_MARK;
+ if (((nn & 1) | c) != 0)
+ {
+ mp_limb_t s0[1], mask;
+ mp_ptr tp, scratch;
+ TMP_ALLOC_LIMBS_2 (tp, 2 * tn, scratch, tn / 2 + 1);
+ tp[0] = 0; /* needed only when 2*tn > nn, but saves a test */
+ if (c != 0)
+ mpn_lshift (tp + (nn & 1), np, nn, 2 * c);
+ else
+ MPN_COPY (tp + (nn & 1), np, nn);
+ c += (nn & 1) ? GMP_NUMB_BITS / 2 : 0; /* c now represents k */
+ mask = (CNST_LIMB (1) << c) - 1;
+ rl = mpn_dc_sqrtrem (sp, tp, tn, (rp == NULL) ? mask - 1 : 0, scratch);
+ /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
+ thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
+ s0[0] = sp[0] & mask; /* S mod 2^k */
+ rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2*s0*S */
+ cc = mpn_submul_1 (tp, s0, 1, s0[0]);
+ rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
+ mpn_rshift (sp, sp, tn, c);
+ tp[tn] = rl;
+ if (rp == NULL)
+ rp = tp;
+ c = c << 1;
+ if (c < GMP_NUMB_BITS)
+ tn++;
+ else
+ {
+ tp++;
+ c -= GMP_NUMB_BITS;
+ }
+ if (c != 0)
+ mpn_rshift (rp, tp, tn, c);
+ else
+ MPN_COPY_INCR (rp, tp, tn);
+ rn = tn;
+ }
+ else
+ {
+ if (rp != np)
+ {
+ if (rp == NULL) /* nn <= 8 */
+ rp = TMP_SALLOC_LIMBS (nn);
+ MPN_COPY (rp, np, nn);
+ }
+ rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn, 0, TMP_ALLOC_LIMBS(tn / 2 + 1)));
+ }
+
+ MPN_NORMALIZE (rp, rn);
+
+ TMP_FREE;
+ return rn;
+}
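+
+/* A minimal usage sketch (hypothetical caller; NN and n_limbs are
+   illustrative names only):
+
+     mp_limb_t s[(NN + 1) / 2], r[NN];
+     mp_size_t rn = mpn_sqrtrem (s, r, n_limbs, NN);
+
+   assuming n_limbs[NN - 1] != 0.  Afterwards {s, (NN+1)/2} holds
+   floor(sqrt(N)) and {r, rn} holds N - s^2; rn == 0 exactly when N is
+   a perfect square.  As the ASSERTs above allow, rp may be NULL when
+   only the root is wanted. */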
diff --git a/vendor/gmp-6.3.0/mpn/generic/strongfibo.c b/vendor/gmp-6.3.0/mpn/generic/strongfibo.c
new file mode 100644
index 0000000..7e8d612
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/strongfibo.c
@@ -0,0 +1,219 @@
+/* mpn_strongfibo -- strong Fibonacci test, modulo m.
+
+Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+#if ! HAVE_NATIVE_mpn_rsblsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n
+/* Stores |{ap,n}-{bp,n}| in {rp,n},
+ returns the sign of {ap,n}-{bp,n}. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+ mp_limb_t x, y;
+ while (--n >= 0)
+ {
+ x = ap[n];
+ y = bp[n];
+ if (x != y)
+ {
+ ++n;
+ if (x > y)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (rp, ap, bp, n));
+ return 1;
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_n (rp, bp, ap, n));
+ return -1;
+ }
+ }
+ rp[n] = 0;
+ }
+ return 0;
+}
+#endif
+
+/* Computes at most count terms of the sequence needed by the
+ Lucas-Lehmer-Riesel test, indexing backward:
+ L_i = L_{i+1}^2 - 2
+
+ The sequence is computed modulo M = {mp, mn}.
+ The starting point is given in L_{count+1} = {lp, mn}.
+ The scratch area pointed to by sp needs space for at least 3 * mn + 1 limbs.
+
+ Returns the index i>0 if L_i = 0 (mod M) is found within the
+ computed count terms of the sequence. Otherwise it returns zero.
+
+ Note: (+/-2)^2-2=2, (+/-1)^2-2=-1, 0^2-2=-2
+ */
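+
+/* For illustration (hypothetical values): with M = 7 and starting term
+   L_{count+1} = 3, the first step computes 3^2 mod 7 = 2, so
+   L_count = 2 - 2 = 0 (mod 7); this zero term is detected by the
+   lp[0] == 2 test below and the current value of count is returned. */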
+
+static mp_bitcnt_t
+mpn_llriter (mp_ptr lp, mp_srcptr mp, mp_size_t mn, mp_bitcnt_t count, mp_ptr sp)
+{
+ do
+ {
+ mpn_sqr (sp, lp, mn);
+ mpn_tdiv_qr (sp + 2 * mn, lp, 0, sp, 2 * mn, mp, mn);
+ if (lp[0] < 5)
+ {
+ /* If L^2 % M < 5, |L^2 % M - 2| <= 2 */
+ if (mn == 1 || mpn_zero_p (lp + 1, mn - 1))
+ return (lp[0] == 2) ? count : 0;
+ else
+ MPN_DECR_U (lp, mn, 2);
+ }
+ else
+ lp[0] -= 2;
+ } while (--count != 0);
+ return 0;
+}
+
+/* Stores the Lucas number L[n] at lp (maybe), computed modulo m. lp
+ and scratch should each have room for mn*2+1 limbs.
+
+ Normally returns the size of L[n].
+
+ If F[n] or L[n] is zero modulo m, returns 0 and the contents of lp
+ are undefined.
+*/
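+
+/* The code relies on the identity L[n] = F[n] + 2*F[n-1] (from
+   L[n] = F[n-1] + F[n+1] and F[n+1] = F[n] + F[n-1]); when mpn_fib2m
+   reports the two values with opposite signs, 2*F[n-1] is subtracted
+   from F[n] instead of added, as the inline comments below note. */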
+
+static mp_size_t
+mpn_lucm (mp_ptr lp, mp_srcptr np, mp_size_t nn, mp_srcptr mp, mp_size_t mn, mp_ptr scratch)
+{
+ int neg;
+ mp_limb_t cy;
+
+ ASSERT (! MPN_OVERLAP_P (lp, MAX(2*mn+1,5), scratch, MAX(2*mn+1,5)));
+ ASSERT (nn > 0);
+
+ neg = mpn_fib2m (lp, scratch, np, nn, mp, mn);
+
+ /* F[n] = +/-{lp, mn}, F[n-1] = +/-{scratch, mn} */
+ if (mpn_zero_p (lp, mn))
+ return 0;
+
+ if (neg) /* One sign is opposite, use sub instead of add. */
+ {
+#if HAVE_NATIVE_mpn_rsblsh1_n || HAVE_NATIVE_mpn_sublsh1_n
+#if HAVE_NATIVE_mpn_rsblsh1_n
+ cy = mpn_rsblsh1_n (lp, lp, scratch, mn); /* L[n] = +/-(2F[n-1]-(-F[n])) */
+#else
+ cy = mpn_sublsh1_n (lp, lp, scratch, mn); /* L[n] = -/+(F[n]-(-2F[n-1])) */
+ if (cy != 0)
+ cy = mpn_add_n (lp, lp, mp, mn) - cy;
+#endif
+ if (cy > 1)
+ cy += mpn_add_n (lp, lp, mp, mn);
+#else
+ cy = mpn_lshift (scratch, scratch, mn, 1); /* 2F[n-1] */
+ if (UNLIKELY (cy))
+ cy -= mpn_sub_n (lp, scratch, lp, mn); /* L[n] = +/-(2F[n-1]-(-F[n])) */
+ else
+ abs_sub_n (lp, lp, scratch, mn);
+#endif
+ ASSERT (cy <= 1);
+ }
+ else
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (lp, lp, scratch, mn); /* L[n] = +/-(2F[n-1]+F[n])) */
+#else
+ cy = mpn_lshift (scratch, scratch, mn, 1);
+ cy+= mpn_add_n (lp, lp, scratch, mn);
+#endif
+ ASSERT (cy <= 2);
+ }
+ while (cy || mpn_cmp (lp, mp, mn) >= 0)
+ cy -= mpn_sub_n (lp, lp, mp, mn);
+ MPN_NORMALIZE (lp, mn);
+ return mn;
+}
+
+int
+mpn_strongfibo (mp_srcptr mp, mp_size_t mn, mp_ptr scratch)
+{
+ mp_ptr lp, sp;
+ mp_size_t en;
+ mp_bitcnt_t b0;
+ TMP_DECL;
+
+#if GMP_NUMB_BITS % 4 == 0
+ b0 = mpn_scan0 (mp, 0);
+#else
+ {
+ mpz_t m = MPZ_ROINIT_N(mp, mn);
+ b0 = mpz_scan0 (m, 0);
+ }
+ if (UNLIKELY (b0 == mn * GMP_NUMB_BITS))
+ {
+ en = 1;
+ scratch [0] = 1;
+ }
+ else
+#endif
+ {
+ int cnt = b0 % GMP_NUMB_BITS;
+ en = b0 / GMP_NUMB_BITS;
+ if (LIKELY (cnt != 0))
+ mpn_rshift (scratch, mp + en, mn - en, cnt);
+ else
+ MPN_COPY (scratch, mp + en, mn - en);
+ en = mn - en;
+ scratch [0] |= 1;
+ en -= scratch [en - 1] == 0;
+ }
+ TMP_MARK;
+
+ lp = TMP_ALLOC_LIMBS (4 * mn + 6);
+ sp = lp + 2 * mn + 3;
+ en = mpn_lucm (sp, scratch, en, mp, mn, lp);
+ if (en != 0 && LIKELY (--b0 != 0))
+ {
+ mpn_sqr (lp, sp, en);
+ lp [0] |= 2; /* V^2 + 2 */
+ if (LIKELY (2 * en >= mn))
+ mpn_tdiv_qr (sp, lp, 0, lp, 2 * en, mp, mn);
+ else
+ MPN_ZERO (lp + 2 * en, mn - 2 * en);
+ if (! mpn_zero_p (lp, mn) && LIKELY (--b0 != 0))
+ b0 = mpn_llriter (lp, mp, mn, b0, lp + mn + 1);
+ }
+ TMP_FREE;
+ return (b0 != 0);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sub.c b/vendor/gmp-6.3.0/mpn/generic/sub.c
new file mode 100644
index 0000000..df0afd6
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sub.c
@@ -0,0 +1,33 @@
+/* mpn_sub - subtract mpn from mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __GMP_FORCE_mpn_sub 1
+
+#include "gmp-impl.h"
diff --git a/vendor/gmp-6.3.0/mpn/generic/sub_1.c b/vendor/gmp-6.3.0/mpn/generic/sub_1.c
new file mode 100644
index 0000000..a20f191
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sub_1.c
@@ -0,0 +1,33 @@
+/* mpn_sub_1 - subtract limb from mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __GMP_FORCE_mpn_sub_1 1
+
+#include "gmp-impl.h"
diff --git a/vendor/gmp-6.3.0/mpn/generic/sub_err1_n.c b/vendor/gmp-6.3.0/mpn/generic/sub_err1_n.c
new file mode 100644
index 0000000..beca57e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sub_err1_n.c
@@ -0,0 +1,100 @@
+/* mpn_sub_err1_n -- sub_n with one error term
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+ return value is borrow out.
+
+ (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+ Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
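+/* A small worked example (hypothetical limb values, n = 2, cy = 0):
+   up = {0, 5}, vp = {1, 3}, yp = {7, 9}.  Limb 0 borrows (0 - 1), so
+   c[1] = 1; limb 1 gives 5 - 3 - 1 = 1 with no borrow, so c[2] = 0.
+   Hence ep[0] = c[1]*yp[1] + c[2]*yp[0] = 9, ep[1] = 0, and the
+   returned borrow is 0. */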
+mp_limb_t
+mpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+ yp += n - 1;
+ el = eh = 0;
+
+ do
+ {
+ yl = *yp--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary sub_n */
+ SUBC_LIMB (cy1, sl, ul, vl);
+ SUBC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh:el) */
+ zl = (-cy) & yl;
+ el += zl;
+ eh += el < zl;
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
+ el &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el;
+ ep[1] = eh;
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sub_err2_n.c b/vendor/gmp-6.3.0/mpn/generic/sub_err2_n.c
new file mode 100644
index 0000000..1edf8d6
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sub_err2_n.c
@@ -0,0 +1,116 @@
+/* mpn_sub_err2_n -- sub_n with two error terms
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+ return value is borrow out.
+
+ (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+ Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+ c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+ stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+ yp1 += n - 1;
+ yp2 += n - 1;
+ el1 = eh1 = 0;
+ el2 = eh2 = 0;
+
+ do
+ {
+ yl1 = *yp1--;
+ yl2 = *yp2--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary sub_n */
+ SUBC_LIMB (cy1, sl, ul, vl);
+ SUBC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh1:el1) */
+ zl1 = (-cy) & yl1;
+ el1 += zl1;
+ eh1 += el1 < zl1;
+
+ /* update (eh2:el2) */
+ zl2 = (-cy) & yl2;
+ el2 += zl2;
+ eh2 += el2 < zl2;
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+ el1 &= GMP_NUMB_MASK;
+ eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+ el2 &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el1;
+ ep[1] = eh1;
+ ep[2] = el2;
+ ep[3] = eh2;
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sub_err3_n.c b/vendor/gmp-6.3.0/mpn/generic/sub_err3_n.c
new file mode 100644
index 0000000..2db3c63
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sub_err3_n.c
@@ -0,0 +1,131 @@
+/* mpn_sub_err3_n -- sub_n with three error terms
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+ return value is borrow out.
+
+ (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+ Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+ c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+ c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+ stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+ yp1 += n - 1;
+ yp2 += n - 1;
+ yp3 += n - 1;
+ el1 = eh1 = 0;
+ el2 = eh2 = 0;
+ el3 = eh3 = 0;
+
+ do
+ {
+ yl1 = *yp1--;
+ yl2 = *yp2--;
+ yl3 = *yp3--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary sub_n */
+ SUBC_LIMB (cy1, sl, ul, vl);
+ SUBC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh1:el1) */
+ zl1 = (-cy) & yl1;
+ el1 += zl1;
+ eh1 += el1 < zl1;
+
+ /* update (eh2:el2) */
+ zl2 = (-cy) & yl2;
+ el2 += zl2;
+ eh2 += el2 < zl2;
+
+ /* update (eh3:el3) */
+ zl3 = (-cy) & yl3;
+ el3 += zl3;
+ eh3 += el3 < zl3;
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+ el1 &= GMP_NUMB_MASK;
+ eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+ el2 &= GMP_NUMB_MASK;
+ eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
+ el3 &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el1;
+ ep[1] = eh1;
+ ep[2] = el2;
+ ep[3] = eh2;
+ ep[4] = el3;
+ ep[5] = eh3;
+
+ return cy;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/sub_n.c b/vendor/gmp-6.3.0/mpn/generic/sub_n.c
new file mode 100644
index 0000000..b192c96
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/sub_n.c
@@ -0,0 +1,89 @@
+/* mpn_sub_n -- Subtract equal length limb vectors.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+ cy = 0;
+ do
+ {
+ ul = *up++;
+ vl = *vp++;
+ sl = ul - vl;
+ cy1 = sl > ul;
+ rl = sl - cy;
+ cy2 = rl > sl;
+ cy = cy1 | cy2;
+ *rp++ = rl;
+ }
+ while (--n != 0);
+
+ return cy;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_limb_t ul, vl, rl, cy;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+ cy = 0;
+ do
+ {
+ ul = *up++;
+ vl = *vp++;
+ rl = ul - vl - cy;
+ cy = rl >> (GMP_LIMB_BITS - 1);
+ *rp++ = rl & GMP_NUMB_MASK;
+ }
+ while (--n != 0);
+
+ return cy;
+}
+
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/submul_1.c b/vendor/gmp-6.3.0/mpn/generic/submul_1.c
new file mode 100644
index 0000000..4744274
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/submul_1.c
@@ -0,0 +1,144 @@
+/* mpn_submul_1 -- multiply the N long limb vector pointed to by UP by VL,
+ subtract the N least significant limbs of the product from the limb
+ vector pointed to by RP. Return the most significant limb of the
+ product, adjusted for carry-out from the subtraction.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
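+/* For illustration (hypothetical single-limb values, nail-free build):
+   with {rp,1} = {10}, {up,1} = {3} and v0 = 4 the product is 12, so
+   rp[0] becomes 10 - 12 mod B = B - 2 (B = 2^GMP_NUMB_BITS) and the
+   return value is 1, i.e. the amount still to be subtracted from the
+   next higher limb of a larger operand. */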
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+ mp_limb_t u0, crec, c, p1, p0, r0;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+
+ crec = 0;
+ do
+ {
+ u0 = *up++;
+ umul_ppmm (p1, p0, u0, v0);
+
+ r0 = *rp;
+
+ p0 = r0 - p0;
+ c = r0 < p0;
+
+ p1 = p1 + c;
+
+ r0 = p0 - crec; /* cycle 0, 3, ... */
+ c = p0 < r0; /* cycle 1, 4, ... */
+
+ crec = p1 + c; /* cycle 2, 5, ... */
+
+ *rp++ = r0;
+ }
+ while (--n != 0);
+
+ return crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS == 1
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+ mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, cl, xl, c1, c2, c3;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT_MPN (rp, n);
+ ASSERT_MPN (up, n);
+ ASSERT_LIMB (v0);
+
+ shifted_v0 = v0 << GMP_NAIL_BITS;
+ cl = 0;
+ prev_p1 = 0;
+ do
+ {
+ u0 = *up++;
+ r0 = *rp;
+ umul_ppmm (p1, p0, u0, shifted_v0);
+ p0 >>= GMP_NAIL_BITS;
+ SUBC_LIMB (c1, xl, r0, prev_p1);
+ SUBC_LIMB (c2, xl, xl, p0);
+ SUBC_LIMB (c3, xl, xl, cl);
+ cl = c1 + c2 + c3;
+ *rp++ = xl;
+ prev_p1 = p1;
+ }
+ while (--n != 0);
+
+ return prev_p1 + cl;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 2
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+ mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, xw, cl, xl;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT_MPN (rp, n);
+ ASSERT_MPN (up, n);
+ ASSERT_LIMB (v0);
+
+ shifted_v0 = v0 << GMP_NAIL_BITS;
+ cl = 0;
+ prev_p1 = 0;
+ do
+ {
+ u0 = *up++;
+ r0 = *rp;
+ umul_ppmm (p1, p0, u0, shifted_v0);
+ p0 >>= GMP_NAIL_BITS;
+ xw = r0 - (prev_p1 + p0) + cl;
+ cl = (mp_limb_signed_t) xw >> GMP_NUMB_BITS; /* FIXME: non-portable */
+ xl = xw & GMP_NUMB_MASK;
+ *rp++ = xl;
+ prev_p1 = p1;
+ }
+ while (--n != 0);
+
+ return prev_p1 - cl;
+}
+
+#endif
diff --git a/vendor/gmp-6.3.0/mpn/generic/tdiv_qr.c b/vendor/gmp-6.3.0/mpn/generic/tdiv_qr.c
new file mode 100644
index 0000000..92ff33c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/tdiv_qr.c
@@ -0,0 +1,386 @@
+/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
+ write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
+ qxn is non-zero, generate that many fraction limbs and append them after the
+ other quotient limbs, and update the remainder accordingly. The input
+ operands are unaffected.
+
+ Preconditions:
+ 1. The most significant limb of the divisor must be non-zero.
+ 2. nn >= dn, even if qxn is non-zero. (??? relax this ???)
+
+ The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
+ complexity of multiplication.
+
+Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
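+/* A minimal usage sketch (hypothetical caller of this internal
+   interface; NN, DN, n_limbs, d_limbs are illustrative names):
+
+     mp_limb_t q[NN - DN + 1], r[DN];
+     mpn_tdiv_qr (q, r, 0, n_limbs, NN, d_limbs, DN);
+
+   assuming NN >= DN >= 1 and d_limbs[DN - 1] != 0; afterwards
+   {q, NN-DN+1} holds the truncated quotient and {r, DN} the
+   remainder, per the description above. */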
+
+void
+mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+ mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+ ASSERT_ALWAYS (qxn == 0);
+
+ ASSERT (nn >= 0);
+ ASSERT (dn >= 0);
+ ASSERT (dn == 0 || dp[dn - 1] != 0);
+ ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
+
+ switch (dn)
+ {
+ case 0:
+ DIVIDE_BY_ZERO;
+
+ case 1:
+ {
+ rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
+ return;
+ }
+
+ case 2:
+ {
+ mp_ptr n2p;
+ mp_limb_t qhl, cy;
+ TMP_DECL;
+ TMP_MARK;
+ if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
+ {
+ int cnt;
+ mp_limb_t d2p[2];
+ count_leading_zeros (cnt, dp[1]);
+ cnt -= GMP_NAIL_BITS;
+ d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
+ d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
+ n2p = TMP_ALLOC_LIMBS (nn + 1);
+ cy = mpn_lshift (n2p, np, nn, cnt);
+ n2p[nn] = cy;
+ qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
+ if (cy == 0)
+ qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
+ rp[0] = (n2p[0] >> cnt)
+ | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
+ rp[1] = (n2p[1] >> cnt);
+ }
+ else
+ {
+ n2p = TMP_ALLOC_LIMBS (nn);
+ MPN_COPY (n2p, np, nn);
+ qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
+ qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
+ rp[0] = n2p[0];
+ rp[1] = n2p[1];
+ }
+ TMP_FREE;
+ return;
+ }
+
+ default:
+ {
+ int adjust;
+ gmp_pi1_t dinv;
+ TMP_DECL;
+ TMP_MARK;
+ adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
+ if (nn + adjust >= 2 * dn)
+ {
+ mp_ptr n2p, d2p;
+ mp_limb_t cy;
+ int cnt;
+
+ qp[nn - dn] = 0; /* zero high quotient limb */
+ if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
+ {
+ count_leading_zeros (cnt, dp[dn - 1]);
+ cnt -= GMP_NAIL_BITS;
+ d2p = TMP_ALLOC_LIMBS (dn);
+ mpn_lshift (d2p, dp, dn, cnt);
+ n2p = TMP_ALLOC_LIMBS (nn + 1);
+ cy = mpn_lshift (n2p, np, nn, cnt);
+ n2p[nn] = cy;
+ nn += adjust;
+ }
+ else
+ {
+ cnt = 0;
+ d2p = (mp_ptr) dp;
+ n2p = TMP_ALLOC_LIMBS (nn + 1);
+ MPN_COPY (n2p, np, nn);
+ n2p[nn] = 0;
+ nn += adjust;
+ }
+
+ invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
+ if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
+ mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
+ else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) || /* fast condition */
+ BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
+ (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
+ + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn) /* ...condition */
+ mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
+ else
+ {
+ mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
+ n2p = rp;
+ }
+
+ if (cnt != 0)
+ mpn_rshift (rp, n2p, dn, cnt);
+ else
+ MPN_COPY (rp, n2p, dn);
+ TMP_FREE;
+ return;
+ }
+
+ /* When we come here, the numerator/partial remainder is less
+ than twice the size of the denominator. */
+
+ {
+ /* Problem:
+
+ Divide a numerator N with nn limbs by a denominator D with dn
+ limbs forming a quotient of qn=nn-dn+1 limbs. When qn is small
+ compared to dn, conventional division algorithms perform poorly.
+ We want an algorithm that has an expected running time that is
+ dependent only on qn.
+
+ Algorithm (very informally stated):
+
+ 1) Divide the 2 x qn most significant limbs from the numerator
+ by the qn most significant limbs from the denominator. Call
+ the result qest. This is either the correct quotient, but
+ might be 1 or 2 too large. Compute the remainder from the
+ division. (This step is implemented by an mpn_divrem call.)
+
+ 2) Is the most significant limb from the remainder < p, where p
+ is the product of the most significant limb from the quotient
+ and the next(d)? (Next(d) denotes the next ignored limb from
+ the denominator.) If it is, decrement qest, and adjust the
+ remainder accordingly.
+
+ 3) Is the remainder >= qest? If it is, qest is the desired
+ quotient. The algorithm terminates.
+
+ 4) Subtract qest x next(d) from the remainder. If there is
+ borrow out, decrement qest, and adjust the remainder
+ accordingly.
+
+ 5) Skip one word from the denominator (i.e., let next(d) denote
+ the next less significant limb). */
+
+ mp_size_t qn;
+ mp_ptr n2p, d2p;
+ mp_ptr tp;
+ mp_limb_t cy;
+ mp_size_t in, rn;
+ mp_limb_t quotient_too_large;
+ unsigned int cnt;
+
+ qn = nn - dn;
+ qp[qn] = 0; /* zero high quotient limb */
+ qn += adjust; /* qn cannot become bigger */
+
+ if (qn == 0)
+ {
+ MPN_COPY (rp, np, dn);
+ TMP_FREE;
+ return;
+ }
+
+ in = dn - qn; /* (at least partially) ignored # of limbs in ops */
+ /* Normalize denominator by shifting it to the left such that its
+ most significant bit is set. Then shift the numerator the same
+ amount, to mathematically preserve quotient. */
+ if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
+ {
+ count_leading_zeros (cnt, dp[dn - 1]);
+ cnt -= GMP_NAIL_BITS;
+
+ d2p = TMP_ALLOC_LIMBS (qn);
+ mpn_lshift (d2p, dp + in, qn, cnt);
+ d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
+
+ n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+ cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
+ if (adjust)
+ {
+ n2p[2 * qn] = cy;
+ n2p++;
+ }
+ else
+ {
+ n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
+ }
+ }
+ else
+ {
+ cnt = 0;
+ d2p = (mp_ptr) dp + in;
+
+ n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+ MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
+ if (adjust)
+ {
+ n2p[2 * qn] = 0;
+ n2p++;
+ }
+ }
+
+ /* Get an approximate quotient using the extracted operands. */
+ if (qn == 1)
+ {
+ mp_limb_t q0, r0;
+ udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
+ n2p[0] = r0 >> GMP_NAIL_BITS;
+ qp[0] = q0;
+ }
+ else if (qn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
+ else
+ {
+ invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
+ if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+ mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
+ else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
+ mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
+ else
+ {
+ mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ mp_ptr r2p = rp;
+ if (np == r2p) /* If N and R share space, put ... */
+ r2p += nn - qn; /* intermediate remainder at N's upper end. */
+ mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
+ MPN_COPY (n2p, r2p, qn);
+ }
+ }
+
+ rn = qn;
+ /* Multiply the first ignored divisor limb by the most significant
+ quotient limb. If that product is > the partial remainder's
+ most significant limb, we know the quotient is too large. This
+ test quickly catches most cases where the quotient is too large;
+ it catches all cases where the quotient is 2 too large. */
+ {
+ mp_limb_t dl, x;
+ mp_limb_t h, dummy;
+
+ if (in - 2 < 0)
+ dl = 0;
+ else
+ dl = dp[in - 2];
+
+#if GMP_NAIL_BITS == 0
+ x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
+#else
+ x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
+ if (cnt != 0)
+ x |= dl >> (GMP_NUMB_BITS - cnt);
+#endif
+ umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
+
+ if (n2p[qn - 1] < h)
+ {
+ mp_limb_t cy;
+
+ mpn_decr_u (qp, (mp_limb_t) 1);
+ cy = mpn_add_n (n2p, n2p, d2p, qn);
+ if (cy)
+ {
+ /* The partial remainder is safely large. */
+ n2p[qn] = cy;
+ ++rn;
+ }
+ }
+ }
+
+ quotient_too_large = 0;
+ if (cnt != 0)
+ {
+ mp_limb_t cy1, cy2;
+
+ /* Append partially used numerator limb to partial remainder. */
+ cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
+ n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
+
+ /* Update partial remainder with partially used divisor limb. */
+ cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
+ if (qn != rn)
+ {
+ ASSERT_ALWAYS (n2p[qn] >= cy2);
+ n2p[qn] -= cy2;
+ }
+ else
+ {
+ n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
+
+ quotient_too_large = (cy1 < cy2);
+ ++rn;
+ }
+ --in;
+ }
+ /* True: partial remainder now is neutral, i.e., it is not shifted up. */
+
+ tp = TMP_ALLOC_LIMBS (dn);
+
+ if (in < qn)
+ {
+ if (in == 0)
+ {
+ MPN_COPY (rp, n2p, rn);
+ ASSERT_ALWAYS (rn == dn);
+ goto foo;
+ }
+ mpn_mul (tp, qp, qn, dp, in);
+ }
+ else
+ mpn_mul (tp, dp, in, qp, qn);
+
+ cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
+ MPN_COPY (rp + in, n2p, dn - in);
+ quotient_too_large |= cy;
+ cy = mpn_sub_n (rp, np, tp, in);
+ cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
+ quotient_too_large |= cy;
+ foo:
+ if (quotient_too_large)
+ {
+ mpn_decr_u (qp, (mp_limb_t) 1);
+ mpn_add_n (rp, rp, dp, dn);
+ }
+ }
+ TMP_FREE;
+ return;
+ }
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom22_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom22_mul.c
new file mode 100644
index 0000000..da56014
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom22_mul.c
@@ -0,0 +1,222 @@
+/* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn. Or more
+ accurately, bn <= an < 2bn.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +inf
+
+ <-s--><--n-->
+ ____ ______
+ |_a1_|___a0_|
+ |b1_|___b0_|
+ <-t-><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ vm1 = (a0- a1)*(b0- b1) # A(-1)*B(-1)
+ vinf= a1 * b1 # A(inf)*B(inf)
+*/
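+
+/* The product is recombined via the standard Karatsuba identity
+
+     A*B = vinf*W^2 + (v0 + vinf - A(-1)*B(-1))*W + v0,
+
+   where W = 2^(n*GMP_NUMB_BITS) and A(-1)*B(-1) equals vm1 or -vm1
+   according to vm1_neg; the adds and the conditional subtraction of
+   vm1 at the end of the function implement this. */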
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_mul_toom22 1
+#else
+#define MAYBE_mul_toom22 \
+ (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#define TOOM22_MUL_N_REC(p, a, b, n, ws) \
+ do { \
+ if (! MAYBE_mul_toom22 \
+ || BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
+ mpn_mul_basecase (p, a, n, b, n); \
+ else \
+ mpn_toom22_mul (p, a, n, b, n, ws); \
+ } while (0)
+
+/* Normally, this calls mul_basecase or toom22_mul. But when the fraction
+ MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
+ relative unbalance will become a larger and larger relative unbalance with
+ each recursion (the difference s-t will be invariant over recursive calls).
+ Therefore, we need to call toom32_mul. FIXME: Suppress depending on
+ MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD. */
+#define TOOM22_MUL_REC(p, a, an, b, bn, ws) \
+ do { \
+ if (! MAYBE_mul_toom22 \
+ || BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD)) \
+ mpn_mul_basecase (p, a, an, b, bn); \
+ else if (4 * an < 5 * bn) \
+ mpn_toom22_mul (p, a, an, b, bn, ws); \
+ else \
+ mpn_toom32_mul (p, a, an, b, bn, ws); \
+ } while (0)
+
+void
+mpn_toom22_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn,
+ mp_ptr scratch)
+{
+ const int __gmpn_cpuvec_initialized = 1;
+ mp_size_t n, s, t;
+ int vm1_neg;
+ mp_limb_t cy, cy2;
+ mp_ptr asm1;
+ mp_ptr bsm1;
+
+#define a0 ap
+#define a1 (ap + n)
+#define b0 bp
+#define b1 (bp + n)
+
+ s = an >> 1;
+ n = an - s;
+ t = bn - n;
+
+ ASSERT (an >= bn);
+
+ ASSERT (0 < s && s <= n && (n - s) == (an & 1));
+ ASSERT (0 < t && t <= s);
+
+ asm1 = pp;
+ bsm1 = pp + n;
+
+ vm1_neg = 0;
+
+ /* Compute asm1. */
+ if ((an & 1) == 0) /* s == n */
+ {
+ if (mpn_cmp (a0, a1, n) < 0)
+ {
+ mpn_sub_n (asm1, a1, a0, n);
+ vm1_neg = 1;
+ }
+ else
+ {
+ mpn_sub_n (asm1, a0, a1, n);
+ }
+ }
+ else /* n - s == 1 */
+ {
+ if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
+ {
+ mpn_sub_n (asm1, a1, a0, s);
+ asm1[s] = 0;
+ vm1_neg = 1;
+ }
+ else
+ {
+ asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
+ }
+ }
+
+ /* Compute bsm1. */
+ if (t == n)
+ {
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ mpn_sub_n (bsm1, b1, b0, n);
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ mpn_sub_n (bsm1, b0, b1, n);
+ }
+ }
+ else
+ {
+ if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+ {
+ mpn_sub_n (bsm1, b1, b0, t);
+ MPN_ZERO (bsm1 + t, n - t);
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ mpn_sub (bsm1, b0, n, b1, t);
+ }
+ }
+
+#define v0 pp /* 2n */
+#define vinf (pp + 2 * n) /* s+t */
+#define vm1 scratch /* 2n */
+#define scratch_out scratch + 2 * n
+
+ /* vm1, 2n limbs */
+ TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+
+ if (s > t) TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
+ else TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
+
+ /* v0, 2n limbs */
+ TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
+
+ /* H(v0) + L(vinf) */
+ cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
+
+ /* L(v0) + (H(v0) + L(vinf)) */
+ cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
+
+ /* (H(v0) + L(vinf)) + H(vinf) */
+ cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
+
+ if (vm1_neg)
+ cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
+ else {
+ cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
+ if (UNLIKELY (cy + 1 == 0)) { /* cy is negative */
+ /* The total contribution of v0+vinf-vm1 can not be negative. */
+#if WANT_ASSERT
+ /* The borrow in cy stops the propagation of the carry cy2, */
+ ASSERT (cy2 == 1);
+ cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
+ ASSERT (cy == 0);
+#else
+ /* we simply fill the area with zeros. */
+ MPN_FILL (pp + 2 * n, n, 0);
+ /* ASSERT (s + t == n || mpn_zero_p (pp + 3 * n, s + t - n)); */
+#endif
+ return;
+ }
+ }
+
+ ASSERT (cy <= 2);
+ ASSERT (cy2 <= 2);
+
+ MPN_INCR_U (pp + 2 * n, s + t, cy2);
+ /* if s+t==n, cy is zero, but we should not access pp[3*n] at all. */
+ MPN_INCR_U (pp + 3 * n, s + t - n, cy);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom2_sqr.c b/vendor/gmp-6.3.0/mpn/generic/toom2_sqr.c
new file mode 100644
index 0000000..db7a846
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom2_sqr.c
@@ -0,0 +1,155 @@
+/* mpn_toom2_sqr -- Square {ap,an}.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +inf
+
+ <-s--><--n-->
+ ____ ______
+ |_a1_|___a0_|
+
+ v0 = a0 ^2 # A(0)^2
+ vm1 = (a0- a1)^2 # A(-1)^2
+ vinf= a1 ^2 # A(inf)^2
+*/
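+
+/* Correspondingly, A^2 = vinf*W^2 + (v0 + vinf - vm1)*W + v0 with
+   W = 2^(n*GMP_NUMB_BITS); here vm1 = (a0 - a1)^2 is never negative,
+   so no sign tracking is needed and vm1 is always subtracted. */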
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_sqr_toom2 1
+#else
+#define MAYBE_sqr_toom2 \
+ (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
+#endif
+
+#define TOOM2_SQR_REC(p, a, n, ws) \
+ do { \
+ if (! MAYBE_sqr_toom2 \
+ || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
+ mpn_sqr_basecase (p, a, n); \
+ else \
+ mpn_toom2_sqr (p, a, n, ws); \
+ } while (0)
+
+void
+mpn_toom2_sqr (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_ptr scratch)
+{
+ const int __gmpn_cpuvec_initialized = 1;
+ mp_size_t n, s;
+ mp_limb_t cy, cy2;
+ mp_ptr asm1;
+
+#define a0 ap
+#define a1 (ap + n)
+
+ s = an >> 1;
+ n = an - s;
+
+ ASSERT (0 < s && s <= n && (n - s) == (an & 1));
+
+ asm1 = pp;
+
+ /* Compute asm1. */
+ if ((an & 1) == 0) /* s == n */
+ {
+ if (mpn_cmp (a0, a1, n) < 0)
+ {
+ mpn_sub_n (asm1, a1, a0, n);
+ }
+ else
+ {
+ mpn_sub_n (asm1, a0, a1, n);
+ }
+ }
+ else /* n - s == 1 */
+ {
+ if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
+ {
+ mpn_sub_n (asm1, a1, a0, s);
+ asm1[s] = 0;
+ }
+ else
+ {
+ asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
+ }
+ }
+
+#define v0 pp /* 2n */
+#define vinf (pp + 2 * n) /* s+s */
+#define vm1 scratch /* 2n */
+#define scratch_out scratch + 2 * n
+
+ /* vm1, 2n limbs */
+ TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
+
+ /* vinf, s+s limbs */
+ TOOM2_SQR_REC (vinf, a1, s, scratch_out);
+
+ /* v0, 2n limbs */
+ TOOM2_SQR_REC (v0, ap, n, scratch_out);
+
+ /* H(v0) + L(vinf) */
+ cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
+
+ /* L(v0) + H(v0) */
+ cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
+
+ /* L(vinf) + H(vinf) */
+ cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
+
+ cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
+
+ ASSERT (cy + 1 <= 3);
+ ASSERT (cy2 <= 2);
+
+ if (LIKELY (cy <= 2)) {
+ MPN_INCR_U (pp + 2 * n, s + s, cy2);
+ MPN_INCR_U (pp + 3 * n, s + s - n, cy);
+ } else { /* cy is negative */
+ /* The total contribution of v0+vinf-vm1 can not be negative. */
+#if WANT_ASSERT
+ /* The borrow in cy stops the propagation of the carry cy2, */
+ ASSERT (cy2 == 1);
+ cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
+ ASSERT (cy == 0);
+#else
+ /* we simply fill the area with zeros. */
+ MPN_FILL (pp + 2 * n, n, 0);
+#endif
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom32_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom32_mul.c
new file mode 100644
index 0000000..1139d17
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom32_mul.c
@@ -0,0 +1,320 @@
+/* mpn_toom32_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 1.5
+ times as large as bn. Or more accurately, bn < an < 3bn.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Improvements by Marco Bodrato and Niels Möller.
+
+ The idea of applying Toom to unbalanced multiplication is due to Marco
+ Bodrato and Alberto Zanoni.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2020, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +inf
+
+ <-s-><--n--><--n-->
+ ___ ______ ______
+ |a2_|___a1_|___a0_|
+ |_b1_|___b0_|
+ <-t--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = (a0+ a1+ a2)*(b0+ b1) # A(1)*B(1) ah <= 2 bh <= 1
+ vm1 = (a0- a1+ a2)*(b0- b1) # A(-1)*B(-1) |ah| <= 1 bh = 0
+ vinf= a2 * b1 # A(inf)*B(inf)
+*/
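+
+/* Writing the product polynomial as x3*t^3 + x2*t^2 + x1*t + x0, the
+   evaluations give x0 = v0, x3 = vinf, x0 + x2 = (v1 + A(-1)*B(-1))/2
+   and x1 + x3 = (v1 - A(-1)*B(-1))/2, which is how the code below
+   recovers the middle coefficients (A(-1)*B(-1) is +/-vm1 according
+   to vm1_neg). */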
+
+#define TOOM32_MUL_N_REC(p, a, b, n, ws) \
+ do { \
+ mpn_mul_n (p, a, b, n); \
+ } while (0)
+
+void
+mpn_toom32_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn,
+ mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ int vm1_neg;
+ mp_limb_t cy;
+ mp_limb_signed_t hi;
+ mp_limb_t ap1_hi, bp1_hi;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2 * n)
+#define b0 bp
+#define b1 (bp + n)
+
+ /* Required, to ensure that s + t >= n. */
+ ASSERT (bn + 2 <= an && an + 6 <= 3*bn);
+
+ n = 2 * an >= 3 * bn ? (an + 2) / (size_t) 3 : (bn + 1) >> 1;
+
+ s = an - 2 * n;
+ t = bn - n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+ ASSERT (s + t >= n);
+
+ /* Product area of size an + bn = 3*n + s + t >= 4*n + 2. */
+#define ap1 (pp) /* n, most significant limb in ap1_hi */
+#define bp1 (pp + n) /* n, most significant bit in bp1_hi */
+#define am1 (pp + 2*n) /* n, most significant bit in hi */
+#define bm1 (pp + 3*n) /* n */
+#define v1 (scratch) /* 2n + 1 */
+#define vm1 (pp) /* 2n + 1 */
+#define scratch_out (scratch + 2*n + 1) /* Currently unused. */
+
+ /* Scratch need: 2*n + 1 + scratch for the recursive multiplications. */
+
+ /* FIXME: Keep v1[2*n] and vm1[2*n] in scalar variables? */
+
+ /* Compute ap1 = a0 + a1 + a2, am1 = a0 - a1 + a2 */
+ ap1_hi = mpn_add (ap1, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+ {
+ ap1_hi = mpn_add_n_sub_n (ap1, am1, a1, ap1, n) >> 1;
+ hi = 0;
+ vm1_neg = 1;
+ }
+ else
+ {
+ cy = mpn_add_n_sub_n (ap1, am1, ap1, a1, n);
+ hi = ap1_hi - (cy & 1);
+ ap1_hi += (cy >> 1);
+ vm1_neg = 0;
+ }
+#else
+ if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (am1, a1, ap1, n));
+ hi = 0;
+ vm1_neg = 1;
+ }
+ else
+ {
+ hi = ap1_hi - mpn_sub_n (am1, ap1, a1, n);
+ vm1_neg = 0;
+ }
+ ap1_hi += mpn_add_n (ap1, ap1, a1, n);
+#endif
+
+ /* Compute bp1 = b0 + b1 and bm1 = b0 - b1. */
+ if (t == n)
+ {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (bp1, bm1, b1, b0, n);
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ cy = mpn_add_n_sub_n (bp1, bm1, b0, b1, n);
+ }
+ bp1_hi = cy >> 1;
+#else
+ bp1_hi = mpn_add_n (bp1, b0, b1, n);
+
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, n));
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_n (bm1, b0, b1, n));
+ }
+#endif
+ }
+ else
+ {
+ /* FIXME: Should still use mpn_add_n_sub_n for the main part. */
+ bp1_hi = mpn_add (bp1, b0, n, b1, t);
+
+ if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, t));
+ MPN_ZERO (bm1 + t, n - t);
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub (bm1, b0, n, b1, t));
+ }
+ }
+
+ TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out);
+ if (ap1_hi == 1)
+ {
+ cy = mpn_add_n (v1 + n, v1 + n, bp1, n);
+ }
+ else if (ap1_hi > 1) /* ap1_hi == 2 */
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy = mpn_addlsh1_n_ip1 (v1 + n, bp1, n);
+#else
+ cy = mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
+#endif
+ }
+ else
+ cy = 0;
+ if (bp1_hi != 0)
+ cy += ap1_hi + mpn_add_n (v1 + n, v1 + n, ap1, n);
+ v1[2 * n] = cy;
+
+ TOOM32_MUL_N_REC (vm1, am1, bm1, n, scratch_out);
+ if (hi)
+ hi = mpn_add_n (vm1+n, vm1+n, bm1, n);
+
+ vm1[2*n] = hi;
+
+ /* v1 <-- (v1 + vm1) / 2 = x0 + x2 */
+ if (vm1_neg)
+ {
+#if HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (v1, v1, vm1, 2*n+1);
+#else
+ mpn_sub_n (v1, v1, vm1, 2*n+1);
+ ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+#endif
+ }
+ else
+ {
+#if HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (v1, v1, vm1, 2*n+1);
+#else
+ mpn_add_n (v1, v1, vm1, 2*n+1);
+ ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+#endif
+ }
+
+ /* We get x1 + x3 = (x0 + x2) - (x0 - x1 + x2 - x3), and hence
+
+ y = x1 + x3 + (x0 + x2) * B
+ = (x0 + x2) * B + (x0 + x2) - vm1.
+
+ y is 3*n + 1 limbs, y = y0 + y1 B + y2 B^2. We store them as
+ follows: y0 at scratch, y1 at pp + 2*n, and y2 at scratch + n
+ (already in place, except for carry propagation).
+
+ We thus add
+
+ B^3 B^2 B 1
+ | | | |
+ +-----+----+
+ + | x0 + x2 |
+ +----+-----+----+
+ + | x0 + x2 |
+ +----------+
+ - | vm1 |
+ --+----++----+----+-
+ | y2 | y1 | y0 |
+ +-----+----+----+
+
+ Since we store y0 at the same location as the low half of x0 + x2, we
+ need to do the middle sum first. */
+
+ hi = vm1[2*n];
+ cy = mpn_add_n (pp + 2*n, v1, v1 + n, n);
+ MPN_INCR_U (v1 + n, n + 1, cy + v1[2*n]);
+
+ /* FIXME: Can we get rid of this second vm1_neg conditional by
+ swapping the location of +1 and -1 values? */
+ if (vm1_neg)
+ {
+ cy = mpn_add_n (v1, v1, vm1, n);
+ hi += mpn_add_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
+ MPN_INCR_U (v1 + n, n+1, hi);
+ }
+ else
+ {
+ cy = mpn_sub_n (v1, v1, vm1, n);
+ hi += mpn_sub_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
+ MPN_DECR_U (v1 + n, n+1, hi);
+ }
+
+ TOOM32_MUL_N_REC (pp, a0, b0, n, scratch_out);
+ /* vinf, s+t limbs. Use mpn_mul for now, to handle unbalanced operands */
+ if (s > t) mpn_mul (pp+3*n, a2, s, b1, t);
+ else mpn_mul (pp+3*n, b1, t, a2, s);
+
+ /* Remaining interpolation.
+
+ y * B + x0 + x3 B^3 - x0 B^2 - x3 B
+ = (x1 + x3) B + (x0 + x2) B^2 + x0 + x3 B^3 - x0 B^2 - x3 B
+ = y0 B + y1 B^2 + y3 B^3 + Lx0 + H x0 B
+ + L x3 B^3 + H x3 B^4 - Lx0 B^2 - H x0 B^3 - L x3 B - H x3 B^2
+ = L x0 + (y0 + H x0 - L x3) B + (y1 - L x0 - H x3) B^2
+ + (y2 - (H x0 - L x3)) B^3 + H x3 B^4
+
+ B^4 B^3 B^2 B 1
+ | | | | | |
+ +-------+ +---------+---------+
+ | Hx3 | | Hx0-Lx3 | Lx0 |
+ +------+----------+---------+---------+---------+
+ | y2 | y1 | y0 |
+ ++---------+---------+---------+
+ -| Hx0-Lx3 | - Lx0 |
+ +---------+---------+
+ | - Hx3 |
+ +--------+
+
+ We must take into account the carry from Hx0 - Lx3.
+ */
+
+ cy = mpn_sub_n (pp + n, pp + n, pp+3*n, n);
+ hi = scratch[2*n] + cy;
+
+ cy = mpn_sub_nc (pp + 2*n, pp + 2*n, pp, n, cy);
+ hi -= mpn_sub_nc (pp + 3*n, scratch + n, pp + n, n, cy);
+
+ hi += mpn_add (pp + n, pp + n, 3*n, scratch, n);
+
+ /* FIXME: Is support for s + t == n needed? */
+ if (LIKELY (s + t > n))
+ {
+ hi -= mpn_sub (pp + 2*n, pp + 2*n, 2*n, pp + 4*n, s+t-n);
+
+ ASSERT (hi >= 0); /* contribution of the middle terms >= 0 */
+ MPN_INCR_U (pp + 4*n, s+t-n, hi);
+ }
+ else
+ ASSERT (hi == 0);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom33_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom33_mul.c
new file mode 100644
index 0000000..54f055f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom33_mul.c
@@ -0,0 +1,316 @@
+/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
+ size. Or more accurately, bn <= an < (3/2)bn.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Additional improvements by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2010, 2012, 2015, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+ <-s--><--n--><--n-->
+ ____ ______ ______
+ |_a2_|___a1_|___a0_|
+ |b2_|___b1_|___b0_|
+ <-t-><--n--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = (a0+ a1+ a2)*(b0+ b1+ b2) # A(1)*B(1) ah <= 2 bh <= 2
+ vm1 = (a0- a1+ a2)*(b0- b1+ b2) # A(-1)*B(-1) |ah| <= 1 bh <= 1
+ v2 = (a0+2a1+4a2)*(b0+2b1+4b2) # A(2)*B(2) ah <= 6 bh <= 6
+ vinf= a2 * b2 # A(inf)*B(inf)
+*/
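+
+/* With the product polynomial written as x4*t^4 + ... + x1*t + x0,
+   the evaluations form the linear system x0 = v0, x4 = vinf,
+   v1 = x0+x1+x2+x3+x4, A(-1)*B(-1) = x0-x1+x2-x3+x4 (equal to +/-vm1
+   according to the tracked sign) and v2 = x0+2*x1+4*x2+8*x3+16*x4;
+   for instance x2 = (v1 + A(-1)*B(-1))/2 - x0 - x4.  The rest of the
+   function recovers x1..x3 by solving this system. */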
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom33 1
+#else
+#define MAYBE_mul_basecase \
+ (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom33 \
+ (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
+#endif
+
+/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
+ multiplication at the infinity point. We may have
+ MAYBE_mul_basecase == 0, and still get s just below
+ MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
+ s == 1 and mpn_toom22_mul will crash.
+*/
+
+#define TOOM33_MUL_N_REC(p, a, b, n, ws) \
+ do { \
+ if (MAYBE_mul_basecase \
+ && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
+ mpn_mul_basecase (p, a, n, b, n); \
+ else if (! MAYBE_mul_toom33 \
+ || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
+ mpn_toom22_mul (p, a, n, b, n, ws); \
+ else \
+ mpn_toom33_mul (p, a, n, b, n, ws); \
+ } while (0)
+
+void
+mpn_toom33_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn,
+ mp_ptr scratch)
+{
+ const int __gmpn_cpuvec_initialized = 1;
+ mp_size_t n, s, t;
+ int vm1_neg;
+ mp_limb_t cy, vinf0;
+ mp_ptr gp;
+ mp_ptr as1, asm1, as2;
+ mp_ptr bs1, bsm1, bs2;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2*n)
+#define b0 bp
+#define b1 (bp + n)
+#define b2 (bp + 2*n)
+
+ n = (an + 2) / (size_t) 3;
+
+ s = an - 2 * n;
+ t = bn - 2 * n;
+
+ ASSERT (an >= bn);
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+
+ as1 = scratch + 4 * n + 4;
+ asm1 = scratch + 2 * n + 2;
+ as2 = pp + n + 1;
+
+ bs1 = pp;
+ bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
+ bs2 = pp + 2 * n + 2;
+
+ gp = scratch;
+
+ vm1_neg = 0;
+
+ /* Compute as1 and asm1. */
+ cy = mpn_add (gp, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
+ as1[n] = cy >> 1;
+ asm1[n] = 0;
+ vm1_neg = 1;
+ }
+ else
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+ as1[n] = cy + (cy2 >> 1);
+ asm1[n] = cy - (cy2 & 1);
+ }
+#else
+ as1[n] = cy + mpn_add_n (as1, gp, a1, n);
+ if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+ {
+ mpn_sub_n (asm1, a1, gp, n);
+ asm1[n] = 0;
+ vm1_neg = 1;
+ }
+ else
+ {
+ cy -= mpn_sub_n (asm1, gp, a1, n);
+ asm1[n] = cy;
+ }
+#endif
+
+ /* Compute as2. */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+ cy = mpn_add_n (as2, a2, as1, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+ cy += as1[n];
+ cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (as2, a1, a2, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+ cy = mpn_add_n (as2, a2, as1, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+ cy += as1[n];
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy -= mpn_sub_n (as2, as2, a0, n);
+#endif
+#endif
+ as2[n] = cy;
+
+ /* Compute bs1 and bsm1. */
+ cy = mpn_add (gp, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
+ bs1[n] = cy >> 1;
+ bsm1[n] = 0;
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
+ bs1[n] = cy + (cy2 >> 1);
+ bsm1[n] = cy - (cy2 & 1);
+ }
+#else
+ bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
+ if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
+ {
+ mpn_sub_n (bsm1, b1, gp, n);
+ bsm1[n] = 0;
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ cy -= mpn_sub_n (bsm1, gp, b1, n);
+ bsm1[n] = cy;
+ }
+#endif
+
+ /* Compute bs2. */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+ cy = mpn_add_n (bs2, b2, bs1, t);
+ if (t != n)
+ cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
+ cy += bs1[n];
+ cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (bs2, b1, b2, t);
+ if (t != n)
+ cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
+ cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
+#else
+ cy = mpn_add_n (bs2, bs1, b2, t);
+ if (t != n)
+ cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
+ cy += bs1[n];
+ cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
+ cy -= mpn_sub_n (bs2, bs2, b0, n);
+#endif
+#endif
+ bs2[n] = cy;
+
+ ASSERT (as1[n] <= 2);
+ ASSERT (bs1[n] <= 2);
+ ASSERT (asm1[n] <= 1);
+ ASSERT (bsm1[n] <= 1);
+ ASSERT (as2[n] <= 6);
+ ASSERT (bs2[n] <= 6);
+
+#define v0 pp /* 2n */
+#define v1 (pp + 2 * n) /* 2n+1 */
+#define vinf (pp + 4 * n) /* s+t */
+#define vm1 scratch /* 2n+1 */
+#define v2 (scratch + 2 * n + 1) /* 2n+2 */
+#define scratch_out (scratch + 5 * n + 5)
+
+ /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+ TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+ cy = 0;
+ if (asm1[n] != 0)
+ cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+ if (bsm1[n] != 0)
+ cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+ vm1[2 * n] = cy;
+#else
+ vm1[2 * n] = 0;
+ TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + (bsm1[n] | asm1[n]), scratch_out);
+#endif
+
+ TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
+
+ /* vinf, s+t limbs */
+ if (s > t) mpn_mul (vinf, a2, s, b2, t);
+ else TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
+
+ vinf0 = vinf[0]; /* v1 overlaps with this */
+
+#ifdef SMALLER_RECURSION
+ /* v1, 2n+1 limbs */
+ TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+ if (as1[n] == 1)
+ {
+ cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+ }
+ else if (as1[n] != 0)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
+#else
+ cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+ }
+ else
+ cy = 0;
+ if (bs1[n] == 1)
+ {
+ cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+ }
+ else if (bs1[n] != 0)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+ cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+ }
+ v1[2 * n] = cy;
+#else
+ cy = vinf[1];
+ TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
+ vinf[1] = cy;
+#endif
+
+ TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
+
+ mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+}
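
The header comment of toom33_mul.c above lists the five evaluation points but leaves the recombination implicit. The following standalone sketch, which is not part of the GMP sources, replays the evaluate / pointwise-multiply / interpolate cycle with a toy base of 1000 in place of 2^GMP_NUMB_BITS and single machine words in place of limb arrays; every name in it is illustrative only, and the real work in the file above (the recursive TOOM33_MUL_N_REC calls and mpn_toom_interpolate_5pts) handles carries and signs that this toy version never sees.

/* toy Toom-3: split, evaluate at 0, +1, -1, +2, inf, multiply pointwise,
   interpolate the degree-4 product polynomial, recombine at the toy base */
#include <assert.h>
#include <stdio.h>

int
main (void)
{
  const long long B = 1000;                 /* toy "limb" base */
  long long a = 123456789, b = 987654321;

  /* 3-way splits, least significant digit first */
  long long a0 = a % B, a1 = (a / B) % B, a2 = a / (B * B);
  long long b0 = b % B, b1 = (b / B) % B, b2 = b / (B * B);

  /* point values, exactly the v0, v1, vm1, v2, vinf of the comment above */
  long long v0   = a0 * b0;
  long long v1   = (a0 + a1 + a2) * (b0 + b1 + b2);
  long long vm1  = (a0 - a1 + a2) * (b0 - b1 + b2);
  long long v2   = (a0 + 2*a1 + 4*a2) * (b0 + 2*b1 + 4*b2);
  long long vinf = a2 * b2;

  /* interpolate the coefficients c0..c4 of C(x) = A(x)*B(x) */
  long long c0 = v0;
  long long c4 = vinf;
  long long c2 = (v1 + vm1) / 2 - c0 - c4;
  long long t1 = (v1 - vm1) / 2;                 /* c1 + c3 */
  long long t2 = (v2 - c0 - 16*c4 - 4*c2) / 2;   /* c1 + 4*c3 */
  long long c3 = (t2 - t1) / 3;
  long long c1 = t1 - c3;

  /* recombine at the toy base and compare with the direct product */
  long long prod = c0 + B*(c1 + B*(c2 + B*(c3 + B*c4)));
  assert (prod == a * b);
  printf ("%lld * %lld = %lld\n", a, b, prod);
  return 0;
}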
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom3_sqr.c b/vendor/gmp-6.3.0/mpn/generic/toom3_sqr.c
new file mode 100644
index 0000000..297a27f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom3_sqr.c
@@ -0,0 +1,221 @@
+/* mpn_toom3_sqr -- Square {ap,an}.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Additional improvements by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2015, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+ <-s--><--n--><--n-->
+ ____ ______ ______
+ |_a2_|___a1_|___a0_|
+
+ v0 = a0 ^2 # A(0)^2
+ v1 = (a0+ a1+ a2)^2 # A(1)^2 ah <= 2
+ vm1 = (a0- a1+ a2)^2 # A(-1)^2 |ah| <= 1
+ v2 = (a0+2a1+4a2)^2 # A(2)^2 ah <= 6
+ vinf= a2 ^2 # A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_toom3 1
+#else
+#define MAYBE_sqr_basecase \
+ (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom3 \
+ (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
+#endif
+
+#define TOOM3_SQR_REC(p, a, n, ws) \
+ do { \
+ if (MAYBE_sqr_basecase \
+ && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
+ mpn_sqr_basecase (p, a, n); \
+ else if (! MAYBE_sqr_toom3 \
+ || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) \
+ mpn_toom2_sqr (p, a, n, ws); \
+ else \
+ mpn_toom3_sqr (p, a, n, ws); \
+ } while (0)
+
+void
+mpn_toom3_sqr (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_ptr scratch)
+{
+ const int __gmpn_cpuvec_initialized = 1;
+ mp_size_t n, s;
+ mp_limb_t cy, vinf0;
+ mp_ptr gp;
+ mp_ptr as1, asm1, as2;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2*n)
+
+ n = (an + 2) / (size_t) 3;
+
+ s = an - 2 * n;
+
+ ASSERT (0 < s && s <= n);
+
+ as1 = scratch + 4 * n + 4;
+ asm1 = scratch + 2 * n + 2;
+ as2 = pp + n + 1;
+
+ gp = scratch;
+
+ /* Compute as1 and asm1. */
+ cy = mpn_add (gp, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
+ as1[n] = cy >> 1;
+ asm1[n] = 0;
+ }
+ else
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+ as1[n] = cy + (cy2 >> 1);
+ asm1[n] = cy - (cy2 & 1);
+ }
+#else
+ as1[n] = cy + mpn_add_n (as1, gp, a1, n);
+ if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+ {
+ mpn_sub_n (asm1, a1, gp, n);
+ asm1[n] = 0;
+ }
+ else
+ {
+ cy -= mpn_sub_n (asm1, gp, a1, n);
+ asm1[n] = cy;
+ }
+#endif
+
+ /* Compute as2. */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+ cy = mpn_add_n (as2, a2, as1, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+ cy += as1[n];
+ cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (as2, a1, a2, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+ cy = mpn_add_n (as2, a2, as1, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+ cy += as1[n];
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy -= mpn_sub_n (as2, as2, a0, n);
+#endif
+#endif
+ as2[n] = cy;
+
+ ASSERT (as1[n] <= 2);
+ ASSERT (asm1[n] <= 1);
+
+#define v0 pp /* 2n */
+#define v1 (pp + 2 * n) /* 2n+1 */
+#define vinf (pp + 4 * n) /* s+s */
+#define vm1 scratch /* 2n+1 */
+#define v2 (scratch + 2 * n + 1) /* 2n+2 */
+#define scratch_out (scratch + 5 * n + 5)
+
+ /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+ TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
+ cy = asm1[n];
+ if (cy != 0)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy += mpn_addlsh1_n_ip1 (vm1 + n, asm1, n);
+#else
+ cy += mpn_addmul_1 (vm1 + n, asm1, n, CNST_LIMB(2));
+#endif
+ }
+ vm1[2 * n] = cy;
+#else
+ vm1[2 * n] = 0;
+ TOOM3_SQR_REC (vm1, asm1, n + asm1[n], scratch_out);
+#endif
+
+ TOOM3_SQR_REC (v2, as2, n + 1, scratch_out); /* v2, 2n+1 limbs */
+
+ TOOM3_SQR_REC (vinf, a2, s, scratch_out); /* vinf, s+s limbs */
+
+ vinf0 = vinf[0]; /* v1 overlaps with this */
+
+#ifdef SMALLER_RECURSION
+ /* v1, 2n+1 limbs */
+ TOOM3_SQR_REC (v1, as1, n, scratch_out);
+ cy = as1[n];
+ if (cy == 1)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+ cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+ }
+ else if (cy != 0)
+ {
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+ cy = 4 + mpn_addlsh2_n_ip1 (v1 + n, as1, n);
+#else
+ cy = 4 + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(4));
+#endif
+ }
+ v1[2 * n] = cy;
+#else
+ cy = vinf[1];
+ TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
+ vinf[1] = cy;
+#endif
+
+ TOOM3_SQR_REC (v0, ap, n, scratch_out); /* v0, 2n limbs */
+
+ mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom42_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom42_mul.c
new file mode 100644
index 0000000..e84ce65
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom42_mul.c
@@ -0,0 +1,234 @@
+/* mpn_toom42_mul -- Multiply {ap,an} and {bp,bn} where an is nominally twice
+ as large as bn. Or more accurately, (3/2)bn < an < 4bn.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Additional improvements by Marco Bodrato.
+
+ The idea of applying toom to unbalanced multiplication is due to Marco
+ Bodrato and Alberto Zanoni.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+ <-s-><--n--><--n--><--n-->
+ ___ ______ ______ ______
+ |a3_|___a2_|___a1_|___a0_|
+ |_b1_|___b0_|
+ <-t--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = (a0+ a1+ a2+ a3)*(b0+ b1) # A(1)*B(1) ah <= 3 bh <= 1
+ vm1 = (a0- a1+ a2- a3)*(b0- b1) # A(-1)*B(-1) |ah| <= 1 bh = 0
+ v2 = (a0+2a1+4a2+8a3)*(b0+2b1) # A(2)*B(2) ah <= 14 bh <= 2
+ vinf= a3 * b1 # A(inf)*B(inf)
+*/
+
+#define TOOM42_MUL_N_REC(p, a, b, n, ws) \
+ do { \
+ mpn_mul_n (p, a, b, n); \
+ } while (0)
+
+void
+mpn_toom42_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn,
+ mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ int vm1_neg;
+ mp_limb_t cy, vinf0;
+ mp_ptr a0_a2;
+ mp_ptr as1, asm1, as2;
+ mp_ptr bs1, bsm1, bs2;
+ mp_ptr tmp;
+ TMP_DECL;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2*n)
+#define a3 (ap + 3*n)
+#define b0 bp
+#define b1 (bp + n)
+
+ n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
+
+ s = an - 3 * n;
+ t = bn - n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+
+ TMP_MARK;
+
+ tmp = TMP_ALLOC_LIMBS (6 * n + 5);
+ as1 = tmp; tmp += n + 1;
+ asm1 = tmp; tmp += n + 1;
+ as2 = tmp; tmp += n + 1;
+ bs1 = tmp; tmp += n + 1;
+ bsm1 = tmp; tmp += n;
+ bs2 = tmp; tmp += n + 1;
+
+ a0_a2 = pp;
+
+ /* Compute as1 and asm1. */
+ vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
+
+ /* Compute as2. */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (as2, a2, a3, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+ cy = mpn_lshift (as2, a3, s, 1);
+ cy += mpn_add_n (as2, a2, as2, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a0, as2, n);
+#endif
+ as2[n] = cy;
+
+ /* Compute bs1 and bsm1. */
+ if (t == n)
+ {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+ }
+ bs1[n] = cy >> 1;
+#else
+ bs1[n] = mpn_add_n (bs1, b0, b1, n);
+
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ mpn_sub_n (bsm1, b1, b0, n);
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ mpn_sub_n (bsm1, b0, b1, n);
+ }
+#endif
+ }
+ else
+ {
+ bs1[n] = mpn_add (bs1, b0, n, b1, t);
+
+ if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+ {
+ mpn_sub_n (bsm1, b1, b0, t);
+ MPN_ZERO (bsm1 + t, n - t);
+ vm1_neg ^= 1;
+ }
+ else
+ {
+ mpn_sub (bsm1, b0, n, b1, t);
+ }
+ }
+
+ /* Compute bs2, recycling bs1. bs2=bs1+b1 */
+ mpn_add (bs2, bs1, n + 1, b1, t);
+
+ ASSERT (as1[n] <= 3);
+ ASSERT (bs1[n] <= 1);
+ ASSERT (asm1[n] <= 1);
+/*ASSERT (bsm1[n] == 0);*/
+ ASSERT (as2[n] <= 14);
+ ASSERT (bs2[n] <= 2);
+
+#define v0 pp /* 2n */
+#define v1 (pp + 2 * n) /* 2n+1 */
+#define vinf (pp + 4 * n) /* s+t */
+#define vm1 scratch /* 2n+1 */
+#define v2 (scratch + 2 * n + 1) /* 2n+2 */
+#define scratch_out scratch + 4 * n + 4 /* Currently unused. */
+
+ /* vm1, 2n+1 limbs */
+ TOOM42_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+ cy = 0;
+ if (asm1[n] != 0)
+ cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+ vm1[2 * n] = cy;
+
+ TOOM42_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
+
+ /* vinf, s+t limbs */
+ if (s > t) mpn_mul (vinf, a3, s, b1, t);
+ else mpn_mul (vinf, b1, t, a3, s);
+
+ vinf0 = vinf[0]; /* v1 overlaps with this */
+
+ /* v1, 2n+1 limbs */
+ TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+ if (as1[n] == 1)
+ {
+ cy = mpn_add_n (v1 + n, v1 + n, bs1, n);
+ }
+ else if (as1[n] == 2)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy = mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
+#else
+ cy = mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+ }
+ else if (as1[n] == 3)
+ {
+ cy = mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(3));
+ }
+ else
+ cy = 0;
+ if (bs1[n] != 0)
+ cy += as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
+ v1[2 * n] = cy;
+
+ TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
+
+ mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+
+ TMP_FREE;
+}
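
toom42_mul.c above keeps a vm1_neg flag because limb vectors are unsigned: the -1 evaluations are stored as magnitudes, and mpn_toom_eval_dgr3_pm1 signals whether A(-1) is negative through its low return bit (the code masks it with & 1). A minimal sketch of that sign bookkeeping, not part of GMP and with purely illustrative names, using machine integers in place of {ap,n} blocks:

#include <stdio.h>

/* return |a0 - a1 + a2 - a3| and set *neg when the true value is negative */
static long long
eval_dgr3_m1 (long long a0, long long a1, long long a2, long long a3, int *neg)
{
  long long even = a0 + a2, odd = a1 + a3;   /* A(-1) = even - odd */
  *neg = even < odd;
  return *neg ? odd - even : even - odd;
}

int
main (void)
{
  int vm1_neg;
  long long b0 = 4, b1 = 7;                              /* toy B = b0 + b1*x */
  long long am1 = eval_dgr3_m1 (5, 9, 2, 1, &vm1_neg);   /* A(-1) = -3, stored as 3 */

  /* B(-1) = b0 - b1; when it is negative the code above subtracts the other
     way around and flips vm1_neg, so the product sign is the XOR of both */
  long long bm1 = b0 >= b1 ? b0 - b1 : b1 - b0;
  vm1_neg ^= b0 < b1;

  printf ("|vm1| = |A(-1)*B(-1)| = %lld, vm1_neg = %d\n", am1 * bm1, vm1_neg);
  return 0;
}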
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom42_mulmid.c b/vendor/gmp-6.3.0/mpn/generic/toom42_mulmid.c
new file mode 100644
index 0000000..f581b10
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom42_mulmid.c
@@ -0,0 +1,237 @@
+/* mpn_toom42_mulmid -- toom42 middle product
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+
+/*
+ Middle product of {ap,2n-1} and {bp,n}, output written to {rp,n+2}.
+
+ Neither ap nor bp may overlap rp.
+
+ Must have n >= 4.
+
+ Amount of scratch space required is given by mpn_toom42_mulmid_itch().
+
+ FIXME: this code assumes that n is small compared to GMP_NUMB_MAX. The exact
+ requirements should be clarified.
+*/
+void
+mpn_toom42_mulmid (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+ mp_ptr scratch)
+{
+ mp_limb_t cy, e[12], zh, zl;
+ mp_size_t m;
+ int neg;
+
+ ASSERT (n >= 4);
+ ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+ ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+ ap += n & 1; /* handle odd row and diagonal later */
+ m = n / 2;
+
+ /* (e0h:e0l) etc are correction terms, in 2's complement */
+#define e0l (e[0])
+#define e0h (e[1])
+#define e1l (e[2])
+#define e1h (e[3])
+#define e2l (e[4])
+#define e2h (e[5])
+#define e3l (e[6])
+#define e3h (e[7])
+#define e4l (e[8])
+#define e4h (e[9])
+#define e5l (e[10])
+#define e5h (e[11])
+
+#define s (scratch + 2)
+#define t (rp + m + 2)
+#define p0 rp
+#define p1 scratch
+#define p2 (rp + m)
+#define next_scratch (scratch + 3*m + 1)
+
+ /*
+ rp scratch
+ |---------|-----------| |---------|---------|----------|
+ 0 m 2m+2 0 m 2m 3m+1
+ <----p2----> <-------------s------------->
+ <----p0----><---t----> <----p1---->
+ */
+
+ /* compute {s,3m-1} = {a,3m-1} + {a+m,3m-1} and error terms e0, e1, e2, e3 */
+ cy = mpn_add_err1_n (s, ap, ap + m, &e0l, bp + m, m - 1, 0);
+ cy = mpn_add_err2_n (s + m - 1, ap + m - 1, ap + 2*m - 1, &e1l,
+ bp + m, bp, m, cy);
+ mpn_add_err1_n (s + 2*m - 1, ap + 2*m - 1, ap + 3*m - 1, &e3l, bp, m, cy);
+
+ /* compute t = (-1)^neg * ({b,m} - {b+m,m}) and error terms e4, e5 */
+ if (mpn_cmp (bp + m, bp, m) < 0)
+ {
+ ASSERT_NOCARRY (mpn_sub_err2_n (t, bp, bp + m, &e4l,
+ ap + m - 1, ap + 2*m - 1, m, 0));
+ neg = 1;
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_err2_n (t, bp + m, bp, &e4l,
+ ap + m - 1, ap + 2*m - 1, m, 0));
+ neg = 0;
+ }
+
+ /* recursive middle products. The picture is:
+
+ b[2m-1] A A A B B B - - - - -
+ ... - A A A B B B - - - -
+ b[m] - - A A A B B B - - -
+ b[m-1] - - - C C C D D D - -
+ ... - - - - C C C D D D -
+ b[0] - - - - - C C C D D D
+ a[0] ... a[m] ... a[2m] ... a[4m-2]
+ */
+
+ if (m < MULMID_TOOM42_THRESHOLD)
+ {
+ /* A + B */
+ mpn_mulmid_basecase (p0, s, 2*m - 1, bp + m, m);
+ /* accumulate high limbs of p0 into e1 */
+ ADDC_LIMB (cy, e1l, e1l, p0[m]);
+ e1h += p0[m + 1] + cy;
+ /* (-1)^neg * (B - C) (overwrites first m limbs of s) */
+ mpn_mulmid_basecase (p1, ap + m, 2*m - 1, t, m);
+ /* C + D (overwrites t) */
+ mpn_mulmid_basecase (p2, s + m, 2*m - 1, bp, m);
+ }
+ else
+ {
+ /* as above, but use toom42 instead */
+ mpn_toom42_mulmid (p0, s, bp + m, m, next_scratch);
+ ADDC_LIMB (cy, e1l, e1l, p0[m]);
+ e1h += p0[m + 1] + cy;
+ mpn_toom42_mulmid (p1, ap + m, t, m, next_scratch);
+ mpn_toom42_mulmid (p2, s + m, bp, m, next_scratch);
+ }
+
+ /* apply error terms */
+
+ /* -e0 at rp[0] */
+ SUBC_LIMB (cy, rp[0], rp[0], e0l);
+ SUBC_LIMB (cy, rp[1], rp[1], e0h + cy);
+ if (UNLIKELY (cy))
+ {
+ cy = (m > 2) ? mpn_sub_1 (rp + 2, rp + 2, m - 2, 1) : 1;
+ SUBC_LIMB (cy, e1l, e1l, cy);
+ e1h -= cy;
+ }
+
+ /* z = e1 - e2 + high(p0) */
+ SUBC_LIMB (cy, zl, e1l, e2l);
+ zh = e1h - e2h - cy;
+
+ /* z at rp[m] */
+ ADDC_LIMB (cy, rp[m], rp[m], zl);
+ zh = (zh + cy) & GMP_NUMB_MASK;
+ ADDC_LIMB (cy, rp[m + 1], rp[m + 1], zh);
+ cy -= (zh >> (GMP_NUMB_BITS - 1));
+ if (UNLIKELY (cy))
+ {
+ if (cy == 1)
+ mpn_add_1 (rp + m + 2, rp + m + 2, m, 1);
+ else /* cy == -1 */
+ mpn_sub_1 (rp + m + 2, rp + m + 2, m, 1);
+ }
+
+ /* e3 at rp[2*m] */
+ ADDC_LIMB (cy, rp[2*m], rp[2*m], e3l);
+ rp[2*m + 1] = (rp[2*m + 1] + e3h + cy) & GMP_NUMB_MASK;
+
+ /* e4 at p1[0] */
+ ADDC_LIMB (cy, p1[0], p1[0], e4l);
+ ADDC_LIMB (cy, p1[1], p1[1], e4h + cy);
+ if (UNLIKELY (cy))
+ mpn_add_1 (p1 + 2, p1 + 2, m, 1);
+
+ /* -e5 at p1[m] */
+ SUBC_LIMB (cy, p1[m], p1[m], e5l);
+ p1[m + 1] = (p1[m + 1] - e5h - cy) & GMP_NUMB_MASK;
+
+ /* adjustment if p1 ends up negative */
+ cy = (p1[m + 1] >> (GMP_NUMB_BITS - 1));
+
+ /* add (-1)^neg * (p1 - B^m * p1) to output */
+ if (neg)
+ {
+ mpn_sub_1 (rp + m + 2, rp + m + 2, m, cy);
+ mpn_add (rp, rp, 2*m + 2, p1, m + 2); /* A + C */
+ mpn_sub_n (rp + m, rp + m, p1, m + 2); /* B + D */
+ }
+ else
+ {
+ mpn_add_1 (rp + m + 2, rp + m + 2, m, cy);
+ mpn_sub (rp, rp, 2*m + 2, p1, m + 2); /* A + C */
+ mpn_add_n (rp + m, rp + m, p1, m + 2); /* B + D */
+ }
+
+ /* odd row and diagonal */
+ if (n & 1)
+ {
+ /*
+ Products marked E are already done. We need to do products marked O.
+
+ OOOOO----
+ -EEEEO---
+ --EEEEO--
+ ---EEEEO-
+ ----EEEEO
+ */
+
+ /* first row of O's */
+ cy = mpn_addmul_1 (rp, ap - 1, n, bp[n - 1]);
+ ADDC_LIMB (rp[n + 1], rp[n], rp[n], cy);
+
+ /* O's on diagonal */
+ /* FIXME: should probably define an interface "mpn_mulmid_diag_1"
+ that can handle the sum below. Currently we're relying on
+ mulmid_basecase being pretty fast for a diagonal sum like this,
+ which is true at least for the K8 asm version, but surely false
+ for the generic version. */
+ mpn_mulmid_basecase (e, ap + n - 1, n - 1, bp, n - 1);
+ mpn_add_n (rp + n - 1, rp + n - 1, e, 3);
+ }
+}
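
The toom42_mulmid.c comment above fixes the operand and result sizes but leaves "middle product" itself undefined. The sketch below, not part of the GMP sources, spells out the definition this file works with: for {ap,2n-1} times {bp,n}, only the diagonals i+j = n-1 .. 2n-2 of the full product contribute, and the n+2 result limbs give the diagonal sums room to overflow past n limbs. Base-10 digits stand in for limbs and the helper name is illustrative only.

#include <assert.h>
#include <stdio.h>

static long long
pow10ll (int e)                     /* toy replacement for limb positions */
{
  long long p = 1;
  while (e-- > 0)
    p *= 10;
  return p;
}

int
main (void)
{
  enum { n = 4 };
  /* toy "limbs" in base 10, least significant digit first */
  const long long a[2*n - 1] = { 3, 1, 4, 1, 5, 9, 2 };
  const long long b[n]       = { 2, 7, 1, 8 };
  long long lo = 0, mid = 0, hi = 0, full = 0;

  for (int i = 0; i < 2*n - 1; i++)
    for (int j = 0; j < n; j++)
      {
        long long term = a[i] * b[j];
        int d = i + j;                              /* diagonal index */
        full += term * pow10ll (d);
        if (d < n - 1)
          lo += term * pow10ll (d);                 /* below the middle band */
        else if (d <= 2*n - 2)
          mid += term * pow10ll (d - (n - 1));      /* the middle product */
        else
          hi += term * pow10ll (d - (2*n - 1));     /* above the middle band */
      }

  /* the three bands partition the full product */
  assert (full == lo + mid * pow10ll (n - 1) + hi * pow10ll (2*n - 1));
  printf ("middle product = %lld\n", mid);
  return 0;
}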
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom43_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom43_mul.c
new file mode 100644
index 0000000..34acd25
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom43_mul.c
@@ -0,0 +1,238 @@
+/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
+ times as large as bn. Or more accurately, bn < an < 2 bn.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ The idea of applying toom to unbalanced multiplication is due to Marco
+ Bodrato and Alberto Zanoni.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1, +2, +inf
+
+ <-s-><--n--><--n--><--n-->
+ ___ ______ ______ ______
+ |a3_|___a2_|___a1_|___a0_|
+ |_b2_|___b1_|___b0_|
+ <-t--><--n--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = (a0+ a1+ a2+ a3)*(b0+ b1+ b2) # A(1)*B(1) ah <= 3 bh <= 2
+ vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2) # A(-1)*B(-1) |ah| <= 1 |bh|<= 1
+ v2 = (a0+2a1+4a2+8a3)*(b0+2b1+4b2) # A(2)*B(2) ah <= 14 bh <= 6
+ vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2) # A(-2)*B(-2) |ah| <= 9 |bh|<= 4
+ vinf= a3 * b2 # A(inf)*B(inf)
+*/
+
+void
+mpn_toom43_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ enum toom6_flags flags;
+ mp_limb_t cy;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2 * n)
+#define a3 (ap + 3 * n)
+#define b0 bp
+#define b1 (bp + n)
+#define b2 (bp + 2 * n)
+
+ n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
+
+ s = an - 3 * n;
+ t = bn - 2 * n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+
+ /* This is true whenever an >= 25 or bn >= 19, I think. It
+ guarantees that we can fit 5 values of size n+1 in the product
+ area. */
+ ASSERT (s+t >= 5);
+
+#define v0 pp /* 2n */
+#define vm1 (scratch) /* 2n+1 */
+#define v1 (pp + 2*n) /* 2n+1 */
+#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
+#define v2 (scratch + 4 * n + 2) /* 2n+1 */
+#define vinf (pp + 5 * n) /* s+t */
+#define bs1 pp /* n+1 */
+#define bsm1 (scratch + 2 * n + 2) /* n+1 */
+#define asm1 (scratch + 3 * n + 3) /* n+1 */
+#define asm2 (scratch + 4 * n + 4) /* n+1 */
+#define bsm2 (pp + n + 1) /* n+1 */
+#define bs2 (pp + 2 * n + 2) /* n+1 */
+#define as2 (pp + 3 * n + 3) /* n+1 */
+#define as1 (pp + 4 * n + 4) /* n+1 */
+
+  /* Total scratch need is 6 * n + 3 + 1; we allocate one extra
+ limb, because products will overwrite 2n+2 limbs. */
+
+#define a0a2 scratch
+#define b0b2 scratch
+#define a1a3 asm1
+#define b1d bsm1
+
+ /* Compute as2 and asm2. */
+ flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
+
+ /* Compute bs2 and bsm2. */
+ b1d[n] = mpn_lshift (b1d, b1, n, 1); /* 2b1 */
+#if HAVE_NATIVE_mpn_addlsh2_n
+ cy = mpn_addlsh2_n (b0b2, b0, b2, t); /* 4b2 + b0 */
+#else
+ cy = mpn_lshift (b0b2, b2, t, 2); /* 4b2 */
+ cy += mpn_add_n (b0b2, b0b2, b0, t); /* 4b2 + b0 */
+#endif
+ if (t != n)
+ cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
+ b0b2[n] = cy;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (mpn_cmp (b0b2, b1d, n+1) < 0)
+ {
+ mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+ }
+ else
+ {
+ mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
+ }
+#else
+ mpn_add_n (bs2, b0b2, b1d, n+1);
+ if (mpn_cmp (b0b2, b1d, n+1) < 0)
+ {
+ mpn_sub_n (bsm2, b1d, b0b2, n+1);
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+ }
+ else
+ {
+ mpn_sub_n (bsm2, b0b2, b1d, n+1);
+ }
+#endif
+
+ /* Compute as1 and asm1. */
+ flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2)));
+
+ /* Compute bs1 and bsm1. */
+ bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
+ bs1[n] = cy >> 1;
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+ }
+ else
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
+ bs1[n] = bsm1[n] + (cy >> 1);
+ bsm1[n]-= cy & 1;
+ }
+#else
+ bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
+ if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
+ {
+ mpn_sub_n (bsm1, b1, bsm1, n);
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+ }
+ else
+ {
+ bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
+ }
+#endif
+
+ ASSERT (as1[n] <= 3);
+ ASSERT (bs1[n] <= 2);
+ ASSERT (asm1[n] <= 1);
+ ASSERT (bsm1[n] <= 1);
+ ASSERT (as2[n] <=14);
+ ASSERT (bs2[n] <= 6);
+ ASSERT (asm2[n] <= 9);
+ ASSERT (bsm2[n] <= 4);
+
+ /* vm1, 2n+1 limbs */
+ vm1[2*n] = 0;
+ mpn_mul_n (vm1, asm1, bsm1, n + (asm1[n] | bsm1[n])); /* W4 */
+
+ /* vm2, 2n+1 limbs */
+ mpn_mul_n (vm2, asm2, bsm2, n+1); /* W2 */
+
+ /* v2, 2n+1 limbs */
+ mpn_mul_n (v2, as2, bs2, n+1); /* W1 */
+
+ /* v1, 2n+1 limbs */
+ mpn_mul_n (v1, as1, bs1, n+1); /* W3 */
+
+ /* vinf, s+t limbs */ /* W0 */
+ if (s > t) mpn_mul (vinf, a3, s, b2, t);
+ else mpn_mul (vinf, b2, t, a3, s);
+
+ /* v0, 2n limbs */
+ mpn_mul_n (v0, ap, bp, n); /* W5 */
+
+ mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
+
+#undef v0
+#undef vm1
+#undef v1
+#undef vm2
+#undef v2
+#undef vinf
+#undef bs1
+#undef bs2
+#undef bsm1
+#undef bsm2
+#undef asm1
+#undef asm2
+/* #undef as1 */
+/* #undef as2 */
+#undef a0a2
+#undef b0b2
+#undef a1a3
+#undef b1d
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef b0
+#undef b1
+#undef b2
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom44_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom44_mul.c
new file mode 100644
index 0000000..a361899
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom44_mul.c
@@ -0,0 +1,239 @@
+/* mpn_toom44_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
+ size. Or more accurately, bn <= an < (4/3)bn.
+
+ Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+
+ <-s--><--n--><--n--><--n-->
+ ____ ______ ______ ______
+ |_a3_|___a2_|___a1_|___a0_|
+ |b3_|___b2_|___b1_|___b0_|
+ <-t-><--n--><--n--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) # A(1)*B(1) ah <= 3 bh <= 3
+ vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) # A(-1)*B(-1) |ah| <= 1 |bh| <= 1
+ v2 = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) # A(2)*B(2) ah <= 14 bh <= 14
+ vm2 = ( a0-2a1+4a2-8a3)*( b0-2b1+4b2-8b3) # A(-2)*B(-2) |ah| <= 9 |bh| <= 9
+ vh = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) # A(1/2)*B(1/2) ah <= 14 bh <= 14
+  vinf= a3 * b3 # A(inf)*B(inf)
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22 1
+#define MAYBE_mul_toom44 1
+#else
+#define MAYBE_mul_basecase \
+ (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22 \
+ (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom44 \
+ (MUL_TOOM6H_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
+#endif
+
+#define TOOM44_MUL_N_REC(p, a, b, n, ws) \
+ do { \
+ if (MAYBE_mul_basecase \
+ && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
+ mpn_mul_basecase (p, a, n, b, n); \
+ else if (MAYBE_mul_toom22 \
+ && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
+ mpn_toom22_mul (p, a, n, b, n, ws); \
+ else if (! MAYBE_mul_toom44 \
+ || BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) \
+ mpn_toom33_mul (p, a, n, b, n, ws); \
+ else \
+ mpn_toom44_mul (p, a, n, b, n, ws); \
+ } while (0)
+
+/* Use of scratch space. In the product area, we store
+
+ ___________________
+ |vinf|____|_v1_|_v0_|
+ s+t 2n-1 2n+1 2n
+
+ The other recursive products, vm1, v2, vm2, vh are stored in the
+ scratch area. When computing them, we use the product area for
+ intermediate values.
+
+ Next, we compute v1. We can store the intermediate factors at v0
+ and at vh + 2n + 2.
+
+ Finally, for v0 and vinf, factors are parts of the input operands,
+ and we need scratch space only for the recursive multiplication.
+
+ In all, if S(an) is the scratch need, the needed space is bounded by
+
+ S(an) <= 4 (2*ceil(an/4) + 1) + 1 + S(ceil(an/4) + 1)
+
+ which should give S(n) = 8 n/3 + c log(n) for some constant c.
+*/
+
+void
+mpn_toom44_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn,
+ mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ mp_limb_t cy;
+ enum toom7_flags flags;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2*n)
+#define a3 (ap + 3*n)
+#define b0 bp
+#define b1 (bp + n)
+#define b2 (bp + 2*n)
+#define b3 (bp + 3*n)
+
+ ASSERT (an >= bn);
+
+ n = (an + 3) >> 2;
+
+ s = an - 3 * n;
+ t = bn - 3 * n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+ ASSERT (s >= t);
+
+ /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
+ * following limb, so these must be computed in order, and we need a
+ * one limb gap to tp. */
+#define v0 pp /* 2n */
+#define v1 (pp + 2 * n) /* 2n+1 */
+#define vinf (pp + 6 * n) /* s+t */
+#define v2 scratch /* 2n+1 */
+#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
+#define vh (scratch + 4 * n + 2) /* 2n+1 */
+#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
+#define tp (scratch + 8*n + 5)
+
+ /* apx and bpx must not overlap with v1 */
+#define apx pp /* n+1 */
+#define amx (pp + n + 1) /* n+1 */
+#define bmx (pp + 2*n + 2) /* n+1 */
+#define bpx (pp + 4*n + 2) /* n+1 */
+
+ /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
+ gives roughly 32 n/3 + log term. */
+
+ /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3. */
+ flags = (enum toom7_flags) (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp));
+
+ /* Compute bpx = b0 + 2 b1 + 4 b2 + 8 b3 and bmx = b0 - 2 b1 + 4 b2 - 8 b3. */
+ flags = (enum toom7_flags) (flags ^ (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp)));
+
+ TOOM44_MUL_N_REC (v2, apx, bpx, n + 1, tp); /* v2, 2n+1 limbs */
+ TOOM44_MUL_N_REC (vm2, amx, bmx, n + 1, tp); /* vm2, 2n+1 limbs */
+
+  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = ((2*a0 + a1) * 2 + a2) * 2 + a3 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (apx, a1, a0, n);
+ cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
+ if (s < n)
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_addlsh1_n (apx, a3, apx, s);
+ apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
+ MPN_INCR_U (apx + s, n+1-s, cy2);
+ }
+ else
+ apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+#else
+ cy = mpn_lshift (apx, a0, n, 1);
+ cy += mpn_add_n (apx, apx, a1, n);
+ cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+ cy += mpn_add_n (apx, apx, a2, n);
+ cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+ apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+#endif
+
+  /* Compute bpx = 8 b0 + 4 b1 + 2 b2 + b3 = ((2*b0 + b1) * 2 + b2) * 2 + b3 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (bpx, b1, b0, n);
+ cy = 2*cy + mpn_addlsh1_n (bpx, b2, bpx, n);
+ if (t < n)
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_addlsh1_n (bpx, b3, bpx, t);
+ bpx[n] = 2*cy + mpn_lshift (bpx + t, bpx + t, n - t, 1);
+ MPN_INCR_U (bpx + t, n+1-t, cy2);
+ }
+ else
+ bpx[n] = 2*cy + mpn_addlsh1_n (bpx, b3, bpx, n);
+#else
+ cy = mpn_lshift (bpx, b0, n, 1);
+ cy += mpn_add_n (bpx, bpx, b1, n);
+ cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
+ cy += mpn_add_n (bpx, bpx, b2, n);
+ cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
+ bpx[n] = cy + mpn_add (bpx, bpx, n, b3, t);
+#endif
+
+ ASSERT (apx[n] < 15);
+ ASSERT (bpx[n] < 15);
+
+ TOOM44_MUL_N_REC (vh, apx, bpx, n + 1, tp); /* vh, 2n+1 limbs */
+
+ /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3. */
+ flags = (enum toom7_flags) (flags | (toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp)));
+
+ /* Compute bpx = b0 + b1 + b2 + b3 and bmx = b0 - b1 + b2 - b3. */
+ flags = (enum toom7_flags) (flags ^ (toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp)));
+
+ ASSERT (amx[n] <= 1);
+ ASSERT (bmx[n] <= 1);
+
+ vm1 [2 * n] = 0;
+ TOOM44_MUL_N_REC (vm1, amx, bmx, n + (bmx[n] | amx[n]), tp); /* vm1, 2n+1 limbs */
+ /* Clobbers amx, bmx. */
+ TOOM44_MUL_N_REC (v1, apx, bpx, n + 1, tp); /* v1, 2n+1 limbs */
+
+ TOOM44_MUL_N_REC (v0, a0, b0, n, tp);
+ if (s > t)
+ mpn_mul (vinf, a3, s, b3, t);
+ else
+ TOOM44_MUL_N_REC (vinf, a3, b3, s, tp); /* vinf, s+t limbs */
+
+ mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t, tp);
+}
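
The scratch analysis in the toom44_mul.c comment above gives the recurrence S(an) <= 4*(2*ceil(an/4) + 1) + 1 + S(ceil(an/4) + 1) and claims S(n) is about 8n/3 plus a logarithmic term. The throwaway evaluator below, not part of GMP, just iterates that recurrence so the claim can be checked numerically; the cutoff where the recursion is taken to stop is an arbitrary placeholder, not a GMP threshold.

#include <stdio.h>

/* evaluate the scratch recurrence quoted in the comment above */
static long
scratch_bound (long an)
{
  long quarter = (an + 3) / 4;      /* ceil(an/4) */
  if (an < 30)                      /* placeholder cutoff for the recursion */
    return 0;
  return 4 * (2 * quarter + 1) + 1 + scratch_bound (quarter + 1);
}

int
main (void)
{
  for (long an = 100; an <= 100000; an *= 10)
    printf ("an = %6ld   bound = %7ld   8*an/3 = %7ld\n",
            an, scratch_bound (an), 8 * an / 3);
  return 0;
}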
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom4_sqr.c b/vendor/gmp-6.3.0/mpn/generic/toom4_sqr.c
new file mode 100644
index 0000000..fd59d1c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom4_sqr.c
@@ -0,0 +1,164 @@
+/* mpn_toom4_sqr -- Square {ap,an}.
+
+ Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2013, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1/2, +1, +2, +inf
+
+ <-s--><--n--><--n--><--n-->
+ ____ ______ ______ ______
+ |_a3_|___a2_|___a1_|___a0_|
+
+ v0 = a0 ^2 # A(0)^2
+ v1 = ( a0+ a1+ a2+ a3)^2 # A(1)^2 ah <= 3
+ vm1 = ( a0- a1+ a2- a3)^2 # A(-1)^2 |ah| <= 1
+ v2 = ( a0+2a1+4a2+8a3)^2 # A(2)^2 ah <= 14
+ vm2 = ( a0-2a1+4a2-8a3)^2 # A(-2)^2 -9<=ah<=4
+ vh = (8a0+4a1+2a2+ a3)^2 # A(1/2)^2 ah <= 14
+ vinf= a3 ^2 # A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_toom2 1
+#define MAYBE_sqr_toom4 1
+#else
+#define MAYBE_sqr_basecase \
+ (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2 \
+ (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom4 \
+ (SQR_TOOM6_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
+#endif
+
+#define TOOM4_SQR_REC(p, a, n, ws) \
+ do { \
+ if (MAYBE_sqr_basecase \
+ && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
+ mpn_sqr_basecase (p, a, n); \
+ else if (MAYBE_sqr_toom2 \
+ && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) \
+ mpn_toom2_sqr (p, a, n, ws); \
+ else if (! MAYBE_sqr_toom4 \
+ || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)) \
+ mpn_toom3_sqr (p, a, n, ws); \
+ else \
+ mpn_toom4_sqr (p, a, n, ws); \
+ } while (0)
+
+void
+mpn_toom4_sqr (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_ptr scratch)
+{
+ mp_size_t n, s;
+ mp_limb_t cy;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2*n)
+#define a3 (ap + 3*n)
+
+ n = (an + 3) >> 2;
+
+ s = an - 3 * n;
+
+ ASSERT (0 < s && s <= n);
+
+ /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
+ * following limb, so these must be computed in order, and we need a
+ * one limb gap to tp. */
+#define v0 pp /* 2n */
+#define v1 (pp + 2 * n) /* 2n+1 */
+#define vinf (pp + 6 * n) /* s+t */
+#define v2 scratch /* 2n+1 */
+#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
+#define vh (scratch + 4 * n + 2) /* 2n+1 */
+#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
+#define tp (scratch + 8*n + 5)
+
+ /* No overlap with v1 */
+#define apx pp /* n+1 */
+#define amx (pp + 4*n + 2) /* n+1 */
+
+ /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
+ gives roughly 32 n/3 + log term. */
+
+ /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3. */
+ mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+
+ TOOM4_SQR_REC (v2, apx, n + 1, tp); /* v2, 2n+1 limbs */
+ TOOM4_SQR_REC (vm2, amx, n + 1, tp); /* vm2, 2n+1 limbs */
+
+  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = ((2*a0 + a1) * 2 + a2) * 2 + a3 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (apx, a1, a0, n);
+ cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
+ if (s < n)
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_addlsh1_n (apx, a3, apx, s);
+ apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
+ MPN_INCR_U (apx + s, n+1-s, cy2);
+ }
+ else
+ apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+#else
+ cy = mpn_lshift (apx, a0, n, 1);
+ cy += mpn_add_n (apx, apx, a1, n);
+ cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+ cy += mpn_add_n (apx, apx, a2, n);
+ cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+ apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+#endif
+
+ ASSERT (apx[n] < 15);
+
+ TOOM4_SQR_REC (vh, apx, n + 1, tp); /* vh, 2n+1 limbs */
+
+ /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3. */
+ mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+
+ TOOM4_SQR_REC (v1, apx, n + 1, tp); /* v1, 2n+1 limbs */
+ vm1 [2 * n] = 0;
+ TOOM4_SQR_REC (vm1, amx, n + amx[n], tp); /* vm1, 2n+1 limbs */
+
+ TOOM4_SQR_REC (v0, a0, n, tp);
+ TOOM4_SQR_REC (vinf, a3, s, tp); /* vinf, 2s limbs */
+
+ mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) 0, vm2, vm1, v2, vh, 2*s, tp);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom52_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom52_mul.c
new file mode 100644
index 0000000..974059b
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom52_mul.c
@@ -0,0 +1,256 @@
+/* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/2
+   times as large as bn.  Or more accurately, 2 bn < an < 5 bn.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ The idea of applying toom to unbalanced multiplication is due to Marco
+ Bodrato and Alberto Zanoni.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1, +2, +inf
+
+ <-s-><--n--><--n--><--n--><--n-->
+ ___ ______ ______ ______ ______
+ |a4_|___a3_|___a2_|___a1_|___a0_|
+ |b1|___b0_|
+ <t-><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = (a0+ a1+ a2+ a3+ a4)*(b0+ b1) # A(1)*B(1) ah <= 4 bh <= 1
+ vm1 = (a0- a1+ a2- a3+ a4)*(b0- b1) # A(-1)*B(-1) |ah| <= 2 bh = 0
+ v2 = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) # A(2)*B(2) ah <= 30 bh <= 2
+ vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) # A(-2)*B(-2) |ah| <= 20 |bh|<= 1
+ vinf= a4 * b1 # A(inf)*B(inf)
+
+   Some slight optimizations in evaluation are taken from the paper:
+ "Towards Optimal Toom-Cook Multiplication for Univariate and
+ Multivariate Polynomials in Characteristic 2 and 0."
+*/
+
+void
+mpn_toom52_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ enum toom6_flags flags;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2 * n)
+#define a3 (ap + 3 * n)
+#define a4 (ap + 4 * n)
+#define b0 bp
+#define b1 (bp + n)
+
+ n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
+
+ s = an - 4 * n;
+ t = bn - n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+
+ /* Ensures that 5 values of n+1 limbs each fits in the product area.
+ Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
+ n = 8. */
+ ASSERT (s+t >= 5);
+
+#define v0 pp /* 2n */
+#define vm1 (scratch) /* 2n+1 */
+#define v1 (pp + 2 * n) /* 2n+1 */
+#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
+#define v2 (scratch + 4 * n + 2) /* 2n+1 */
+#define vinf (pp + 5 * n) /* s+t */
+#define bs1 pp /* n+1 */
+#define bsm1 (scratch + 2 * n + 2) /* n */
+#define asm1 (scratch + 3 * n + 3) /* n+1 */
+#define asm2 (scratch + 4 * n + 4) /* n+1 */
+#define bsm2 (pp + n + 1) /* n+1 */
+#define bs2 (pp + 2 * n + 2) /* n+1 */
+#define as2 (pp + 3 * n + 3) /* n+1 */
+#define as1 (pp + 4 * n + 4) /* n+1 */
+
+ /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
+ products will overwrite 2n+2 limbs. */
+
+#define a0a2 scratch
+#define a1a3 asm1
+
+ /* Compute as2 and asm2. */
+ flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
+
+ /* Compute bs1 and bsm1. */
+ if (t == n)
+ {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ mp_limb_t cy;
+
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+ }
+ else
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+ }
+ bs1[n] = cy >> 1;
+#else
+ bs1[n] = mpn_add_n (bs1, b0, b1, n);
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ mpn_sub_n (bsm1, b1, b0, n);
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+ }
+ else
+ {
+ mpn_sub_n (bsm1, b0, b1, n);
+ }
+#endif
+ }
+ else
+ {
+ bs1[n] = mpn_add (bs1, b0, n, b1, t);
+ if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+ {
+ mpn_sub_n (bsm1, b1, b0, t);
+ MPN_ZERO (bsm1 + t, n - t);
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+ }
+ else
+ {
+ mpn_sub (bsm1, b0, n, b1, t);
+ }
+ }
+
+ /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1 */
+ mpn_add (bs2, bs1, n+1, b1, t);
+ if (flags & toom6_vm1_neg)
+ {
+ bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+ }
+ else
+ {
+ bsm2[n] = 0;
+ if (t == n)
+ {
+ if (mpn_cmp (bsm1, b1, n) < 0)
+ {
+ mpn_sub_n (bsm2, b1, bsm1, n);
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+ }
+ else
+ {
+ mpn_sub_n (bsm2, bsm1, b1, n);
+ }
+ }
+ else
+ {
+ if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
+ {
+ mpn_sub_n (bsm2, b1, bsm1, t);
+ MPN_ZERO (bsm2 + t, n - t);
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+ }
+ else
+ {
+ mpn_sub (bsm2, bsm1, n, b1, t);
+ }
+ }
+ }
+
+ /* Compute as1 and asm1. */
+ flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2)));
+
+ ASSERT (as1[n] <= 4);
+ ASSERT (bs1[n] <= 1);
+ ASSERT (asm1[n] <= 2);
+/* ASSERT (bsm1[n] <= 1); */
+ ASSERT (as2[n] <=30);
+ ASSERT (bs2[n] <= 2);
+ ASSERT (asm2[n] <= 20);
+ ASSERT (bsm2[n] <= 1);
+
+ /* vm1, 2n+1 limbs */
+ mpn_mul (vm1, asm1, n+1, bsm1, n); /* W4 */
+
+ /* vm2, 2n+1 limbs */
+ mpn_mul_n (vm2, asm2, bsm2, n+1); /* W2 */
+
+ /* v2, 2n+1 limbs */
+ mpn_mul_n (v2, as2, bs2, n+1); /* W1 */
+
+ /* v1, 2n+1 limbs */
+ mpn_mul_n (v1, as1, bs1, n+1); /* W3 */
+
+ /* vinf, s+t limbs */ /* W0 */
+ if (s > t) mpn_mul (vinf, a4, s, b1, t);
+ else mpn_mul (vinf, b1, t, a4, s);
+
+ /* v0, 2n limbs */
+ mpn_mul_n (v0, ap, bp, n); /* W5 */
+
+ mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
+
+#undef v0
+#undef vm1
+#undef v1
+#undef vm2
+#undef v2
+#undef vinf
+#undef bs1
+#undef bs2
+#undef bsm1
+#undef bsm2
+#undef asm1
+#undef asm2
+#undef as1
+#undef as2
+#undef a0a2
+#undef b0b2
+#undef a1a3
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef b0
+#undef b1
+#undef b2
+
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom53_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom53_mul.c
new file mode 100644
index 0000000..c934297
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom53_mul.c
@@ -0,0 +1,331 @@
+/* mpn_toom53_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/3
+ times as large as bn. Or more accurately, (4/3)bn < an < (5/2)bn.
+
+ Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+ The idea of applying toom to unbalanced multiplication is due to Marco
+ Bodrato and Alberto Zanoni.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+
+ <-s-><--n--><--n--><--n--><--n-->
+ ___ ______ ______ ______ ______
+ |a4_|___a3_|___a2_|___a1_|___a0_|
+ |__b2|___b1_|___b0_|
+ <-t--><--n--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = ( a0+ a1+ a2+ a3+ a4)*( b0+ b1+ b2) # A(1)*B(1) ah <= 4 bh <= 2
+ vm1 = ( a0- a1+ a2- a3+ a4)*( b0- b1+ b2) # A(-1)*B(-1) |ah| <= 2 bh <= 1
+ v2 = ( a0+2a1+4a2+8a3+16a4)*( b0+2b1+4b2) # A(2)*B(2) ah <= 30 bh <= 6
+  vm2 = ( a0-2a1+4a2-8a3+16a4)*( b0-2b1+4b2) # A(-2)*B(-2) -9<=ah<=20 -1<=bh<=4
+ vh = (16a0+8a1+4a2+2a3+ a4)*(4b0+2b1+ b2) # A(1/2)*B(1/2) ah <= 30 bh <= 6
+ vinf= a4 * b2 # A(inf)*B(inf)
+*/
+
+void
+mpn_toom53_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn,
+ mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ mp_limb_t cy;
+ mp_ptr gp;
+ mp_ptr as1, asm1, as2, asm2, ash;
+ mp_ptr bs1, bsm1, bs2, bsm2, bsh;
+ mp_ptr tmp;
+ enum toom7_flags flags;
+ TMP_DECL;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2*n)
+#define a3 (ap + 3*n)
+#define a4 (ap + 4*n)
+#define b0 bp
+#define b1 (bp + n)
+#define b2 (bp + 2*n)
+
+ n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
+
+ s = an - 4 * n;
+ t = bn - 2 * n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+
+ TMP_MARK;
+
+ tmp = TMP_ALLOC_LIMBS (10 * (n + 1));
+ as1 = tmp; tmp += n + 1;
+ asm1 = tmp; tmp += n + 1;
+ as2 = tmp; tmp += n + 1;
+ asm2 = tmp; tmp += n + 1;
+ ash = tmp; tmp += n + 1;
+ bs1 = tmp; tmp += n + 1;
+ bsm1 = tmp; tmp += n + 1;
+ bs2 = tmp; tmp += n + 1;
+ bsm2 = tmp; tmp += n + 1;
+ bsh = tmp; tmp += n + 1;
+
+ gp = pp;
+
+ /* Compute as1 and asm1. */
+ flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));
+
+ /* Compute as2 and asm2. */
+ flags = (enum toom7_flags) (flags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp)));
+
+ /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
+ = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (ash, a1, a0, n);
+ cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
+ cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
+ if (s < n)
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_addlsh1_n (ash, a4, ash, s);
+ ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
+ MPN_INCR_U (ash + s, n+1-s, cy2);
+ }
+ else
+ ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+#else
+ cy = mpn_lshift (ash, a0, n, 1);
+ cy += mpn_add_n (ash, ash, a1, n);
+ cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+ cy += mpn_add_n (ash, ash, a2, n);
+ cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+ cy += mpn_add_n (ash, ash, a3, n);
+ cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+ ash[n] = cy + mpn_add (ash, ash, n, a4, s);
+#endif
+
+ /* Compute bs1 and bsm1. */
+ bs1[n] = mpn_add (bs1, b0, n, b2, t); /* b0 + b2 */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
+ {
+ bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
+ bsm1[n] = 0;
+ flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+ }
+ else
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, bs1, b1, n);
+ bsm1[n] = bs1[n] - (cy & 1);
+ bs1[n] += (cy >> 1);
+ }
+#else
+ if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
+ {
+ mpn_sub_n (bsm1, b1, bs1, n);
+ bsm1[n] = 0;
+ flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+ }
+ else
+ {
+ bsm1[n] = bs1[n] - mpn_sub_n (bsm1, bs1, b1, n);
+ }
+ bs1[n] += mpn_add_n (bs1, bs1, b1, n); /* b0+b1+b2 */
+#endif
+
+ /* Compute bs2 and bsm2. */
+#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
+#if HAVE_NATIVE_mpn_addlsh2_n
+ cy = mpn_addlsh2_n (bs2, b0, b2, t);
+#else /* HAVE_NATIVE_mpn_addlsh_n */
+ cy = mpn_addlsh_n (bs2, b0, b2, t, 2);
+#endif
+ if (t < n)
+ cy = mpn_add_1 (bs2 + t, b0 + t, n - t, cy);
+ bs2[n] = cy;
+#else
+ cy = mpn_lshift (gp, b2, t, 2);
+ bs2[n] = mpn_add (bs2, b0, n, gp, t);
+ MPN_INCR_U (bs2 + t, n+1-t, cy);
+#endif
+
+ gp[n] = mpn_lshift (gp, b1, n, 1);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (mpn_cmp (bs2, gp, n+1) < 0)
+ {
+ ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
+ flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, bs2, gp, n+1));
+ }
+#else
+ if (mpn_cmp (bs2, gp, n+1) < 0)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
+ flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_n (bsm2, bs2, gp, n+1));
+ }
+ mpn_add_n (bs2, bs2, gp, n+1);
+#endif
+
+ /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1)+b2. */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (bsh, b1, b0, n);
+ if (t < n)
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_addlsh1_n (bsh, b2, bsh, t);
+ bsh[n] = 2*cy + mpn_lshift (bsh + t, bsh + t, n - t, 1);
+ MPN_INCR_U (bsh + t, n+1-t, cy2);
+ }
+ else
+ bsh[n] = 2*cy + mpn_addlsh1_n (bsh, b2, bsh, n);
+#else
+ cy = mpn_lshift (bsh, b0, n, 1);
+ cy += mpn_add_n (bsh, bsh, b1, n);
+ cy = 2*cy + mpn_lshift (bsh, bsh, n, 1);
+ bsh[n] = cy + mpn_add (bsh, bsh, n, b2, t);
+#endif
+
+ ASSERT (as1[n] <= 4);
+ ASSERT (bs1[n] <= 2);
+ ASSERT (asm1[n] <= 2);
+ ASSERT (bsm1[n] <= 1);
+ ASSERT (as2[n] <= 30);
+ ASSERT (bs2[n] <= 6);
+ ASSERT (asm2[n] <= 20);
+ ASSERT (bsm2[n] <= 4);
+ ASSERT (ash[n] <= 30);
+ ASSERT (bsh[n] <= 6);
+
+#define v0 pp /* 2n */
+#define v1 (pp + 2 * n) /* 2n+1 */
+#define vinf (pp + 6 * n) /* s+t */
+#define v2 scratch /* 2n+1 */
+#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
+#define vh (scratch + 4 * n + 2) /* 2n+1 */
+#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
+#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
+ /* Total scratch need: 10*n+5 */
+
+ /* Must be in allocation order, as they overwrite one limb beyond
+ * 2n+1. */
+ mpn_mul_n (v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
+ mpn_mul_n (vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
+ mpn_mul_n (vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
+
+ /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+ mpn_mul_n (vm1, asm1, bsm1, n);
+ if (asm1[n] == 1)
+ {
+ cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+ }
+ else if (asm1[n] == 2)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy = 2 * bsm1[n] + mpn_addlsh1_n_ip1 (vm1 + n, bsm1, n);
+#else
+ cy = 2 * bsm1[n] + mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
+#endif
+ }
+ else
+ cy = 0;
+ if (bsm1[n] != 0)
+ cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+ vm1[2 * n] = cy;
+#else /* SMALLER_RECURSION */
+ vm1[2 * n] = 0;
+ mpn_mul_n (vm1, asm1, bsm1, n + ((asm1[n] | bsm1[n]) != 0));
+#endif /* SMALLER_RECURSION */
+
+ /* v1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+ mpn_mul_n (v1, as1, bs1, n);
+ if (as1[n] == 1)
+ {
+ cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+ }
+ else if (as1[n] == 2)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
+#else
+ cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+ }
+ else if (as1[n] != 0)
+ {
+ cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
+ }
+ else
+ cy = 0;
+ if (bs1[n] == 1)
+ {
+ cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+ }
+ else if (bs1[n] == 2)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+ cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+ }
+ v1[2 * n] = cy;
+#else /* SMALLER_RECURSION */
+ v1[2 * n] = 0;
+ mpn_mul_n (v1, as1, bs1, n + ((as1[n] | bs1[n]) != 0));
+#endif /* SMALLER_RECURSION */
+
+ mpn_mul_n (v0, a0, b0, n); /* v0, 2n limbs */
+
+ /* vinf, s+t limbs */
+ if (s > t) mpn_mul (vinf, a4, s, b2, t);
+ else mpn_mul (vinf, b2, t, a4, s);
+
+ mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t,
+ scratch_out);
+
+ TMP_FREE;
+}
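The bs1/bsm1 block above is the usual evaluation at +1 and -1: with B split as b0 + b1*x + b2*x^2 it forms B(1) = (b0 + b2) + b1 and |B(-1)| = |(b0 + b2) - b1|, recording in the flags word whether B(-1) came out negative. A minimal standalone sketch of the same bookkeeping, using only the documented mpn_add_n, mpn_sub_n and mpn_cmp from the public gmp.h interface (the toy_eval_pm1 name and the test values are made up for illustration, this is not GMP code):

    #include <gmp.h>

    /* bs1 <- b0 + b1 + b2 and bsm1 <- |b0 - b1 + b2|, each n+1 limbs.
       Returns nonzero when b0 + b2 < b1, i.e. when B(-1) is negative. */
    static int
    toy_eval_pm1 (mp_ptr bs1, mp_ptr bsm1,
                  mp_srcptr b0, mp_srcptr b1, mp_srcptr b2, mp_size_t n)
    {
      int neg;
      bs1[n] = mpn_add_n (bs1, b0, b2, n);           /* b0 + b2 */
      neg = (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0);
      if (neg)
        {
          mpn_sub_n (bsm1, b1, bs1, n);              /* b1 - (b0 + b2) */
          bsm1[n] = 0;
        }
      else
        bsm1[n] = bs1[n] - mpn_sub_n (bsm1, bs1, b1, n);
      bs1[n] += mpn_add_n (bs1, bs1, b1, n);         /* b0 + b1 + b2 */
      return neg;
    }

    int
    main (void)
    {
      mp_limb_t b0[1] = { 5 }, b1[1] = { 9 }, b2[1] = { 2 };
      mp_limb_t bs1[2], bsm1[2];
      int neg = toy_eval_pm1 (bs1, bsm1, b0, b1, b2, 1);
      /* B(1) = 16, |B(-1)| = 2, and B(-1) = 5 - 9 + 2 is negative. */
      return !(bs1[0] == 16 && bsm1[0] == 2 && neg);
    }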
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom54_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom54_mul.c
new file mode 100644
index 0000000..343b02e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom54_mul.c
@@ -0,0 +1,142 @@
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+
+/* Toom-4.5, the splitting 5x4 unbalanced version.
+ Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+ <--s-><--n--><--n--><--n--><--n-->
+ ____ ______ ______ ______ ______
+ |_a4_|__a3__|__a2__|__a1__|__a0__|
+ |b3_|__b2__|__b1__|__b0__|
+ <-t-><--n--><--n--><--n-->
+
+*/
+#define TOOM_54_MUL_N_REC(p, a, b, n, ws) \
+ do { mpn_mul_n (p, a, b, n); \
+ } while (0)
+
+#define TOOM_54_MUL_REC(p, a, na, b, nb, ws) \
+ do { mpn_mul (p, a, na, b, nb); \
+ } while (0)
+
+void
+mpn_toom54_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ int sign;
+
+ /***************************** decomposition *******************************/
+#define a4 (ap + 4 * n)
+#define b3 (bp + 3 * n)
+
+ ASSERT (an >= bn);
+ n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
+
+ s = an - 4 * n;
+ t = bn - 3 * n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+ /* Required by mpn_toom_interpolate_8pts. */
+ ASSERT ( s + t >= n );
+ ASSERT ( s + t > 4);
+ ASSERT ( n > 2);
+
+#define r8 pp /* 2n */
+#define r7 scratch /* 3n+1 */
+#define r5 (pp + 3*n) /* 3n+1 */
+#define v0 (pp + 3*n) /* n+1 */
+#define v1 (pp + 4*n+1) /* n+1 */
+#define v2 (pp + 5*n+2) /* n+1 */
+#define v3 (pp + 6*n+3) /* n+1 */
+#define r3 (scratch + 3 * n + 1) /* 3n+1 */
+#define r1 (pp + 7*n) /* s+t <= 2*n */
+#define ws (scratch + 6 * n + 2) /* ??? */
+
+ /* Also allocate 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
+ need all of them, when DO_mpn_sublsh_n uses a scratch */
+ /********************** evaluation and recursive calls *********************/
+ /* $\pm4$ */
+ sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp)
+ ^ mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+ TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+ mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+ /* $\pm1$ */
+ sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s, pp)
+ ^ mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+ TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+ mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+ /* $\pm2$ */
+ sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp)
+ ^ mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+ TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+ mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+ /* A(0)*B(0) */
+ TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
+
+ /* Infinity */
+ if (s > t) {
+ TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
+ } else {
+ TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
+ };
+
+ mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a4
+#undef b3
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
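For the 5x4 splitting sketched in the header comment of toom54_mul.c, the block size n and the tail sizes s and t follow directly from an and bn. A quick standalone check of that arithmetic, with purely illustrative operand sizes:

    #include <assert.h>
    #include <stddef.h>

    int
    main (void)
    {
      size_t an = 100, bn = 82;     /* illustrative limb counts, an ~ (5/4) bn */
      size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / 5 : (bn - 1) / 4);
      size_t s = an - 4 * n;        /* limbs in the top block a4 */
      size_t t = bn - 3 * n;        /* limbs in the top block b3 */
      assert (n == 21 && s == 16 && t == 19);
      assert (0 < s && s <= n && 0 < t && t <= n);
      assert (s + t >= n);          /* needed by mpn_toom_interpolate_8pts */
      return 0;
    }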
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom62_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom62_mul.c
new file mode 100644
index 0000000..d971cc0
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom62_mul.c
@@ -0,0 +1,310 @@
+/* mpn_toom62_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 3 times
+ as large as bn. Or more accurately, (5/2)bn < an < 6bn.
+
+ Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+ The idea of applying toom to unbalanced multiplication is due to Marco
+ Bodrato and Alberto Zanoni.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in:
+ 0, +1, -1, +2, -2, 1/2, +inf
+
+ <-s-><--n--><--n--><--n--><--n--><--n-->
+ ___ ______ ______ ______ ______ ______
+ |a5_|___a4_|___a3_|___a2_|___a1_|___a0_|
+ |_b1_|___b0_|
+ <-t--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = ( a0+ a1+ a2+ a3+ a4+ a5)*( b0+ b1) # A(1)*B(1) ah <= 5 bh <= 1
+ vm1 = ( a0- a1+ a2- a3+ a4- a5)*( b0- b1) # A(-1)*B(-1) |ah| <= 2 bh = 0
+ v2 = ( a0+ 2a1+4a2+8a3+16a4+32a5)*( b0+2b1) # A(2)*B(2) ah <= 62 bh <= 2
+ vm2 = ( a0- 2a1+4a2-8a3+16a4-32a5)*( b0-2b1) # A(-2)*B(-2) -41<=ah<=20 -1<=bh<=0
+ vh = (32a0+16a1+8a2+4a3+ 2a4+ a5)*(2b0+ b1) # A(1/2)*B(1/2) ah <= 62 bh <= 2
+ vinf= a5 * b1 # A(inf)*B(inf)
+*/
+
+void
+mpn_toom62_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn,
+ mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ mp_limb_t cy;
+ mp_ptr as1, asm1, as2, asm2, ash;
+ mp_ptr bs1, bsm1, bs2, bsm2, bsh;
+ mp_ptr gp;
+ enum toom7_flags aflags, bflags;
+ TMP_DECL;
+
+#define a0 ap
+#define a1 (ap + n)
+#define a2 (ap + 2*n)
+#define a3 (ap + 3*n)
+#define a4 (ap + 4*n)
+#define a5 (ap + 5*n)
+#define b0 bp
+#define b1 (bp + n)
+
+ n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
+
+ s = an - 5 * n;
+ t = bn - n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+
+ TMP_MARK;
+
+ as1 = TMP_SALLOC_LIMBS (n + 1);
+ asm1 = TMP_SALLOC_LIMBS (n + 1);
+ as2 = TMP_SALLOC_LIMBS (n + 1);
+ asm2 = TMP_SALLOC_LIMBS (n + 1);
+ ash = TMP_SALLOC_LIMBS (n + 1);
+
+ bs1 = TMP_SALLOC_LIMBS (n + 1);
+ bsm1 = TMP_SALLOC_LIMBS (n);
+ bs2 = TMP_SALLOC_LIMBS (n + 1);
+ bsm2 = TMP_SALLOC_LIMBS (n + 1);
+ bsh = TMP_SALLOC_LIMBS (n + 1);
+
+ gp = pp;
+
+ /* Compute as1 and asm1. */
+ aflags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp));
+
+ /* Compute as2 and asm2. */
+ aflags = (enum toom7_flags) (aflags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp)));
+
+ /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
+ = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (ash, a1, a0, n);
+ cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
+ cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
+ cy = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+ if (s < n)
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_addlsh1_n (ash, a5, ash, s);
+ ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
+ MPN_INCR_U (ash + s, n+1-s, cy2);
+ }
+ else
+ ash[n] = 2*cy + mpn_addlsh1_n (ash, a5, ash, n);
+#else
+ cy = mpn_lshift (ash, a0, n, 1);
+ cy += mpn_add_n (ash, ash, a1, n);
+ cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+ cy += mpn_add_n (ash, ash, a2, n);
+ cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+ cy += mpn_add_n (ash, ash, a3, n);
+ cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+ cy += mpn_add_n (ash, ash, a4, n);
+ cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+ ash[n] = cy + mpn_add (ash, ash, n, a5, s);
+#endif
+
+ /* Compute bs1 and bsm1. */
+ if (t == n)
+ {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+ bflags = toom7_w3_neg;
+ }
+ else
+ {
+ cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+ bflags = (enum toom7_flags) 0;
+ }
+ bs1[n] = cy >> 1;
+#else
+ bs1[n] = mpn_add_n (bs1, b0, b1, n);
+ if (mpn_cmp (b0, b1, n) < 0)
+ {
+ mpn_sub_n (bsm1, b1, b0, n);
+ bflags = toom7_w3_neg;
+ }
+ else
+ {
+ mpn_sub_n (bsm1, b0, b1, n);
+ bflags = (enum toom7_flags) 0;
+ }
+#endif
+ }
+ else
+ {
+ bs1[n] = mpn_add (bs1, b0, n, b1, t);
+ if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+ {
+ mpn_sub_n (bsm1, b1, b0, t);
+ MPN_ZERO (bsm1 + t, n - t);
+ bflags = toom7_w3_neg;
+ }
+ else
+ {
+ mpn_sub (bsm1, b0, n, b1, t);
+ bflags = (enum toom7_flags) 0;
+ }
+ }
+
+ /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
+ bsm1 - b1 */
+ mpn_add (bs2, bs1, n + 1, b1, t);
+ if (bflags & toom7_w3_neg)
+ {
+ bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
+ bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+ }
+ else
+ {
+ /* FIXME: Simplify this logic? */
+ if (t < n)
+ {
+ if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
+ MPN_ZERO (bsm2 + t, n + 1 - t);
+ bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, t));
+ bsm2[n] = 0;
+ }
+ }
+ else
+ {
+ if (mpn_cmp (bsm1, b1, n) < 0)
+ {
+ ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
+ bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_n (bsm2, bsm1, b1, n));
+ }
+ bsm2[n] = 0;
+ }
+ }
+
+ /* Compute bsh, recycling bs1. bsh=bs1+b0; */
+ bsh[n] = bs1[n] + mpn_add_n (bsh, bs1, b0, n);
+
+ ASSERT (as1[n] <= 5);
+ ASSERT (bs1[n] <= 1);
+ ASSERT (asm1[n] <= 2);
+ ASSERT (as2[n] <= 62);
+ ASSERT (bs2[n] <= 2);
+ ASSERT (asm2[n] <= 41);
+ ASSERT (bsm2[n] <= 1);
+ ASSERT (ash[n] <= 62);
+ ASSERT (bsh[n] <= 2);
+
+#define v0 pp /* 2n */
+#define v1 (pp + 2 * n) /* 2n+1 */
+#define vinf (pp + 6 * n) /* s+t */
+#define v2 scratch /* 2n+1 */
+#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
+#define vh (scratch + 4 * n + 2) /* 2n+1 */
+#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
+#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
+ /* Total scratch need: 10*n+5 */
+
+ /* Must be in allocation order, as they overwrite one limb beyond
+ * 2n+1. */
+ mpn_mul_n (v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
+ mpn_mul_n (vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
+ mpn_mul_n (vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
+
+ /* vm1, 2n+1 limbs */
+ mpn_mul_n (vm1, asm1, bsm1, n);
+ cy = 0;
+ if (asm1[n] == 1)
+ {
+ cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+ }
+ else if (asm1[n] == 2)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (vm1 + n, vm1 + n, bsm1, n);
+#else
+ cy = mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
+#endif
+ }
+ vm1[2 * n] = cy;
+
+ /* v1, 2n+1 limbs */
+ mpn_mul_n (v1, as1, bs1, n);
+ if (as1[n] == 1)
+ {
+ cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+ }
+ else if (as1[n] == 2)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
+#else
+ cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+ }
+ else if (as1[n] != 0)
+ {
+ cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
+ }
+ else
+ cy = 0;
+ if (bs1[n] != 0)
+ cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+ v1[2 * n] = cy;
+
+ mpn_mul_n (v0, a0, b0, n); /* v0, 2n limbs */
+
+ /* vinf, s+t limbs */
+ if (s > t) mpn_mul (vinf, a5, s, b1, t);
+ else mpn_mul (vinf, b1, t, a5, s);
+
+ mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) (aflags ^ bflags),
+ vm2, vm1, v2, vh, s + t, scratch_out);
+
+ TMP_FREE;
+}
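The ash value built inside mpn_toom62_mul above is 32*a0 + 16*a1 + 8*a2 + 4*a3 + 2*a4 + a5, i.e. 2^5 * A(1/2) (equivalently, the coefficient-reversed polynomial evaluated at 2), accumulated by the Horner recurrence 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 using mpn_addlsh1_n or explicit shift-and-add. The same recurrence on single-word toy coefficients, just to make the order of operations concrete (illustrative values, not GMP code):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      /* A(x) = a0 + a1*x + ... + a5*x^5, low degree first. */
      uint64_t a[6] = { 3, 1, 4, 1, 5, 9 };
      uint64_t acc = a[0];
      for (int i = 1; i < 6; i++)
        acc = 2 * acc + a[i];       /* double, then add the next block */
      assert (acc == 32*3 + 16*1 + 8*4 + 4*1 + 2*5 + 9);   /* 2^5 * A(1/2) */
      return 0;
    }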
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom63_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom63_mul.c
new file mode 100644
index 0000000..181996d
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom63_mul.c
@@ -0,0 +1,231 @@
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Stores |{ap,n}-{bp,n}| in {rp,n}; returns 0 if {ap,n} >= {bp,n}, ~0 otherwise. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+ mp_limb_t x, y;
+ while (--n >= 0)
+ {
+ x = ap[n];
+ y = bp[n];
+ if (x != y)
+ {
+ n++;
+ if (x > y)
+ {
+ mpn_sub_n (rp, ap, bp, n);
+ return 0;
+ }
+ else
+ {
+ mpn_sub_n (rp, bp, ap, n);
+ return ~0;
+ }
+ }
+ rp[n] = 0;
+ }
+ return 0;
+}
+
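+/* Computes {rm,n} <- |{rp,n} - {rs,n}| and {rp,n} <- {rp,n} + {rs,n};
+ returns the sign result of abs_sub_n (0 or ~0). */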
+static int
+abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
+ int result;
+ result = abs_sub_n (rm, rp, rs, n);
+ ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
+ return result;
+}
+
+
+/* Toom-4.5, the splitting 6x3 unbalanced version.
+ Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+ <--s-><--n--><--n--><--n--><--n--><--n-->
+ ____ ______ ______ ______ ______ ______
+ |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
+ |b2_|__b1__|__b0__|
+ <-t-><--n--><--n-->
+
+*/
+#define TOOM_63_MUL_N_REC(p, a, b, n, ws) \
+ do { mpn_mul_n (p, a, b, n); \
+ } while (0)
+
+#define TOOM_63_MUL_REC(p, a, na, b, nb, ws) \
+ do { mpn_mul (p, a, na, b, nb); \
+ } while (0)
+
+void
+mpn_toom63_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ mp_limb_t cy;
+ int sign;
+
+ /***************************** decomposition *******************************/
+#define a5 (ap + 5 * n)
+#define b0 (bp + 0 * n)
+#define b1 (bp + 1 * n)
+#define b2 (bp + 2 * n)
+
+ ASSERT (an >= bn);
+ n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
+
+ s = an - 5 * n;
+ t = bn - 2 * n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+ /* WARNING! it assumes s+t>=n */
+ ASSERT ( s + t >= n );
+ ASSERT ( s + t > 4);
+ /* WARNING! it assumes n>2 */
+ ASSERT ( n > 2);
+
+#define r8 pp /* 2n */
+#define r7 scratch /* 3n+1 */
+#define r5 (pp + 3*n) /* 3n+1 */
+#define v0 (pp + 3*n) /* n+1 */
+#define v1 (pp + 4*n+1) /* n+1 */
+#define v2 (pp + 5*n+2) /* n+1 */
+#define v3 (pp + 6*n+3) /* n+1 */
+#define r3 (scratch + 3 * n + 1) /* 3n+1 */
+#define r1 (pp + 7*n) /* s+t <= 2*n */
+#define ws (scratch + 6 * n + 2) /* ??? */
+
+ /* Also allocate 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
+ need all of them, when DO_mpn_sublsh_n uses a scratch */
+/* if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
+
+ /********************** evaluation and recursive calls *********************/
+ /* $\pm4$ */
+ sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
+ pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
+ /* FIXME: use addlsh */
+ v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
+ if ( n == t )
+ v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
+ else
+ v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
+ sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
+ TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+ TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+ mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+ /* $\pm1$ */
+ sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp);
+ /* Compute bs1 and bsm1. Code taken from toom33 */
+ cy = mpn_add (ws, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
+ {
+ cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
+ v3[n] = cy >> 1;
+ v1[n] = 0;
+ sign = ~sign;
+ }
+ else
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
+ v3[n] = cy + (cy2 >> 1);
+ v1[n] = cy - (cy2 & 1);
+ }
+#else
+ v3[n] = cy + mpn_add_n (v3, ws, b1, n);
+ if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
+ {
+ mpn_sub_n (v1, b1, ws, n);
+ v1[n] = 0;
+ sign = ~sign;
+ }
+ else
+ {
+ cy -= mpn_sub_n (v1, ws, b1, n);
+ v1[n] = cy;
+ }
+#endif
+ TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+ TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+ mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+ /* $\pm2$ */
+ sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
+ pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
+ /* FIXME: use addlsh or addlsh2 */
+ v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
+ if ( n == t )
+ v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
+ else
+ v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
+ sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
+ TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+ TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+ mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+ /* A(0)*B(0) */
+ TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
+
+ /* Infinity */
+ if (s > t) {
+ TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
+ } else {
+ TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
+ };
+
+ mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a5
+#undef b0
+#undef b1
+#undef b2
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
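In mpn_toom63_mul, the b operand is evaluated at +4 and -4 by building the even part 16*b2 + b0 and the odd part 4*b1 separately, then letting abs_sub_add_n produce their sum and absolute difference in one pass. On toy single-word coefficients the arithmetic being performed is simply (illustrative values only):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      int64_t b0 = 7, b1 = 5, b2 = 2;   /* B(x) = b0 + b1*x + b2*x^2 */
      int64_t even = 16 * b2 + b0;      /* even-degree part at x = 4 */
      int64_t odd  = 4 * b1;            /* odd-degree part at x = 4  */
      int64_t bp4  = even + odd;        /* B(+4) */
      int64_t bm4  = even - odd;        /* B(-4); the mpn code keeps |B(-4)|
                                           and tracks its sign separately */
      assert (bp4 == b0 + 4*b1 + 16*b2 && bm4 == b0 - 4*b1 + 16*b2);
      return 0;
    }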
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom6_sqr.c b/vendor/gmp-6.3.0/mpn/generic/toom6_sqr.c
new file mode 100644
index 0000000..336eef9
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom6_sqr.c
@@ -0,0 +1,181 @@
+/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase 1
+#define MAYBE_sqr_toom2 1
+#define MAYBE_sqr_above_toom2 1
+#define MAYBE_sqr_toom3 1
+#define MAYBE_sqr_above_toom3 1
+#define MAYBE_sqr_above_toom4 1
+#else
+#ifdef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
+#else
+#define SQR_TOOM6_MAX \
+ ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ? \
+ ((SQR_FFT_THRESHOLD+6*2-1+5)/6) \
+ : MP_SIZE_T_MAX )
+#endif
+#define MAYBE_sqr_basecase \
+ (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase \
+ (SQR_TOOM6_MAX >= SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2 \
+ (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2 \
+ (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3 \
+ (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3 \
+ (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom4 \
+ (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
+#endif
+
+#define TOOM6_SQR_REC(p, a, n, ws) \
+ do { \
+ if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
+ || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) \
+ mpn_sqr_basecase (p, a, n); \
+ else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
+ || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) \
+ mpn_toom2_sqr (p, a, n, ws); \
+ else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
+ || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) \
+ mpn_toom3_sqr (p, a, n, ws); \
+ else if (! MAYBE_sqr_above_toom4 \
+ || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)) \
+ mpn_toom4_sqr (p, a, n, ws); \
+ else \
+ mpn_toom6_sqr (p, a, n, ws); \
+ } while (0)
+
+void
+mpn_toom6_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+ mp_size_t n, s;
+
+ /***************************** decomposition *******************************/
+
+ ASSERT( an >= 18 );
+
+ n = 1 + (an - 1) / (size_t) 6;
+
+ s = an - 5 * n;
+
+ ASSERT (0 < s && s <= n);
+
+#define r4 (pp + 3 * n) /* 3n+1 */
+#define r2 (pp + 7 * n) /* 3n+1 */
+#define r0 (pp +11 * n) /* s+t <= 2*n */
+#define r5 (scratch) /* 3n+1 */
+#define r3 (scratch + 3 * n + 1) /* 3n+1 */
+#define r1 (scratch + 6 * n + 2) /* 3n+1 */
+#define v0 (pp + 7 * n) /* n+1 */
+#define v2 (pp + 9 * n+2) /* n+1 */
+#define wse (scratch + 9 * n + 3) /* 3n+1 */
+
+ /* Also allocate 3n+1 limbs for ws... toom_interpolate_12pts may
+ need all of them, when DO_mpn_sublsh_n uses a scratch */
+/* if (scratch== NULL) */
+/* scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
+
+ /********************** evaluation and recursive calls *********************/
+ /* $\pm1/2$ */
+ mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
+ TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
+ TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+ mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
+
+ /* $\pm1$ */
+ mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp);
+ TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
+ TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
+ mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
+
+ /* $\pm4$ */
+ mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
+ TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
+ TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
+ mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
+
+ /* $\pm1/4$ */
+ mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
+ TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
+ TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+ mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
+
+ /* $\pm2$ */
+ mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
+ TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
+ TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
+ mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
+
+#undef v0
+#undef v2
+
+ /* A(0)*B(0) */
+ TOOM6_SQR_REC(pp, ap, n, wse);
+
+ mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+
+}
+#undef TOOM6_SQR_REC
+#undef MAYBE_sqr_basecase
+#undef MAYBE_sqr_above_basecase
+#undef MAYBE_sqr_toom2
+#undef MAYBE_sqr_above_toom2
+#undef MAYBE_sqr_toom3
+#undef MAYBE_sqr_above_toom3
+#undef MAYBE_sqr_above_toom4
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom6h_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom6h_mul.c
new file mode 100644
index 0000000..637f2a5
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom6h_mul.c
@@ -0,0 +1,262 @@
+/* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22 1
+#define MAYBE_mul_toom33 1
+#define MAYBE_mul_toom6h 1
+#else
+#define MAYBE_mul_basecase \
+ (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22 \
+ (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom33 \
+ (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
+#define MAYBE_mul_toom6h \
+ (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
+#endif
+
+#define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
+ do { \
+ if (MAYBE_mul_basecase \
+ && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
+ mpn_mul_basecase (p, a, n, b, n); \
+ if (f) \
+ mpn_mul_basecase (p2, a2, n, b2, n); \
+ } else if (MAYBE_mul_toom22 \
+ && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
+ mpn_toom22_mul (p, a, n, b, n, ws); \
+ if (f) \
+ mpn_toom22_mul (p2, a2, n, b2, n, ws); \
+ } else if (MAYBE_mul_toom33 \
+ && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
+ mpn_toom33_mul (p, a, n, b, n, ws); \
+ if (f) \
+ mpn_toom33_mul (p2, a2, n, b2, n, ws); \
+ } else if (! MAYBE_mul_toom6h \
+ || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
+ mpn_toom44_mul (p, a, n, b, n, ws); \
+ if (f) \
+ mpn_toom44_mul (p2, a2, n, b2, n, ws); \
+ } else { \
+ mpn_toom6h_mul (p, a, n, b, n, ws); \
+ if (f) \
+ mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
+ } \
+ } while (0)
+
+#define TOOM6H_MUL_REC(p, a, na, b, nb, ws) \
+ do { mpn_mul (p, a, na, b, nb); \
+ } while (0)
+
+/* Toom-6.5, compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
+ With: an >= bn >= 46, an*6 < bn * 17.
+ It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
+
+ Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
+*/
+/* Estimate on needed scratch:
+ S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
+ since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
+ */
+
+void
+mpn_toom6h_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ int p, q, half;
+ int sign;
+
+ /***************************** decomposition *******************************/
+
+ ASSERT (an >= bn);
+ /* Cannot handle too small operands */
+ ASSERT (bn >= 42);
+ /* Cannot handle too much imbalance */
+ ASSERT ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17));
+
+ /* Limit num/den is a rational number between
+ (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1)) */
+#define LIMIT_numerator (18)
+#define LIMIT_denominat (17)
+
+ if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
+ {
+ n = 1 + (an - 1) / (size_t) 6;
+ p = q = 5;
+ half = 0;
+
+ s = an - 5 * n;
+ t = bn - 5 * n;
+ }
+ else {
+ if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
+ { p = 7; q = 6; }
+ else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
+ { p = 7; q = 5; }
+ else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn) /* is 4*... < 8*... */
+ { p = 8; q = 5; }
+ else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn) /* is 4*... < 8*... */
+ { p = 8; q = 4; }
+ else
+ { p = 9; q = 4; }
+
+ half = (p ^ q) & 1;
+ n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+ p--; q--;
+
+ s = an - p * n;
+ t = bn - q * n;
+
+ /* With LIMIT = 16/15, the following recovery is needed only if bn<=73 */
+ if (half) { /* Recover from badly chosen splitting */
+ if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+ else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
+ }
+ }
+#undef LIMIT_numerator
+#undef LIMIT_denominat
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+ ASSERT (half || s + t > 3);
+ ASSERT (n > 2);
+
+#define r4 (pp + 3 * n) /* 3n+1 */
+#define r2 (pp + 7 * n) /* 3n+1 */
+#define r0 (pp +11 * n) /* s+t <= 2*n */
+#define r5 (scratch) /* 3n+1 */
+#define r3 (scratch + 3 * n + 1) /* 3n+1 */
+#define r1 (scratch + 6 * n + 2) /* 3n+1 */
+#define v0 (pp + 7 * n) /* n+1 */
+#define v1 (pp + 8 * n+1) /* n+1 */
+#define v2 (pp + 9 * n+2) /* n+1 */
+#define v3 (scratch + 9 * n + 3) /* n+1 */
+#define wsi (scratch + 9 * n + 3) /* 3n+1 */
+#define wse (scratch +10 * n + 4) /* 2n+1 */
+
+ /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
+ need all of them */
+/* if (scratch == NULL) */
+/* scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
+ ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
+ ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
+
+ /********************** evaluation and recursive calls *********************/
+ /* $\pm1/2$ */
+ sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
+ mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
+ /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
+
+ /* $\pm1$ */
+ sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
+ if (UNLIKELY (q == 3))
+ sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
+ else
+ sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
+ /* A(-1)*B(-1) */ /* A(1)*B(1) */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
+
+ /* $\pm4$ */
+ sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
+ mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
+ /* A(-4)*B(-4) */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+ mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
+
+ /* $\pm1/4$ */
+ sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
+ mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
+ /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
+
+ /* $\pm2$ */
+ sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
+ mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
+ /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
+
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef wse
+
+ /* A(0)*B(0) */
+ TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
+
+ /* Infinity */
+ if (UNLIKELY (half != 0)) {
+ if (s > t) {
+ TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
+ } else {
+ TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
+ };
+ };
+
+ mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef wsi
+}
+
+#undef TOOM6H_MUL_N_REC
+#undef TOOM6H_MUL_REC
+#undef MAYBE_mul_basecase
+#undef MAYBE_mul_toom22
+#undef MAYBE_mul_toom33
+#undef MAYBE_mul_toom6h
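As a concrete instance of the split selection at the top of mpn_toom6h_mul: with an = 140 and bn = 100 the limit tests fall through to the p = 7, q = 5 case, half stays 0 because p + q is even, and after p and q are decremented to the block counts the sizes work out to n = 20, s = 20, t = 20. A standalone check of just that size arithmetic (operand sizes are illustrative):

    #include <assert.h>
    #include <stddef.h>

    int
    main (void)
    {
      size_t an = 140, bn = 100;        /* illustrative, unbalanced operands */
      size_t p = 7, q = 5;              /* split picked by the limit tests */
      size_t n = 1 + (q * an >= p * bn ? (an - 1) / p : (bn - 1) / q);
      p--; q--;                         /* now the number of full-size blocks */
      size_t s = an - p * n, t = bn - q * n;
      assert (n == 20 && s == 20 && t == 20);
      assert (0 < s && s <= n && 0 < t && t <= n);
      return 0;
    }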
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom8_sqr.c b/vendor/gmp-6.3.0/mpn/generic/toom8_sqr.c
new file mode 100644
index 0000000..03e5c64
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom8_sqr.c
@@ -0,0 +1,225 @@
+/* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented.
+#endif
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+#ifndef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase 1
+#define MAYBE_sqr_toom2 1
+#define MAYBE_sqr_above_toom2 1
+#define MAYBE_sqr_toom3 1
+#define MAYBE_sqr_above_toom3 1
+#define MAYBE_sqr_toom4 1
+#define MAYBE_sqr_above_toom4 1
+#define MAYBE_sqr_above_toom6 1
+#else
+#define SQR_TOOM8_MAX \
+ ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ? \
+ ((SQR_FFT_THRESHOLD+8*2-1+7)/8) \
+ : MP_SIZE_T_MAX )
+#define MAYBE_sqr_basecase \
+ (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase \
+ (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2 \
+ (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2 \
+ (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3 \
+ (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3 \
+ (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_toom4 \
+ (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
+#define MAYBE_sqr_above_toom4 \
+ (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
+#define MAYBE_sqr_above_toom6 \
+ (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
+#endif
+
+#define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws) \
+ do { \
+ if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
+ || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) { \
+ mpn_sqr_basecase (p, a, n); \
+ if (f) mpn_sqr_basecase (p2, a2, n); \
+ } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
+ || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) { \
+ mpn_toom2_sqr (p, a, n, ws); \
+ if (f) mpn_toom2_sqr (p2, a2, n, ws); \
+ } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
+ || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) { \
+ mpn_toom3_sqr (p, a, n, ws); \
+ if (f) mpn_toom3_sqr (p2, a2, n, ws); \
+ } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4 \
+ || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) { \
+ mpn_toom4_sqr (p, a, n, ws); \
+ if (f) mpn_toom4_sqr (p2, a2, n, ws); \
+ } else if (! MAYBE_sqr_above_toom6 \
+ || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) { \
+ mpn_toom6_sqr (p, a, n, ws); \
+ if (f) mpn_toom6_sqr (p2, a2, n, ws); \
+ } else { \
+ mpn_toom8_sqr (p, a, n, ws); \
+ if (f) mpn_toom8_sqr (p2, a2, n, ws); \
+ } \
+ } while (0)
+
+void
+mpn_toom8_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+ mp_size_t n, s;
+
+ /***************************** decomposition *******************************/
+
+ ASSERT ( an >= 40 );
+
+ n = 1 + ((an - 1)>>3);
+
+ s = an - 7 * n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT ( s + s > 3 );
+
+#define r6 (pp + 3 * n) /* 3n+1 */
+#define r4 (pp + 7 * n) /* 3n+1 */
+#define r2 (pp +11 * n) /* 3n+1 */
+#define r0 (pp +15 * n) /* s+t <= 2*n */
+#define r7 (scratch) /* 3n+1 */
+#define r5 (scratch + 3 * n + 1) /* 3n+1 */
+#define r3 (scratch + 6 * n + 2) /* 3n+1 */
+#define r1 (scratch + 9 * n + 3) /* 3n+1 */
+#define v0 (pp +11 * n) /* n+1 */
+#define v2 (pp +13 * n+2) /* n+1 */
+#define wse (scratch +12 * n + 4) /* 3n+1 */
+
+ /* Also allocate 3n+1 limbs for ws... toom_interpolate_16pts may
+ need all of them, when DO_mpn_sublsh_n uses a scratch */
+/* if (scratch == NULL) */
+/* scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
+
+ /********************** evaluation and recursive calls *********************/
+ /* $\pm1/8$ */
+ mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
+ /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+ TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
+ mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
+
+ /* $\pm1/4$ */
+ mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
+ /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+ TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
+ mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
+
+ /* $\pm2$ */
+ mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
+ /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+ TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
+ mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
+
+ /* $\pm8$ */
+ mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
+ /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+ TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
+ mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
+
+ /* $\pm1/2$ */
+ mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
+ /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+ TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
+ mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
+
+ /* $\pm1$ */
+ mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s, pp);
+ /* A(-1)*B(-1) */ /* A(1)*B(1) */
+ TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
+ mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
+
+ /* $\pm4$ */
+ mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
+ /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+ TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
+ mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
+
+#undef v0
+#undef v2
+
+ /* A(0)*B(0) */
+ TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
+
+ mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef r6
+#undef wse
+
+}
+
+#undef TOOM8_SQR_REC
+#undef MAYBE_sqr_basecase
+#undef MAYBE_sqr_above_basecase
+#undef MAYBE_sqr_toom2
+#undef MAYBE_sqr_above_toom2
+#undef MAYBE_sqr_toom3
+#undef MAYBE_sqr_above_toom3
+#undef MAYBE_sqr_above_toom4
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom8h_mul.c b/vendor/gmp-6.3.0/mpn/generic/toom8h_mul.c
new file mode 100644
index 0000000..5ba259a
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom8h_mul.c
@@ -0,0 +1,305 @@
+/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented.
+#endif
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22 1
+#define MAYBE_mul_toom33 1
+#define MAYBE_mul_toom44 1
+#define MAYBE_mul_toom8h 1
+#else
+#define MAYBE_mul_basecase \
+ (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22 \
+ (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom33 \
+ (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
+#define MAYBE_mul_toom44 \
+ (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
+#define MAYBE_mul_toom8h \
+ (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
+#endif
+
+#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
+ do { \
+ if (MAYBE_mul_basecase \
+ && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
+ mpn_mul_basecase (p, a, n, b, n); \
+ if (f) mpn_mul_basecase (p2, a2, n, b2, n); \
+ } else if (MAYBE_mul_toom22 \
+ && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
+ mpn_toom22_mul (p, a, n, b, n, ws); \
+ if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws); \
+ } else if (MAYBE_mul_toom33 \
+ && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
+ mpn_toom33_mul (p, a, n, b, n, ws); \
+ if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws); \
+ } else if (MAYBE_mul_toom44 \
+ && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
+ mpn_toom44_mul (p, a, n, b, n, ws); \
+ if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws); \
+ } else if (! MAYBE_mul_toom8h \
+ || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) { \
+ mpn_toom6h_mul (p, a, n, b, n, ws); \
+ if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
+ } else { \
+ mpn_toom8h_mul (p, a, n, b, n, ws); \
+ if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws); \
+ } \
+ } while (0)
+
+#define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \
+ do { mpn_mul (p, a, na, b, nb); } while (0)
+
+/* Toom-8.5, compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
+ With: an >= bn >= 86, an*5 < bn * 11.
+ It _may_ work with bn<=?? and bn*?? < an*? < bn*??
+
+ Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
+*/
+/* Estimate on needed scratch:
+ S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
+ since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
+ */
+
+void
+mpn_toom8h_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ int p, q, half;
+ int sign;
+
+ /***************************** decomposition *******************************/
+
+ ASSERT (an >= bn);
+ /* Cannot handle too small operands */
+ ASSERT (bn >= 86);
+ /* Cannot handle too much imbalance */
+ ASSERT (an <= bn*4);
+ ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
+ ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
+ ASSERT (GMP_NUMB_BITS > 9*3 || an*2 <= bn* 3);
+
+ /* Limit num/den is a rational number between
+ (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */
+#define LIMIT_numerator (21)
+#define LIMIT_denominat (20)
+
+ if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
+ {
+ half = 0;
+ n = 1 + ((an - 1)>>3);
+ p = q = 7;
+ s = an - 7 * n;
+ t = bn - 7 * n;
+ }
+ else
+ {
+ if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
+ { p = 9; q = 8; }
+ else if (GMP_NUMB_BITS <= 9*3 ||
+ an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
+ { p = 9; q = 7; }
+ else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
+ { p =10; q = 7; }
+ else if (GMP_NUMB_BITS <= 10*3 ||
+ an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
+ { p =10; q = 6; }
+ else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
+ { p =11; q = 6; }
+ else if (GMP_NUMB_BITS <= 11*3 ||
+ an * 4 < 9 * bn)
+ { p =11; q = 5; }
+ else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn) /* is 4*... <12*... */
+ { p =12; q = 5; }
+ else if (GMP_NUMB_BITS <= 12*3 ||
+ an * 9 < 28 * bn ) /* is 4*... <12*... */
+ { p =12; q = 4; }
+ else
+ { p =13; q = 4; }
+
+ half = (p+q)&1;
+ n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+ p--; q--;
+
+ s = an - p * n;
+ t = bn - q * n;
+
+ if(half) { /* Recover from badly chosen splitting */
+ if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+ else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
+ }
+ }
+#undef LIMIT_numerator
+#undef LIMIT_denominat
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+ ASSERT (half || s + t > 3);
+ ASSERT (n > 2);
+
+#define r6 (pp + 3 * n) /* 3n+1 */
+#define r4 (pp + 7 * n) /* 3n+1 */
+#define r2 (pp +11 * n) /* 3n+1 */
+#define r0 (pp +15 * n) /* s+t <= 2*n */
+#define r7 (scratch) /* 3n+1 */
+#define r5 (scratch + 3 * n + 1) /* 3n+1 */
+#define r3 (scratch + 6 * n + 2) /* 3n+1 */
+#define r1 (scratch + 9 * n + 3) /* 3n+1 */
+#define v0 (pp +11 * n) /* n+1 */
+#define v1 (pp +12 * n+1) /* n+1 */
+#define v2 (pp +13 * n+2) /* n+1 */
+#define v3 (scratch +12 * n + 4) /* n+1 */
+#define wsi (scratch +12 * n + 4) /* 3n+1 */
+#define wse (scratch +13 * n + 5) /* 2n+1 */
+
+ /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
+ need all of them */
+/* if (scratch == NULL) */
+/* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
+ ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
+ ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
+
+ /********************** evaluation and recursive calls *********************/
+
+ /* $\pm1/8$ */
+ sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
+ mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
+ /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
+
+ /* $\pm1/4$ */
+ sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
+ mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
+ /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
+
+ /* $\pm2$ */
+ sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
+ mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
+ /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
+
+ /* $\pm8$ */
+ sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
+ mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
+ /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
+
+ /* $\pm1/2$ */
+ sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
+ mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
+ /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
+
+ /* $\pm1$ */
+ sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
+ if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
+ sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
+ else
+ sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
+ /* A(-1)*B(-1) */ /* A(1)*B(1) */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
+
+ /* $\pm4$ */
+ sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
+ mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
+ /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
+ mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
+
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef wse
+
+ /* A(0)*B(0) */
+ TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
+
+ /* Infinity */
+ if (UNLIKELY (half != 0)) {
+ if (s > t) {
+ TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
+ } else {
+ TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
+ };
+ };
+
+ mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef r6
+#undef wsi
+}
+
+#undef TOOM8H_MUL_N_REC
+#undef TOOM8H_MUL_REC
+#undef MAYBE_mul_basecase
+#undef MAYBE_mul_toom22
+#undef MAYBE_mul_toom33
+#undef MAYBE_mul_toom44
+#undef MAYBE_mul_toom8h
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_couple_handling.c b/vendor/gmp-6.3.0/mpn/generic/toom_couple_handling.c
new file mode 100644
index 0000000..cd253f7
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_couple_handling.c
@@ -0,0 +1,80 @@
+/* Helper function for high degree Toom-Cook algorithms.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Gets {pp,n} and (sign?-1:1)*{np,n}. Computes at once:
+ {pp,n} <- ({pp,n}-{np,n})/2^{ps+1}
+ {np,n} <- ({pp,n}+{np,n})/2^{ns+1}
+ Finally recompose them, obtaining:
+ {pp,n+off} <- {pp,n}+{np,n}*2^{off*GMP_NUMB_BITS}
+*/
+void
+mpn_toom_couple_handling (mp_ptr pp, mp_size_t n, mp_ptr np,
+ int nsign, mp_size_t off, int ps, int ns)
+{
+ if (nsign) {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (np, pp, np, n);
+#else
+ mpn_sub_n (np, pp, np, n);
+ mpn_rshift (np, np, n, 1);
+#endif
+ } else {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (np, pp, np, n);
+#else
+ mpn_add_n (np, pp, np, n);
+ mpn_rshift (np, np, n, 1);
+#endif
+ }
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+ if (ps == 1)
+ mpn_rsh1sub_n (pp, pp, np, n);
+ else
+#endif
+ {
+ mpn_sub_n (pp, pp, np, n);
+ if (ps > 0)
+ mpn_rshift (pp, pp, n, ps);
+ }
+ if (ns > 0)
+ mpn_rshift (np, np, n, ns);
+ pp[n] = mpn_add_n (pp+off, pp+off, np, n-off);
+ ASSERT_NOCARRY (mpn_add_1(pp+n, np+n-off, off, pp[n]) );
+}
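mpn_toom_couple_handling applies the standard even/odd separation: given f(x) and f(-x), the half-sum (f(x)+f(-x))/2 keeps only the even-degree terms and the half-difference (f(x)-f(-x))/2 only the odd ones, after which the ps and ns right shifts strip the powers of two introduced by evaluation points of the form 2^k or 2^-k. The same identity on plain integers (toy polynomial, illustrative only):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      /* f(y) = 5 + 3*y + 7*y^2 evaluated at y = +2 and y = -2. */
      int64_t fp = 5 + 3*2 + 7*4;       /* f(+2) = 39 */
      int64_t fm = 5 - 3*2 + 7*4;       /* f(-2) = 27 */
      int64_t even = (fp + fm) / 2;     /* 5 + 7*4 = 33 */
      int64_t odd  = (fp - fm) / 2;     /* 3*2     =  6 */
      assert (even == 33 && odd == 6);
      assert ((odd >> 1) == 3);         /* one extra shift strips the factor 2
                                           left on the odd part, as ps does */
      return 0;
    }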
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_eval_dgr3_pm1.c b/vendor/gmp-6.3.0/mpn/generic/toom_eval_dgr3_pm1.c
new file mode 100644
index 0000000..5f491b6
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_eval_dgr3_pm1.c
@@ -0,0 +1,72 @@
+/* mpn_toom_eval_dgr3_pm1 -- Evaluate a degree 3 polynomial in +1 and -1
+
+ Contributed to the GNU project by Niels Möller
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
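+/* A minimal sketch, assuming X = x0 + x1*B + x2*B^2 + x3*B^3 with
+   B = 2^(n*GMP_NUMB_BITS) and x3 of x3n <= n limbs:
+     xp1 <- (x0 + x2) + (x1 + x3) = X(1)
+     xm1 <- |(x0 + x2) - (x1 + x3)| = |X(-1)|
+   The return value is non-zero when X(-1) is negative. */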
+int
+mpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1,
+ mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
+{
+ int neg;
+
+ ASSERT (x3n > 0);
+ ASSERT (x3n <= n);
+
+ xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
+ tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n);
+
+ neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (neg)
+ mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
+ else
+ mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
+#else
+ if (neg)
+ mpn_sub_n (xm1, tp, xp1, n + 1);
+ else
+ mpn_sub_n (xm1, xp1, tp, n + 1);
+
+ mpn_add_n (xp1, xp1, tp, n + 1);
+#endif
+
+ ASSERT (xp1[n] <= 3);
+ ASSERT (xm1[n] <= 1);
+
+ return neg;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_eval_dgr3_pm2.c b/vendor/gmp-6.3.0/mpn/generic/toom_eval_dgr3_pm2.c
new file mode 100644
index 0000000..55e6b89
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_eval_dgr3_pm2.c
@@ -0,0 +1,97 @@
+/* mpn_toom_eval_dgr3_pm2 -- Evaluate a degree 3 polynomial in +2 and -2
+
+ Contributed to the GNU project by Niels Möller
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Needs n+1 limbs of temporary storage. */
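+/* A minimal sketch, assuming X = x0 + x1*B + x2*B^2 + x3*B^3 with
+   B = 2^(n*GMP_NUMB_BITS) and x3 of x3n <= n limbs:
+     xp2 <- x0 + 2*x1 + 4*x2 + 8*x3 = X(2)
+     xm2 <- |x0 - 2*x1 + 4*x2 - 8*x3| = |X(-2)|
+   The return value is non-zero when X(-2) is negative. */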
+int
+mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,
+ mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
+{
+ mp_limb_t cy;
+ int neg;
+
+ ASSERT (x3n > 0);
+ ASSERT (x3n <= n);
+
+ /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */
+#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
+#if HAVE_NATIVE_mpn_addlsh2_n
+ xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n);
+
+ cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n);
+#else /* HAVE_NATIVE_mpn_addlsh_n */
+ xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2);
+
+ cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2);
+#endif
+ if (x3n < n)
+ cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy);
+ tp[n] = cy;
+#else
+ cy = mpn_lshift (tp, xp + 2*n, n, 2);
+ xp2[n] = cy + mpn_add_n (xp2, tp, xp, n);
+
+ tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2);
+ if (x3n < n)
+ tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1);
+ else
+ tp[n] += mpn_add_n (tp, xp + n, tp, n);
+#endif
+ mpn_lshift (tp, tp, n+1, 1);
+
+ neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (neg)
+ mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+ else
+ mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else
+ if (neg)
+ mpn_sub_n (xm2, tp, xp2, n + 1);
+ else
+ mpn_sub_n (xm2, xp2, tp, n + 1);
+
+ mpn_add_n (xp2, xp2, tp, n + 1);
+#endif
+
+ ASSERT (xp2[n] < 15);
+ ASSERT (xm2[n] < 10);
+
+ return neg;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm1.c b/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm1.c
new file mode 100644
index 0000000..a8cfa93
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm1.c
@@ -0,0 +1,89 @@
+/* mpn_toom_eval_pm1 -- Evaluate a polynomial in +1 and -1
+
+ Contributed to the GNU project by Niels Möller
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */
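+/* A minimal sketch, assuming X = x0 + x1*B + ... + xk*B^k with
+   B = 2^(n*GMP_NUMB_BITS) and xk of hn <= n limbs: the even-indexed
+   coefficients are summed into xp1 and the odd-indexed ones into tp, so
+     xp1 <- x0 + x1 + ... + xk = X(1)
+     xm1 <- |x0 - x1 + x2 - ...| = |X(-1)|
+   The return value is non-zero when X(-1) is negative. */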
+int
+mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
+ mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{
+ unsigned i;
+ int neg;
+
+ ASSERT (k >= 4);
+
+ ASSERT (hn > 0);
+ ASSERT (hn <= n);
+
+ /* The degree k is also the number of full-size coefficients, so
+ * that last coefficient, of size hn, starts at xp + k*n. */
+
+ xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
+ for (i = 4; i < k; i += 2)
+ ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n));
+
+ tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n);
+ for (i = 5; i < k; i += 2)
+ ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n));
+
+ if (k & 1)
+ ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn));
+ else
+ ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn));
+
+ neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (neg)
+ mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
+ else
+ mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
+#else
+ if (neg)
+ mpn_sub_n (xm1, tp, xp1, n + 1);
+ else
+ mpn_sub_n (xm1, xp1, tp, n + 1);
+
+ mpn_add_n (xp1, xp1, tp, n + 1);
+#endif
+
+ ASSERT (xp1[n] <= k);
+ ASSERT (xm1[n] <= k/2 + 1);
+
+ return neg;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2.c b/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2.c
new file mode 100644
index 0000000..be682c7
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2.c
@@ -0,0 +1,130 @@
+/* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2
+
+ Contributed to the GNU project by Niels Möller and Marco Bodrato
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}), it
+ can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on {d,n+1}. */
+#if HAVE_NATIVE_mpn_addlsh2_n
+#define DO_addlsh2(d, a, b, n, cy) \
+do { \
+ (cy) <<= 2; \
+ (cy) += mpn_addlsh2_n(d, a, b, n); \
+} while (0)
+#else
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_addlsh2(d, a, b, n, cy) \
+do { \
+ (cy) <<= 2; \
+ (cy) += mpn_addlsh_n(d, a, b, n, 2); \
+} while (0)
+#else
+/* The following is not a general substitute for addlsh2.
+ It is correct if d == b, but it is not if d == a. */
+#define DO_addlsh2(d, a, b, n, cy) \
+do { \
+ (cy) <<= 2; \
+ (cy) += mpn_lshift(d, b, n, 2); \
+ (cy) += mpn_add_n(d, d, a, n); \
+} while (0)
+#endif
+#endif
+
+/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the
+ points +2 and -2. */
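+/* A minimal sketch, assuming X = x0 + x1*B + ... + xk*B^k with
+   B = 2^(n*GMP_NUMB_BITS): the even-indexed and the odd-indexed
+   coefficients are accumulated separately by Horner's rule in the point 4
+   (each DO_addlsh2 step is acc <- 4*acc + xi); the buffer holding the
+   odd-indexed part is then doubled, so the sum and the absolute difference
+   of the two buffers give X(2) and |X(-2)|.  The return value is non-zero
+   when X(-2) is negative. */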
+int
+mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
+ mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{
+ int i;
+ int neg;
+ mp_limb_t cy;
+
+ ASSERT (k >= 3);
+ ASSERT (k < GMP_NUMB_BITS);
+
+ ASSERT (hn > 0);
+ ASSERT (hn <= n);
+
+ /* The degree k is also the number of full-size coefficients, so
+ * that last coefficient, of size hn, starts at xp + k*n. */
+
+ cy = 0;
+ DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);
+ if (hn != n)
+ cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
+ for (i = k - 4; i >= 0; i -= 2)
+ DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);
+ xp2[n] = cy;
+
+ k--;
+
+ cy = 0;
+ DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
+ for (i = k - 4; i >= 0; i -= 2)
+ DO_addlsh2 (tp, xp + i * n, tp, n, cy);
+ tp[n] = cy;
+
+ if (k & 1)
+ ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));
+ else
+ ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));
+
+ neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (neg)
+ mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+ else
+ mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+ if (neg)
+ mpn_sub_n (xm2, tp, xp2, n + 1);
+ else
+ mpn_sub_n (xm2, xp2, tp, n + 1);
+
+ mpn_add_n (xp2, xp2, tp, n + 1);
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+ ASSERT (xp2[n] < (1<<(k+2))-1);
+ ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3);
+
+ neg ^= ((k & 1) - 1);
+
+ return neg;
+}
+
+#undef DO_addlsh2
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2exp.c b/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2exp.c
new file mode 100644
index 0000000..c3c4651
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2exp.c
@@ -0,0 +1,127 @@
+/* mpn_toom_eval_pm2exp -- Evaluate a polynomial in +2^k and -2^k
+
+ Contributed to the GNU project by Niels Möller
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+/* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift. */
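+/* A minimal sketch, assuming X = x0 + x1*B + ... + xk*B^k with
+   B = 2^(n*GMP_NUMB_BITS):
+     xp2 <- x0 + x2*2^(2*shift) + x4*2^(4*shift) + ...   (even part)
+     tp  <- x1*2^shift + x3*2^(3*shift) + ...            (odd part)
+   so that xp2 + tp = X(2^shift) and |xp2 - tp| = |X(-2^shift)|; the return
+   value is non-zero when X(-2^shift) is negative. */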
+int
+mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
+ mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
+ mp_ptr tp)
+{
+ unsigned i;
+ int neg;
+#if HAVE_NATIVE_mpn_addlsh_n
+ mp_limb_t cy;
+#endif
+
+ ASSERT (k >= 3);
+ ASSERT (shift*k < GMP_NUMB_BITS);
+
+ ASSERT (hn > 0);
+ ASSERT (hn <= n);
+
+ /* The degree k is also the number of full-size coefficients, so
+ * that last coefficient, of size hn, starts at xp + k*n. */
+
+#if HAVE_NATIVE_mpn_addlsh_n
+ xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);
+ for (i = 4; i < k; i += 2)
+ xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);
+
+ tp[n] = mpn_lshift (tp, xp+n, n, shift);
+ for (i = 3; i < k; i+= 2)
+ tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);
+
+ if (k & 1)
+ {
+ cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);
+ MPN_INCR_U (tp + hn, n+1 - hn, cy);
+ }
+ else
+ {
+ cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);
+ MPN_INCR_U (xp2 + hn, n+1 - hn, cy);
+ }
+
+#else /* !HAVE_NATIVE_mpn_addlsh_n */
+ xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);
+ xp2[n] += mpn_add_n (xp2, xp, tp, n);
+ for (i = 4; i < k; i += 2)
+ {
+ xp2[n] += mpn_lshift (tp, xp + i*n, n, i*shift);
+ xp2[n] += mpn_add_n (xp2, xp2, tp, n);
+ }
+
+ tp[n] = mpn_lshift (tp, xp+n, n, shift);
+ for (i = 3; i < k; i+= 2)
+ {
+ tp[n] += mpn_lshift (xm2, xp + i*n, n, i*shift);
+ tp[n] += mpn_add_n (tp, tp, xm2, n);
+ }
+
+ xm2[hn] = mpn_lshift (xm2, xp + k*n, hn, k*shift);
+ if (k & 1)
+ mpn_add (tp, tp, n+1, xm2, hn+1);
+ else
+ mpn_add (xp2, xp2, n+1, xm2, hn+1);
+#endif /* !HAVE_NATIVE_mpn_addlsh_n */
+
+ neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (neg)
+ mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+ else
+ mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+ if (neg)
+ mpn_sub_n (xm2, tp, xp2, n + 1);
+ else
+ mpn_sub_n (xm2, xp2, tp, n + 1);
+
+ mpn_add_n (xp2, xp2, tp, n + 1);
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+ /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
+ ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
+ xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
+ ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
+ xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));
+
+ return neg;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2rexp.c b/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2rexp.c
new file mode 100644
index 0000000..6cd62fb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_eval_pm2rexp.c
@@ -0,0 +1,101 @@
+/* mpn_toom_eval_pm2rexp -- Evaluate a polynomial in +2^-k and -2^-k
+
+ Contributed to the GNU project by Marco Bodrato
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+ return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+ mp_limb_t __cy;
+ __cy = mpn_lshift(ws,src,n,s);
+ return __cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+/* Evaluates a polynomial of degree q > 1, in the points +2^-s and -2^-s. */
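+/* A minimal sketch, assuming A = a0 + a1*B + ... + aq*B^q with
+   B = 2^(n*GMP_NUMB_BITS) and aq of t <= n limbs:
+     rp <- a0*2^(s*q) + a2*2^(s*(q-2)) + ...     (even-indexed terms)
+     ws <- a1*2^(s*(q-1)) + a3*2^(s*(q-3)) + ... (odd-indexed terms)
+   so that rp + ws = 2^(s*q) * A(1/2^s) and |rp - ws| = 2^(s*q) * |A(-1/2^s)|;
+   the return value is non-zero when A(-1/2^s) is negative. */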
+int
+mpn_toom_eval_pm2rexp (mp_ptr rp, mp_ptr rm,
+ unsigned int q, mp_srcptr ap, mp_size_t n, mp_size_t t,
+ unsigned int s, mp_ptr ws)
+{
+ unsigned int i;
+ int neg;
+ /* {ap,q*n+t} -> {rp,n+1} {rm,n+1} , with {ws, n+1}*/
+ ASSERT (n >= t);
+ ASSERT (s != 0); /* or _eval_pm1 should be used */
+ ASSERT (q > 1);
+ ASSERT (s*q < GMP_NUMB_BITS);
+ rp[n] = mpn_lshift(rp, ap, n, s*q);
+ ws[n] = mpn_lshift(ws, ap+n, n, s*(q-1));
+ if( (q & 1) != 0) {
+ ASSERT_NOCARRY(mpn_add(ws,ws,n+1,ap+n*q,t));
+ rp[n] += DO_mpn_addlsh_n(rp, ap+n*(q-1), n, s, rm);
+ } else {
+ ASSERT_NOCARRY(mpn_add(rp,rp,n+1,ap+n*q,t));
+ }
+ for(i=2; i<q-1; i++)
+ {
+ rp[n] += DO_mpn_addlsh_n(rp, ap+n*i, n, s*(q-i), rm);
+ i++;
+ ws[n] += DO_mpn_addlsh_n(ws, ap+n*i, n, s*(q-i), rm);
+ };
+
+ neg = (mpn_cmp (rp, ws, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (neg)
+ mpn_add_n_sub_n (rp, rm, ws, rp, n + 1);
+ else
+ mpn_add_n_sub_n (rp, rm, rp, ws, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+ if (neg)
+ mpn_sub_n (rm, ws, rp, n + 1);
+ else
+ mpn_sub_n (rm, rp, ws, n + 1);
+
+ ASSERT_NOCARRY (mpn_add_n (rp, rp, ws, n + 1));
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+ return neg;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_12pts.c b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_12pts.c
new file mode 100644
index 0000000..6273466
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_12pts.c
@@ -0,0 +1,374 @@
+/* Interpolation for the algorithm Toom-Cook 6.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2015, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented: Both sublsh_n(,,,20) should be corrected.
+#endif
+
+#if GMP_NUMB_BITS < 16
+#error Not implemented: divexact_by42525 needs splitting.
+#endif
+
+#if GMP_NUMB_BITS < 12
+#error Not implemented: Hard to adapt...
+#endif
+
+
+/* FIXME: tuneup should decide the best variant */
+#ifndef AORSMUL_FASTER_AORS_AORSLSH
+#define AORSMUL_FASTER_AORS_AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_AORS_2AORSLSH
+#define AORSMUL_FASTER_AORS_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_2AORSLSH
+#define AORSMUL_FASTER_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_3AORSLSH
+#define AORSMUL_FASTER_3AORSLSH 1
+#endif
+
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+ return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+ mp_limb_t __cy;
+ __cy = mpn_lshift(ws,src,n,s);
+ return __cy + mpn_sub_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+#if !defined (AORSMUL_FASTER_2AORSLSH) && !defined (AORSMUL_FASTER_AORS_2AORSLSH)
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+ return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+ mp_limb_t __cy;
+ __cy = mpn_lshift(ws,src,n,s);
+ return __cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
+#else
+/* FIXME: This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
+do { \
+ mp_limb_t __cy; \
+ MPN_DECR_U (dst, nd, src[0] >> s); \
+ __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
+ MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
+} while (0)
+#endif
+
+
+#define BINVERT_9 \
+ ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_255 \
+ (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
+
+ /* FIXME: find some more general expressions for 2835^-1, 42525^-1 */
+#if GMP_LIMB_BITS == 32
+#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x53E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0x9F314C35))
+#else
+#if GMP_LIMB_BITS == 64
+#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35))
+#endif
+#endif
+
+#ifndef mpn_divexact_by255
+#if GMP_NUMB_BITS % 8 == 0
+#define mpn_divexact_by255(dst,src,size) \
+ (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
+#else
+#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
+#endif
+#endif
+#endif
+
+#ifndef mpn_divexact_by9x4
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,2)
+#else
+#define mpn_divexact_by9x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<2)
+#endif
+#endif
+
+#ifndef mpn_divexact_by42525
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
+#define mpn_divexact_by42525(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,0)
+#else
+#define mpn_divexact_by42525(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525))
+#endif
+#endif
+
+#ifndef mpn_divexact_by2835x4
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
+#define mpn_divexact_by2835x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,2)
+#else
+#define mpn_divexact_by2835x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<2)
+#endif
+#endif
+
+/* Interpolation for Toom-6.5 (or Toom-6), using the evaluation
+ points: infinity(6.5 only), +-4, +-2, +-1, +-1/4, +-1/2, 0. More precisely,
+ we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
+ degree 11 (or 10), given the 12 (resp. 11) values:
+
+ r0 = limit at infinity of f(x) / x^11,
+ r1 = f(4),f(-4),
+ r2 = f(2),f(-2),
+ r3 = f(1),f(-1),
+ r4 = f(1/4),f(-1/4),
+ r5 = f(1/2),f(-1/2),
+ r6 = f(0).
+
+ All pairs of the form f(n),f(-n) must already have been combined with
+ toom_couple_handling(f(n),...,f(-n),...)
+
+ The result is stored in {pp, spt + 7*n (or 6*n)}.
+ At entry, r6 is stored at {pp, 2n},
+ r4 is stored at {pp + 3n, 3n + 1}.
+ r2 is stored at {pp + 7n, 3n + 1}.
+ r0 is stored at {pp +11n, spt}.
+
+ The other values are 3n+1 limbs each (with most significant limbs small).
+
+ Negative intermediate results are stored in two's complement.
+ Inputs are destroyed.
+*/
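+/* Informally: the values r0..r6 above form the right-hand side of a linear
+   system whose rows are (scaled) powers of the evaluation points.  The code
+   below eliminates unknowns with additions, subtractions, shifts and
+   submul_1/addmul_1 by small constants, uses the exact divisions by 2835*4,
+   255, 42525 and 9*4 provided by the divexact_by* macros above, and finally
+   folds the recovered coefficients into {pp,...} at n-limb offsets. */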
+
+void
+mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
+ mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
+{
+ mp_limb_t cy;
+ mp_size_t n3;
+ mp_size_t n3p1;
+ n3 = 3 * n;
+ n3p1 = n3 + 1;
+
+#define r4 (pp + n3) /* 3n+1 */
+#define r2 (pp + 7 * n) /* 3n+1 */
+#define r0 (pp +11 * n) /* s+t <= 2*n */
+
+ /******************************* interpolation *****************************/
+ if (half != 0) {
+ cy = mpn_sub_n (r3, r3, r0, spt);
+ MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
+
+ cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
+ MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
+ DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);
+
+ cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
+ MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
+ DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
+ };
+
+ r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
+ DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
+#else
+ ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
+ mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
+ MP_PTR_SWAP(r1, wsi);
+#endif
+
+ r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
+ DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
+#else
+ mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
+ ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
+ MP_PTR_SWAP(r5, wsi);
+#endif
+
+ r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);
+
+#if AORSMUL_FASTER_AORS_AORSLSH
+ mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
+#else
+ mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
+ DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
+#endif
+ /* A division by 2835x4 follows. Warning: the operand can be negative! */
+ mpn_divexact_by2835x4(r4, r4, n3p1);
+ if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
+ r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
+
+#if AORSMUL_FASTER_2AORSLSH
+ mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
+#else
+ DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
+ DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
+#endif
+ mpn_divexact_by255(r5, r5, n3p1);
+
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));
+
+#if AORSMUL_FASTER_3AORSLSH
+ ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
+#else
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
+#endif
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
+ mpn_divexact_by42525(r1, r1, n3p1);
+
+#if AORSMUL_FASTER_AORS_2AORSLSH
+ ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
+#else
+ ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
+ ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
+#endif
+ mpn_divexact_by9x4(r2, r2, n3p1);
+
+ ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (r4, r2, r4, n3p1);
+ r4 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+ mpn_sub_n (r4, r2, r4, n3p1);
+ ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
+#endif
+ ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (r5, r5, r1, n3p1);
+ r5 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+ mpn_add_n (r5, r5, r1, n3p1);
+ ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
+#endif
+
+ /* last interpolation steps... */
+ ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
+ ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
+ /* ... could be mixed with recomposition
+ ||H-r5|M-r5|L-r5| ||H-r1|M-r1|L-r1|
+ */
+
+ /***************************** recomposition *******************************/
+ /*
+ pp[] prior to operations:
+ |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
+
+ summation scheme for remaining operations:
+ |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
+ |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
+ ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5|
+ */
+
+ cy = mpn_add_n (pp + n, pp + n, r5, n);
+ cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
+#if HAVE_NATIVE_mpn_add_nc
+ cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
+#else
+ MPN_INCR_U (r5 + 2 * n, n + 1, cy);
+ cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
+#endif
+ MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);
+
+ pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
+ cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
+#if HAVE_NATIVE_mpn_add_nc
+ cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
+#else
+ MPN_INCR_U (r3 + 2 * n, n + 1, cy);
+ cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
+#endif
+ MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
+
+ pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
+ if (half) {
+ cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+ if (LIKELY (spt > n)) {
+ cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
+ MPN_INCR_U (pp + 4 * n3, spt - n, cy);
+ } else {
+ ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
+ }
+#else
+ MPN_INCR_U (r1 + 2 * n, n + 1, cy);
+ if (LIKELY (spt > n)) {
+ cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
+ MPN_INCR_U (pp + 4 * n3, spt - n, cy);
+ } else {
+ ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
+ }
+#endif
+ } else {
+ ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
+ }
+
+#undef r0
+#undef r2
+#undef r4
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_16pts.c b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_16pts.c
new file mode 100644
index 0000000..c1457be
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_16pts.c
@@ -0,0 +1,545 @@
+/* Interpolation for the algorithm Toom-Cook 8.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2015, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
+#endif
+
+#if GMP_NUMB_BITS < 28
+#error Not implemented: divexact_by188513325 and _by182712915 will not work.
+#endif
+
+
+/* FIXME: tuneup should decide the best variant */
+#ifndef AORSMUL_FASTER_AORS_AORSLSH
+#define AORSMUL_FASTER_AORS_AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_AORS_2AORSLSH
+#define AORSMUL_FASTER_AORS_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_2AORSLSH
+#define AORSMUL_FASTER_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_3AORSLSH
+#define AORSMUL_FASTER_3AORSLSH 1
+#endif
+
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+ return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+ mp_limb_t __cy;
+ __cy = mpn_lshift(ws,src,n,s);
+ return __cy + mpn_sub_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+#if !defined (AORSMUL_FASTER_2AORSLSH) && !defined (AORSMUL_FASTER_AORS_2AORSLSH)
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+ return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+ mp_limb_t __cy;
+ __cy = mpn_lshift(ws,src,n,s);
+ return __cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
+#else
+/* FIXME: This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
+do { \
+ mp_limb_t __cy; \
+ MPN_DECR_U (dst, nd, src[0] >> s); \
+ __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
+ MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
+} while (0)
+#endif
+
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+#define BINVERT_9 \
+ ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_255 \
+ (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
+
+ /* FIXME: find some more general expressions for inverses */
+#if GMP_LIMB_BITS == 32
+#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x53E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0x9F314C35))
+#define BINVERT_182712915 (GMP_NUMB_MASK & CNST_LIMB(0x550659DB))
+#define BINVERT_188513325 (GMP_NUMB_MASK & CNST_LIMB(0xFBC333A5))
+#define BINVERT_255x182712915L (GMP_NUMB_MASK & CNST_LIMB(0x6FC4CB25))
+#define BINVERT_255x188513325L (GMP_NUMB_MASK & CNST_LIMB(0x6864275B))
+#if GMP_NAIL_BITS == 0
+#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
+#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
+#else /* GMP_NAIL_BITS != 0 */
+#define BINVERT_255x182712915H \
+ (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
+#define BINVERT_255x188513325H \
+ (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
+#endif
+#else
+#if GMP_LIMB_BITS == 64
+#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35))
+#define BINVERT_255x182712915 (GMP_NUMB_MASK & CNST_LIMB(0x1B649A076FC4CB25))
+#define BINVERT_255x188513325 (GMP_NUMB_MASK & CNST_LIMB(0x06DB993A6864275B))
+#endif
+#endif
+
+#ifndef mpn_divexact_by255
+#if GMP_NUMB_BITS % 8 == 0
+#define mpn_divexact_by255(dst,src,size) \
+ (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
+#else
+#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
+#endif
+#endif
+#endif
+
+#ifndef mpn_divexact_by255x4
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
+#else
+#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
+#endif
+#endif
+
+#ifndef mpn_divexact_by9x16
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
+#else
+#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
+#endif
+#endif
+
+#ifndef mpn_divexact_by42525x16
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
+#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
+#else
+#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
+#endif
+#endif
+
+#ifndef mpn_divexact_by2835x64
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
+#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
+#else
+#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
+#endif
+#endif
+
+#ifndef mpn_divexact_by255x182712915
+#if GMP_NUMB_BITS < 36
+#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
+/* FIXME: use mpn_bdiv_q_2_pi2 */
+#endif
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
+#define mpn_divexact_by255x182712915(dst,src,size) \
+ do { \
+ mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0); \
+ mpn_divexact_by255(dst,dst,size); \
+ } while(0)
+#else
+#define mpn_divexact_by255x182712915(dst,src,size) \
+ do { \
+ mpn_divexact_1(dst,src,size,CNST_LIMB(182712915)); \
+ mpn_divexact_by255(dst,dst,size); \
+ } while(0)
+#endif
+#else /* GMP_NUMB_BITS > 35 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
+#define mpn_divexact_by255x182712915(dst,src,size) \
+ mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
+#else
+#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
+#endif
+#endif /* GMP_NUMB_BITS >?< 36 */
+#endif
+
+#ifndef mpn_divexact_by255x188513325
+#if GMP_NUMB_BITS < 36
+#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
+/* FIXME: use mpn_bdiv_q_1_pi2 */
+#endif
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
+#define mpn_divexact_by255x188513325(dst,src,size) \
+ do { \
+ mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0); \
+ mpn_divexact_by255(dst,dst,size); \
+ } while(0)
+#else
+#define mpn_divexact_by255x188513325(dst,src,size) \
+ do { \
+ mpn_divexact_1(dst,src,size,CNST_LIMB(188513325)); \
+ mpn_divexact_by255(dst,dst,size); \
+ } while(0)
+#endif
+#else /* GMP_NUMB_BITS > 35 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
+#define mpn_divexact_by255x188513325(dst,src,size) \
+ mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
+#else
+#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
+#endif
+#endif /* GMP_NUMB_BITS >?< 36 */
+#endif
+
+/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
+ points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
+ +-1/8, 0. More precisely, we want to compute
+ f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
+ 14), given the 16 (resp. 15) values:
+
+ r0 = limit at infinity of f(x) / x^15,
+ r1 = f(8),f(-8),
+ r2 = f(4),f(-4),
+ r3 = f(2),f(-2),
+ r4 = f(1),f(-1),
+ r5 = f(1/4),f(-1/4),
+ r6 = f(1/2),f(-1/2),
+ r7 = f(1/8),f(-1/8),
+ r8 = f(0).
+
+ All pairs of the form f(n),f(-n) must already have been combined with
+ toom_couple_handling(f(n),...,f(-n),...)
+
+ The result is stored in {pp, spt + 7*n (or 8*n)}.
+ At entry, r8 is stored at {pp, 2n},
+ r6 is stored at {pp + 3n, 3n + 1}.
+ r4 is stored at {pp + 7n, 3n + 1}.
+ r2 is stored at {pp +11n, 3n + 1}.
+ r0 is stored at {pp +15n, spt}.
+
+ The other values are 3n+1 limbs each (with most significant limbs small).
+
+ Negative intermediate results are stored in two's complement.
+ Inputs are destroyed.
+*/
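+/* Informally: as in the 12-point case, the code below performs an
+   elimination on r0..r8 using additions, subtractions, shifts and small
+   submul_1/addmul_1 multipliers, with exact divisions by 255*188513325,
+   2835*64, 255*4, 255*182712915, 42525*16 and 9*16 (the divexact_by*
+   macros above), and then recomposes the coefficients into {pp,...} at
+   n-limb offsets. */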
+
+void
+mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
+ mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
+{
+ mp_limb_t cy;
+ mp_size_t n3;
+ mp_size_t n3p1;
+ n3 = 3 * n;
+ n3p1 = n3 + 1;
+
+#define r6 (pp + n3) /* 3n+1 */
+#define r4 (pp + 7 * n) /* 3n+1 */
+#define r2 (pp +11 * n) /* 3n+1 */
+#define r0 (pp +15 * n) /* s+t <= 2*n */
+
+ ASSERT( spt <= 2 * n );
+ /******************************* interpolation *****************************/
+ if( half != 0) {
+ cy = mpn_sub_n (r4, r4, r0, spt);
+ MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
+
+ cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
+ MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
+ DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
+
+ cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
+ MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
+ DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
+
+ cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
+#if BIT_CORRECTION
+ cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
+ n3p1 - spt - BIT_CORRECTION, cy);
+ ASSERT (BIT_CORRECTION > 0 || cy == 0);
+ /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
+ cy = r7[n3p1];
+ r7[n3p1] = 0x80;
+#else
+ MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
+#endif
+ DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
+#if BIT_CORRECTION
+ /* FIXME: assumes r7[n3p1] is writable. */
+ ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
+ r7[n3p1] = cy;
+#endif
+ };
+
+ r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
+ DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
+#else
+ mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
+ ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
+ MP_PTR_SWAP(r5, wsi);
+#endif
+
+ r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
+ DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
+#else
+ ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
+ mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
+ MP_PTR_SWAP(r3, wsi);
+#endif
+
+ cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
+#if BIT_CORRECTION
+ MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
+ cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
+ cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
+ ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
+#else
+ r7[n3] -= cy;
+ DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
+#endif
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
+#else
+ mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
+ mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. */
+ MP_PTR_SWAP(r7, wsi);
+#endif
+
+ r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
+
+#if AORSMUL_FASTER_2AORSLSH
+ mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
+#else
+ DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
+ DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
+#endif
+
+ mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
+#if AORSMUL_FASTER_3AORSLSH
+ mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
+#else
+ DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
+ DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
+ DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
+#endif
+ mpn_divexact_by255x188513325(r7, r7, n3p1);
+
+ mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
+ /* A division by 2835x64 follows. Warning: the operand can be negative! */
+ mpn_divexact_by2835x64(r5, r5, n3p1);
+ if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
+ r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
+
+#if AORSMUL_FASTER_AORS_AORSLSH
+ mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
+#else
+ mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
+ DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
+#endif
+#if AORSMUL_FASTER_2AORSLSH
+ mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
+#else
+ DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
+ DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
+#endif
+ /* A division by 255x4 follows. Warning: the operand can be negative! */
+ mpn_divexact_by255x4(r6, r6, n3p1);
+ if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
+ r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
+
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
+
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
+ ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
+
+ /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
+ DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
+ mpn_submul_1 (r1, r2, n3p1, 1428);
+ mpn_submul_1 (r1, r3, n3p1, 112896);
+ mpn_divexact_by255x182712915(r1, r1, n3p1);
+
+ ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
+ mpn_divexact_by42525x16(r2, r2, n3p1);
+
+#if AORSMUL_FASTER_AORS_2AORSLSH
+ ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
+#else
+ ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
+ ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
+ ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
+#endif
+ ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
+ mpn_divexact_by9x16(r3, r3, n3p1);
+
+ ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
+ ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
+ ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (r6, r2, r6, n3p1);
+ r6 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+ mpn_add_n (r6, r2, r6, n3p1);
+ ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
+#endif
+ ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (r5, r3, r5, n3p1);
+ r5 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+ mpn_sub_n (r5, r3, r5, n3p1);
+ ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
+#endif
+ ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (r7, r1, r7, n3p1);
+ r7 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+ mpn_add_n (r7, r1, r7, n3p1);
+ ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
+#endif
+ ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
+
+ /* last interpolation steps... */
+ /* ... could be mixed with recomposition
+ ||H-r7|M-r7|L-r7| ||H-r5|M-r5|L-r5|
+ */
+
+ /***************************** recomposition *******************************/
+ /*
+ pp[] prior to operations:
+ |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
+
+ summation scheme for remaining operations:
+ |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
+ |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
+ ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5| ||H r7|M r7|L r7|
+ */
+
+ cy = mpn_add_n (pp + n, pp + n, r7, n);
+ cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
+#if HAVE_NATIVE_mpn_add_nc
+ cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
+#else
+ MPN_INCR_U (r7 + 2 * n, n + 1, cy);
+ cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
+#endif
+ MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
+
+ pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
+ cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
+#if HAVE_NATIVE_mpn_add_nc
+ cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
+#else
+ MPN_INCR_U (r5 + 2 * n, n + 1, cy);
+ cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
+#endif
+ MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
+
+ pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
+ cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+ cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
+#else
+ MPN_INCR_U (r3 + 2 * n, n + 1, cy);
+ cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
+#endif
+ MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
+
+ pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
+ if ( half ) {
+ cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+ if(LIKELY(spt > n)) {
+ cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
+ MPN_INCR_U (pp + 16 * n, spt - n, cy);
+ } else {
+ ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
+ }
+#else
+ MPN_INCR_U (r1 + 2 * n, n + 1, cy);
+ if(LIKELY(spt > n)) {
+ cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
+ MPN_INCR_U (pp + 16 * n, spt - n, cy);
+ } else {
+ ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
+ }
+#endif
+ } else {
+ ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
+ }
+
+#undef r0
+#undef r2
+#undef r4
+#undef r6
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_5pts.c b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_5pts.c
new file mode 100644
index 0000000..466ab85
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_5pts.c
@@ -0,0 +1,198 @@
+/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
+
+ Contributed to the GNU project by Robert Harley.
+ Improvements by Paul Zimmermann and Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+void
+mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
+ mp_size_t k, mp_size_t twor, int sa,
+ mp_limb_t vinf0)
+{
+ mp_limb_t cy, saved;
+ mp_size_t twok;
+ mp_size_t kk1;
+ mp_ptr c1, v1, c3, vinf;
+
+ twok = k + k;
+ kk1 = twok + 1;
+
+ c1 = c + k;
+ v1 = c1 + k;
+ c3 = v1 + k;
+ vinf = c3 + k;
+
+#define v0 (c)
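+/* Informally, at entry the five values are
+     v0 = f(0), v1 = f(1), vm1 = |f(-1)| (sign given by sa),
+     v2 = f(2), vinf = leading coefficient of f (its low limb is passed
+   separately as vinf0), for the degree-4 product polynomial f.  The row
+   vectors quoted in the step comments below, e.g. (16 8 4 2 1) for v2,
+   list how each value weights the five coefficients of f. */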
+ /* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) =
+ thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0)
+ */
+ if (sa)
+ ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
+ else
+ ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */
+
+ ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */
+ /* (5 3 1 1 0)*/
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */
+
+ /* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
+ tm1 >= 0 (0 1 0 1 0)
+ No carry comes out from {v1, kk1} +/- {vm1, kk1},
+ and the division by two is exact.
+ If (sa!=0) the sign of vm1 is negative */
+ if (sa)
+ {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (vm1, v1, vm1, kk1);
+#else
+ ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
+ ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+#endif
+ }
+ else
+ {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (vm1, v1, vm1, kk1);
+#else
+ ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
+ ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+#endif
+ }
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */
+
+ /* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
+ t1 >= 0
+ */
+ vinf[0] -= mpn_sub_n (v1, v1, c, twok);
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */
+
+ /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
+ t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
+ */
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (v2, v2, v1, kk1);
+#else
+ ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
+ ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
+#endif
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */
+
+ /* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
+ result is v1 >= 0
+ */
+ ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
+
+ /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
+ cy = mpn_add_n (c1, c1, vm1, kk1);
+ MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
+ /* Memory allocated for vm1 is now free, it can be recycled ...*/
+
+ /* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
+ result is v2 >= 0 */
+ saved = vinf[0]; /* Remember v1's highest limb (will be overwritten). */
+ vinf[0] = vinf0; /* Set the right value for vinf0 */
+#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
+ cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
+#else
+ /* Overwrite unused vm1 */
+ cy = mpn_lshift (vm1, vinf, twor, 1);
+ cy += mpn_sub_n (v2, v2, vm1, twor);
+#endif
+ MPN_DECR_U (v2 + twor, kk1 - twor, cy);
+
+ /* Current matrix is
+ [1 0 0 0 0; vinf
+ 0 1 0 0 0; v2
+ 1 0 1 0 0; v1
+ 0 1 0 1 0; vm1
+ 0 0 0 0 1] v0
+ Some values already are in-place (we added vm1 in the correct position)
+ | vinf| v1 | v0 |
+ | vm1 |
+ One still is in a separated area
+ | +v2 |
+ We have to compute v1-=vinf; vm1 -= v2,
+ |-vinf|
+ | -v2 |
+ By carefully reordering the operations we avoid computing the sum of the
+ high half of v2 plus the low half of vinf twice.
+ */
+
+ /* Add the high half of t2 in {vinf} */
+ if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */
+ cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
+ MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
+ } else { /* triggered only by very unbalanced cases like
+ (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
+ ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
+ }
+ /* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
+ result is >= 0 */
+ /* Side effect: we also subtracted (high half) vm1 -= v2 */
+ cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */
+ vinf0 = vinf[0]; /* Save again the right value for vinf0 */
+ vinf[0] = saved;
+ MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last limbs. */
+
+ /* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
+ Operate only on the low half.
+ */
+ cy = mpn_sub_n (c1, c1, v2, k);
+ MPN_DECR_U (v1, kk1, cy);
+
+ /********************* Beginning the final phase **********************/
+
+ /* Most of the recomposition was done */
+
+ /* add t2 in {c+3k, ...}, but only the low half */
+ cy = mpn_add_n (c3, c3, v2, k);
+ vinf[0] += cy;
+ ASSERT(vinf[0] >= cy); /* No carry */
+ MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
+
+#undef v0
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_6pts.c b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_6pts.c
new file mode 100644
index 0000000..eb23661
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_6pts.c
@@ -0,0 +1,241 @@
+/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
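+
+/* A worked instance of the inverse trick behind mpn_divexact_by3, assuming
+   GMP_NUMB_BITS == 64 and no nail bits: BINVERT_3 is then 0xAAAAAAAAAAAAAAAB,
+   the inverse of 3 mod 2^64, since 3 * 0xAAAAAAAAAAAAAAAB = 2^65 + 1 == 1
+   (mod 2^64).  Dividing a single limb that is an exact multiple of 3 is then
+   just one multiplication, e.g. 21 * 0xAAAAAAAAAAAAAAAB == 7 (mod 2^64).  */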
+
+/* Interpolation for Toom-3.5, using the evaluation points: 0, infinity,
+ 1, -1, 2, -2. More precisely, we want to compute
+ f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
+ six values
+
+ w5 = f(0),
+ w4 = f(-1),
+ w3 = f(1),
+ w2 = f(-2),
+ w1 = f(2),
+ w0 = limit at infinity of f(x) / x^5,
+
+ The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
+ {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
+ {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
+ significant limbs small). f(-1) and f(-2) may be negative, signs
+ determined by the flag bits. All intermediate results are positive.
+ Inputs are destroyed.
+
+ Interpolation sequence was taken from the paper: "Integer and
+ Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
+ Some slight variations were introduced: adaptation to "gmp
+ instruction set", and a final saving of an operation by interlacing
+ interpolation and recomposition phases.
+*/
+
+void
+mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
+ mp_ptr w4, mp_ptr w2, mp_ptr w1,
+ mp_size_t w0n)
+{
+ mp_limb_t cy;
+ /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
+ mp_limb_t cy4, cy6, embankment;
+
+ ASSERT( n > 0 );
+ ASSERT( 2*n >= w0n && w0n > 0 );
+
+#define w5 pp /* 2n */
+#define w3 (pp + 2 * n) /* 2n+1 */
+#define w0 (pp + 5 * n) /* w0n */
+
+ /* Interpolate with sequence:
+ W2 =(W1 - W2)>>2
+ W1 =(W1 - W5)>>1
+ W1 =(W1 - W2)>>1
+ W4 =(W3 - W4)>>1
+ W2 =(W2 - W4)/3
+ W3 = W3 - W4 - W5
+ W1 =(W1 - W3)/3
+ // Last steps are mixed with recomposition...
+ W2 = W2 - W0<<2
+ W4 = W4 - W2
+ W3 = W3 - W1
+ W2 = W2 - W0
+ */
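+
+  /* A worked check of the sequence above, ignoring the sign flags (they only
+     select add vs. sub) and writing f(x) = a0 + a1*x + ... + a5*x^5, so that
+     initially W5 = a0, W4 = f(-1), W3 = f(1), W2 = f(-2), W1 = f(2), W0 = a5:
+
+       W2 =(W1 - W2)>>2   ->  a1 + 4*a3 + 16*a5
+       W1 =(W1 - W5)>>1   ->  a1 + 2*a2 + 4*a3 + 8*a4 + 16*a5
+       W1 =(W1 - W2)>>1   ->  a2 + 4*a4
+       W4 =(W3 - W4)>>1   ->  a1 + a3 + a5
+       W2 =(W2 - W4)/3    ->  a3 + 5*a5
+       W3 = W3 - W4 - W5  ->  a2 + a4
+       W1 =(W1 - W3)/3    ->  a4
+       W2 = W2 - W0<<2    ->  a3 + a5
+       W4 = W4 - W2       ->  a1
+       W3 = W3 - W1       ->  a2
+       W2 = W2 - W0       ->  a3
+
+     so W5..W0 end up holding a0..a5, as the recomposition expects.  */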
+
+ /* W2 =(W1 - W2)>>2 */
+ if (flags & toom6_vm2_neg)
+ mpn_add_n (w2, w1, w2, 2 * n + 1);
+ else
+ mpn_sub_n (w2, w1, w2, 2 * n + 1);
+ mpn_rshift (w2, w2, 2 * n + 1, 2);
+
+ /* W1 =(W1 - W5)>>1 */
+ w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
+ mpn_rshift (w1, w1, 2 * n + 1, 1);
+
+ /* W1 =(W1 - W2)>>1 */
+#if HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
+#else
+ mpn_sub_n (w1, w1, w2, 2 * n + 1);
+ mpn_rshift (w1, w1, 2 * n + 1, 1);
+#endif
+
+ /* W4 =(W3 - W4)>>1 */
+ if (flags & toom6_vm1_neg)
+ {
+#if HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
+#else
+ mpn_add_n (w4, w3, w4, 2 * n + 1);
+ mpn_rshift (w4, w4, 2 * n + 1, 1);
+#endif
+ }
+ else
+ {
+#if HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
+#else
+ mpn_sub_n (w4, w3, w4, 2 * n + 1);
+ mpn_rshift (w4, w4, 2 * n + 1, 1);
+#endif
+ }
+
+ /* W2 =(W2 - W4)/3 */
+ mpn_sub_n (w2, w2, w4, 2 * n + 1);
+ mpn_divexact_by3 (w2, w2, 2 * n + 1);
+
+ /* W3 = W3 - W4 - W5 */
+ mpn_sub_n (w3, w3, w4, 2 * n + 1);
+ w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
+
+ /* W1 =(W1 - W3)/3 */
+ mpn_sub_n (w1, w1, w3, 2 * n + 1);
+ mpn_divexact_by3 (w1, w1, 2 * n + 1);
+
+ /*
+ [1 0 0 0 0 0;
+ 0 1 0 0 0 0;
+ 1 0 1 0 0 0;
+ 0 1 0 1 0 0;
+ 1 0 1 0 1 0;
+ 0 0 0 0 0 1]
+
+ pp[] prior to operations:
+ |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
+
+ summation scheme for remaining operations:
+ |______________5|n_____4|n_____3|n_____2|n______|n______|pp
+ |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
+ || H w4 | L w4 |
+ || H w2 | L w2 |
+ || H w1 | L w1 |
+ ||-H w1 |-L w1 |
+ |-H w0 |-L w0 ||-H w2 |-L w2 |
+ */
+ cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
+ MPN_INCR_U (pp + 3 * n + 1, n, cy);
+
+ /* W2 -= W0<<2 */
+#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+ cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
+#else
+ cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
+#endif
+#else
+ /* {W4,2*n+1} is now free and can be overwritten. */
+ cy = mpn_lshift(w4, w0, w0n, 2);
+ cy+= mpn_sub_n(w2, w2, w4, w0n);
+#endif
+ MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
+
+ /* W4L = W4L - W2L */
+ cy = mpn_sub_n (pp + n, pp + n, w2, n);
+ MPN_DECR_U (w3, 2 * n + 1, cy);
+
+ /* W3H = W3H + W2L */
+ cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
+ /* W1L + W2H */
+ cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
+ MPN_INCR_U (w1 + n, n + 1, cy);
+
+ /* W0 = W0 + W1H */
+ if (LIKELY (w0n > n))
+ cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
+ else
+ cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
+
+ /*
+ summation scheme for the next operation:
+ |...____5|n_____4|n_____3|n_____2|n______|n______|pp
+ |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
+ ...-w0___|-w1_w2 |
+ */
+ /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
+ cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
+
+ /* embankment is a "dirty trick" to avoid carry/borrow propagation
+ beyond allocated memory */
+ embankment = w0[w0n - 1] - 1;
+ w0[w0n - 1] = 1;
+ if (LIKELY (w0n > n)) {
+ if (cy4 > cy6)
+ MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
+ else
+ MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
+ MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
+ MPN_INCR_U (w0 + n, w0n - n, cy6);
+ } else {
+ MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
+ MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
+ }
+ w0[w0n - 1] += embankment;
+
+#undef w5
+#undef w3
+#undef w0
+
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_7pts.c b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_7pts.c
new file mode 100644
index 0000000..167c45b
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_7pts.c
@@ -0,0 +1,274 @@
+/* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
+
+ Contributed to the GNU project by Niels Möller.
+ Improvements by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2009, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+#define BINVERT_9 \
+ ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_15 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
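+
+/* For reference, assuming GMP_NUMB_BITS == 64 and no nail bits, the two
+   expressions above evaluate to BINVERT_9 = 0x8E38E38E38E38E39 and
+   BINVERT_15 = 0xEEEEEEEEEEEEEEEF; indeed 9 * 0x8E38E38E38E38E39 = 5*2^64 + 1
+   and 15 * 0xEEEEEEEEEEEEEEEF = 14*2^64 + 1, so both products are 1 mod 2^64,
+   as the exact divisions below require.  */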
+
+/* For the various mpn_divexact_byN here, fall back to using either
+ mpn_pi1_bdiv_q_1 or mpn_divexact_1. The former has less overhead and is
+ much faster if it is native. For now, since mpn_divexact_1 is native on
+ several platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
+ mpn_pi1_bdiv_q_1 unconditionally. FIXME. */
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+#ifndef mpn_divexact_by9
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,9,BINVERT_9,0)
+#else
+#define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
+#endif
+#endif
+
+#ifndef mpn_divexact_by15
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by15(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,15,BINVERT_15,0)
+#else
+#define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
+#endif
+#endif
+
+/* Interpolation for toom4, using the evaluation points 0, infinity,
+ 1, -1, 2, -2, 1/2. More precisely, we want to compute
+ f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
+ seven values
+
+ w0 = f(0),
+ w1 = f(-2),
+ w2 = f(1),
+ w3 = f(-1),
+ w4 = f(2),
+ w5 = 64 * f(1/2),
+ w6 = limit at infinity of f(x) / x^6,
+
+ The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
+ w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
+ w6n }. The other values are 2n + 1 limbs each (with most
+ significant limbs small). f(-1) and f(-2) may be negative, signs
+ determined by the flag bits. Inputs are destroyed.
+
+ Needs (2*n + 1) limbs of temporary storage.
+*/
+
+void
+mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
+ mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
+ mp_size_t w6n, mp_ptr tp)
+{
+ mp_size_t m;
+ mp_limb_t cy;
+
+ m = 2*n + 1;
+#define w0 rp
+#define w2 (rp + 2*n)
+#define w6 (rp + 6*n)
+
+ ASSERT (w6n > 0);
+ ASSERT (w6n <= 2*n);
+
+ /* Using formulas similar to Marco Bodrato's
+
+ W5 = W5 + W4
+ W1 =(W4 - W1)/2
+ W4 = W4 - W0
+ W4 =(W4 - W1)/4 - W6*16
+ W3 =(W2 - W3)/2
+ W2 = W2 - W3
+
+ W5 = W5 - W2*65 May be negative.
+ W2 = W2 - W6 - W0
+ W5 =(W5 + W2*45)/2 Now >= 0 again.
+ W4 =(W4 - W2)/3
+ W2 = W2 - W4
+
+ W1 = W5 - W1 May be negative.
+ W5 =(W5 - W3*8)/9
+ W3 = W3 - W5
+ W1 =(W1/15 + W5)/2 Now >= 0 again.
+ W5 = W5 - W1
+
+ where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
+ W4 = f(2), W5 = f(1/2), W6 = f(oo),
+
+ Note that most intermediate results are positive; the ones that
+ may be negative are represented in two's complement. We must
+ never shift right a value that may be negative, since that would
+ invalidate the sign bit. On the other hand, divexact by odd
+ numbers works fine with two's complement.
+ */
+
+ mpn_add_n (w5, w5, w4, m);
+ if (flags & toom7_w1_neg)
+ {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (w1, w1, w4, m);
+#else
+ mpn_add_n (w1, w1, w4, m); ASSERT (!(w1[0] & 1));
+ mpn_rshift (w1, w1, m, 1);
+#endif
+ }
+ else
+ {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (w1, w4, w1, m);
+#else
+ mpn_sub_n (w1, w4, w1, m); ASSERT (!(w1[0] & 1));
+ mpn_rshift (w1, w1, m, 1);
+#endif
+ }
+ mpn_sub (w4, w4, m, w0, 2*n);
+ mpn_sub_n (w4, w4, w1, m); ASSERT (!(w4[0] & 3));
+ mpn_rshift (w4, w4, m, 2); /* w4>=0 */
+
+ tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
+ mpn_sub (w4, w4, m, tp, w6n+1);
+
+ if (flags & toom7_w3_neg)
+ {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (w3, w3, w2, m);
+#else
+ mpn_add_n (w3, w3, w2, m); ASSERT (!(w3[0] & 1));
+ mpn_rshift (w3, w3, m, 1);
+#endif
+ }
+ else
+ {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (w3, w2, w3, m);
+#else
+ mpn_sub_n (w3, w2, w3, m); ASSERT (!(w3[0] & 1));
+ mpn_rshift (w3, w3, m, 1);
+#endif
+ }
+
+ mpn_sub_n (w2, w2, w3, m);
+
+ mpn_submul_1 (w5, w2, m, 65);
+ mpn_sub (w2, w2, m, w6, w6n);
+ mpn_sub (w2, w2, m, w0, 2*n);
+
+ mpn_addmul_1 (w5, w2, m, 45); ASSERT (!(w5[0] & 1));
+ mpn_rshift (w5, w5, m, 1);
+ mpn_sub_n (w4, w4, w2, m);
+
+ mpn_divexact_by3 (w4, w4, m);
+ mpn_sub_n (w2, w2, w4, m);
+
+ mpn_sub_n (w1, w5, w1, m);
+ mpn_lshift (tp, w3, m, 3);
+ mpn_sub_n (w5, w5, tp, m);
+ mpn_divexact_by9 (w5, w5, m);
+ mpn_sub_n (w3, w3, w5, m);
+
+ mpn_divexact_by15 (w1, w1, m);
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (w1, w1, w5, m);
+ w1[m - 1] &= GMP_NUMB_MASK >> 1;
+#else
+ mpn_add_n (w1, w1, w5, m); ASSERT (!(w1[0] & 1));
+ mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
+#endif
+
+ mpn_sub_n (w5, w5, w1, m);
+
+ /* These bounds are valid for the 4x4 polynomial product of toom44,
+ * and they are conservative for toom53 and toom62. */
+ ASSERT (w1[2*n] < 2);
+ ASSERT (w2[2*n] < 3);
+ ASSERT (w3[2*n] < 4);
+ ASSERT (w4[2*n] < 3);
+ ASSERT (w5[2*n] < 2);
+
+ /* Addition chain. Note carries and the 2n'th limbs that need to be
+ * added in.
+ *
+ * Special care is needed for w2[2n] and the corresponding carry,
+ * since the "simple" way of adding it all together would overwrite
+ * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
+ * the high half of w3 and the low half of w4.
+ *
+ * 7 6 5 4 3 2 1 0
+ * | | | | | | | | |
+ * ||w3 (2n+1)|
+ * ||w4 (2n+1)|
+ * ||w5 (2n+1)| ||w1 (2n+1)|
+ * + | w6 (w6n)| ||w2 (2n+1)| w0 (2n) | (share storage with r)
+ * -----------------------------------------------
+ * r | | | | | | | | |
+ * c7 c6 c5 c4 c3 Carries to propagate
+ */
+
+ cy = mpn_add_n (rp + n, rp + n, w1, m);
+ MPN_INCR_U (w2 + n + 1, n , cy);
+ cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
+ MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
+ cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
+ MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
+ cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
+ MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
+ if (w6n > n + 1)
+ {
+ cy = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1);
+ MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, cy);
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
+#if WANT_ASSERT
+ {
+ mp_size_t i;
+ for (i = w6n; i <= n; i++)
+ ASSERT (w5[n + i] == 0);
+ }
+#endif
+ }
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_8pts.c b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_8pts.c
new file mode 100644
index 0000000..5e65fab
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/toom_interpolate_8pts.c
@@ -0,0 +1,211 @@
+/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+#define BINVERT_15 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
+
+#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
+
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+#ifndef mpn_divexact_by45
+#if GMP_NUMB_BITS % 12 == 0
+#define mpn_divexact_by45(dst,src,size) \
+ (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
+#else
+#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
+#endif
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
+#else
+#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+ return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+ mp_limb_t __cy;
+ __cy = mpn_lshift (ws,src,n,s);
+ return __cy + mpn_sub_n (dst,dst,ws,n);
+#endif
+}
+#endif
+
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
+#else
+/* This is not a correct definition; it assumes no carry. */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
+do { \
+ mp_limb_t __cy; \
+ MPN_DECR_U (dst, nd, src[0] >> s); \
+ __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
+ MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
+} while (0)
+#endif
+
+/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
+ points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
+ we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
+ degree 7 (or 6), given the 8 (resp. 7) values:
+
+ r1 = limit at infinity of f(x) / x^7,
+ r2 = f(4),
+ r3 = f(-4),
+ r4 = f(2),
+ r5 = f(-2),
+ r6 = f(1),
+ r7 = f(-1),
+ r8 = f(0).
+
+ All pairs of the form f(n), f(-n) must already have been combined with
+ toom_couple_handling(f(n),...,f(-n),...)
+
+ The result is stored in {pp, spt + 7*n (or 6*n)}.
+ At entry, r8 is stored at {pp, 2n},
+ r5 is stored at {pp + 3n, 3n + 1}.
+
+ The other values are 2n+... limbs each (with most significant limbs small).
+
+ All intermediate results are positive.
+ Inputs are destroyed.
+*/
+
+void
+mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
+ mp_ptr r3, mp_ptr r7,
+ mp_size_t spt, mp_ptr ws)
+{
+ mp_limb_signed_t cy;
+ mp_ptr r5, r1;
+ r5 = (pp + 3 * n); /* 3n+1 */
+ r1 = (pp + 7 * n); /* spt */
+
+ /******************************* interpolation *****************************/
+
+ DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
+ cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
+ MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
+
+ DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
+ cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
+ MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
+
+ r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
+ cy = mpn_sub_n (r7, r7, r1, spt);
+ MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
+
+ ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
+ ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
+
+ ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
+
+ ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
+
+ mpn_divexact_by45 (r3, r3, 3 * n + 1);
+
+ ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
+
+ ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
+
+ /* last interpolation steps... */
+ /* ... are mixed with recomposition */
+
+ /***************************** recomposition *******************************/
+ /*
+ pp[] prior to operations:
+ |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
+
+ summation scheme for remaining operations:
+ |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
+ |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
+ ||_H r3|_M r3|_L*r3|
+ ||_H_r7|_M_r7|_L_r7|
+ ||-H r3|-M r3|-L*r3|
+ ||-H*r5|-M_r5|-L_r5|
+ */
+
+ cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
+ cy-= mpn_sub_n (pp + n, pp + n, r5, n);
+ if (cy > 0) {
+ MPN_INCR_U (r7 + n, 2*n + 1, 1);
+ cy = 0;
+ }
+
+ cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
+ MPN_DECR_U (r7 + 2*n, n + 1, cy);
+
+ cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
+ r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
+ cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
+ if (UNLIKELY(0 > cy))
+ MPN_DECR_U (r5 + n + 1, 2*n, 1);
+ else
+ MPN_INCR_U (r5 + n + 1, 2*n, cy);
+
+ ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
+
+ cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
+ MPN_INCR_U (r3 + 2*n, n + 1, cy);
+ cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
+ if (LIKELY(spt != n))
+ MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
+ else
+ ASSERT (r3[3*n] + cy == 0);
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/trialdiv.c b/vendor/gmp-6.3.0/mpn/generic/trialdiv.c
new file mode 100644
index 0000000..65e089f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/trialdiv.c
@@ -0,0 +1,131 @@
+/* mpn_trialdiv -- find small factors of an mpn number using trial division.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/*
+ This function finds the first (smallest) factor represented in
+ trialdivtab.h. It does not stop the factoring effort just because it has
+ reached some sensible limit, such as the square root of the input number.
+
+ The caller can limit the factoring effort by passing NPRIMES. The function
+ will then divide until that limit, or perhaps a few primes more. A position
+ which only mpn_trialdiv can make sense of is returned in the WHERE
+ parameter. It can be used for restarting the factoring effort; the first
+ call should pass 0 here.
+
+ Input: 1. A non-negative number T = {tp,tn}
+ 2. NPRIMES as described above,
+ 3. *WHERE as described above.
+ Output: 1. *WHERE updated as described above.
+ 2. Return value is non-zero if we found a factor, else zero
+ To get the actual prime factor, compute the mod B inverse
+ of the return value.
+*/
+
+#include "gmp-impl.h"
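+
+/* A minimal usage sketch (illustrative only, not part of the GMP sources):
+   limit the effort to roughly the first 1000 primes and, if a factor is
+   found, recover the prime itself by inverting the returned value mod B with
+   binvert_limb.  The helper name and the bound 1000 are arbitrary choices
+   made for this example.  */
+static mp_limb_t
+trialdiv_small_factor (mp_srcptr tp, mp_size_t tn)
+{
+  int where = 0;
+  mp_limb_t binv, p;
+
+  binv = mpn_trialdiv (tp, tn, 1000, &where);
+  if (binv == 0)
+    return 0;                   /* no small factor found */
+  binvert_limb (p, binv);       /* p = 1/binv mod B, the actual prime */
+  return p;
+}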
+
+struct gmp_primes_dtab {
+ mp_limb_t binv;
+ mp_limb_t lim;
+};
+
+struct gmp_primes_ptab {
+ mp_limb_t ppp; /* primes, multiplied together */
+ mp_limb_t cps[7]; /* ppp values pre-computed for mpn_mod_1s_4p */
+ gmp_uint_least32_t idx:24; /* index of the first prime in dtab */
+ gmp_uint_least32_t np :8; /* number of primes related to this entry */
+};
+
+
+static const struct gmp_primes_dtab gmp_primes_dtab[] =
+{
+#define WANT_dtab
+#define P(p,inv,lim) {inv,lim}
+#include "trialdivtab.h"
+#undef WANT_dtab
+#undef P
+ {0,0}
+};
+
+static const struct gmp_primes_ptab gmp_primes_ptab[] =
+{
+#define WANT_ptab
+#include "trialdivtab.h"
+#undef WANT_ptab
+};
+
+#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
+
+/* FIXME: We could optimize out one of the outer loop conditions if we
+ had a final ptab entry with a huge np field. */
+mp_limb_t
+mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where)
+{
+ mp_limb_t ppp;
+ const mp_limb_t *cps;
+ const struct gmp_primes_dtab *dp;
+ long i, j, idx, np;
+ mp_limb_t r, q;
+
+ ASSERT (tn >= 1);
+
+ for (i = *where; i < PTAB_LINES; i++)
+ {
+ ppp = gmp_primes_ptab[i].ppp;
+ cps = gmp_primes_ptab[i].cps;
+
+ r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
+
+ idx = gmp_primes_ptab[i].idx;
+ np = gmp_primes_ptab[i].np;
+
+ /* Check divisibility by individual primes. */
+ dp = &gmp_primes_dtab[idx] + np;
+ for (j = -np; j < 0; j++)
+ {
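+          /* Exact-division divisibility test: binv is 1/p mod B and lim is
+             floor ((B-1)/p), both taken from trialdivtab.h, so r is a
+             multiple of the prime p exactly when r * binv (mod B) <= lim.  */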
+ q = r * dp[j].binv;
+ if (q <= dp[j].lim)
+ {
+ *where = i;
+ return dp[j].binv;
+ }
+ }
+
+ nprimes -= np;
+ if (nprimes <= 0)
+ return 0;
+ }
+ return 0;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/udiv_w_sdiv.c b/vendor/gmp-6.3.0/mpn/generic/udiv_w_sdiv.c
new file mode 100644
index 0000000..7907135
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/udiv_w_sdiv.c
@@ -0,0 +1,141 @@
+/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
+ division.
+
+ Contributed by Peter L. Montgomery.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY SAFE
+ TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE
+ GNU MP RELEASE.
+
+
+Copyright 1992, 1994, 1996, 2000, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
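+/* Usage sketch (mirroring udiv_qrnnd): with B = 2^GMP_LIMB_BITS, d != 0 and
+   a1 < d, the call q = mpn_udiv_w_sdiv (&r, a1, a0, d) returns the quotient q
+   and stores the remainder r such that a1*B + a0 = q*d + r with 0 <= r < d.
+   For example, assuming 64-bit limbs, a1 = 1, a0 = 4, d = 7 gives
+   q = 0x2492492492492492 and r = 6, since 7 * 0x2492492492492492 + 6
+   = 2^64 + 4.  */
+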
+mp_limb_t
+mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
+{
+ mp_limb_t q, r;
+ mp_limb_t c0, c1, b1;
+
+ ASSERT (d != 0);
+ ASSERT (a1 < d);
+
+ if ((mp_limb_signed_t) d >= 0)
+ {
+ if (a1 < d - a1 - (a0 >> (GMP_LIMB_BITS - 1)))
+ {
+ /* dividend, divisor, and quotient are nonnegative */
+ sdiv_qrnnd (q, r, a1, a0, d);
+ }
+ else
+ {
+ /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
+ sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (GMP_LIMB_BITS - 1));
+ /* Divide (c1*2^32 + c0) by d */
+ sdiv_qrnnd (q, r, c1, c0, d);
+ /* Add 2^31 to quotient */
+ q += (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
+ }
+ }
+ else
+ {
+ b1 = d >> 1; /* d/2, between 2^30 and 2^31 - 1 */
+ c1 = a1 >> 1; /* A/2 */
+ c0 = (a1 << (GMP_LIMB_BITS - 1)) + (a0 >> 1);
+
+ if (a1 < b1) /* A < 2^32*b1, so A/2 < 2^31*b1 */
+ {
+ sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+ r = 2*r + (a0 & 1); /* Remainder from A/(2*b1) */
+ if ((d & 1) != 0)
+ {
+ if (r >= q)
+ r = r - q;
+ else if (q - r <= d)
+ {
+ r = r - q + d;
+ q--;
+ }
+ else
+ {
+ r = r - q + 2*d;
+ q -= 2;
+ }
+ }
+ }
+ else if (c1 < b1) /* So 2^31 <= (A/2)/b1 < 2^32 */
+ {
+ c1 = (b1 - 1) - c1;
+ c0 = ~c0; /* logical NOT */
+
+ sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+ q = ~q; /* (A/2)/b1 */
+ r = (b1 - 1) - r;
+
+ r = 2*r + (a0 & 1); /* A/(2*b1) */
+
+ if ((d & 1) != 0)
+ {
+ if (r >= q)
+ r = r - q;
+ else if (q - r <= d)
+ {
+ r = r - q + d;
+ q--;
+ }
+ else
+ {
+ r = r - q + 2*d;
+ q -= 2;
+ }
+ }
+ }
+ else /* Implies c1 = b1 */
+ { /* Hence a1 = d - 1 = 2*b1 - 1 */
+ if (a0 >= -d)
+ {
+ q = -CNST_LIMB(1);
+ r = a0 + d;
+ }
+ else
+ {
+ q = -CNST_LIMB(2);
+ r = a0 + 2*d;
+ }
+ }
+ }
+
+ *rp = r;
+ return q;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/zero.c b/vendor/gmp-6.3.0/mpn/generic/zero.c
new file mode 100644
index 0000000..1a05453
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/zero.c
@@ -0,0 +1,41 @@
+/* mpn_zero
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+void
+mpn_zero (mp_ptr rp, mp_size_t n)
+{
+ mp_size_t i;
+
+ rp += n;
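+  /* i runs from -n up to -1, so rp[i] sweeps the original {rp, n} range. */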
+ for (i = -n; i != 0; i++)
+ rp[i] = 0;
+}
diff --git a/vendor/gmp-6.3.0/mpn/generic/zero_p.c b/vendor/gmp-6.3.0/mpn/generic/zero_p.c
new file mode 100644
index 0000000..c92f9b8
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/zero_p.c
@@ -0,0 +1,33 @@
+/* mpn_zero_p (x,xsize) -- Return 1 if X is zero, 0 if it is non-zero.
+
+Copyright 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __GMP_FORCE_mpn_zero_p 1
+
+#include "gmp-impl.h"