1 files changed, 707 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpz/bin_uiui.c b/vendor/gmp-6.3.0/mpz/bin_uiui.c
new file mode 100644
index 0000000..542d485
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpz/bin_uiui.c
@@ -0,0 +1,707 @@
+/* mpz_bin_uiui - compute n over k.
+
+Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+Copyright 2010-2012, 2015-2018, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef BIN_GOETGHELUCK_THRESHOLD
+#define BIN_GOETGHELUCK_THRESHOLD  512
+#endif
+#ifndef BIN_UIUI_ENABLE_SMALLDC
+#define BIN_UIUI_ENABLE_SMALLDC    1
+#endif
+#ifndef BIN_UIUI_RECURSIVE_SMALLDC
+#define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32)
+#endif
+
+/* Algorithm:
+
+   Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8)
+   which are then accumulated into mpn numbers.  The first inner loop
+   accumulates divisor factors, the 2nd inner loop accumulates exactly the same
+   number of dividend factors.  We avoid accumulating more for the divisor,
+   even with its smaller factors, since we else cannot guarantee divisibility.
+
+   Since we know each division will yield an integer, we compute the quotient
+   using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod
+   2^t.
+
+   Improvements:
+
+   (1) An obvious improvement to this code would be to compute mod 2^t
+   everywhere.  Unfortunately, we cannot determine t beforehand, unless we
+   invoke some approximation, such as Stirling's formula.  Of course, we don't
+   need t to be tight.  However, it is not clear that this would help much,
+   our numbers are kept reasonably small already.
+
+   (2) Compute nmax/kmax semi-accurately, without scalar division or a loop.
+   Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index,
+   would make it both reasonably accurate and fast.  (We could use a table
+   stored into a limb, perhaps.)  The table should take the removed factors of
+   2 into account (those done on-the-fly in mulN).
+
+   (3) The first time in the loop we compute the odd part of a
+   factorial in kp, we might use oddfac_1 for this task.
+ */
+
+/* This threshold determines how large divisor to accumulate before we call
+   bdiv.  Perhaps we should never call bdiv, and accumulate all we are told,
+   since we are just basecase code anyway?  Presumably, this depends on the
+   relative speed of the asymptotically fast code and this code.  */
+#define SOME_THRESHOLD 20
+
+/* Multiply-into-limb functions.  These remove factors of 2 on-the-fly.  FIXME:
+   All versions of MAXFACS don't take this 2 removal into account now, meaning
+   that then, shifting just adds some overhead.  (We remove factors from the
+   completed limb anyway.)  */
+
+static mp_limb_t
+mul1 (mp_limb_t m)
+{
+  return m;
+}
+
+static mp_limb_t
+mul2 (mp_limb_t m)
+{
+  /* We need to shift before multiplying, to avoid an overflow. */
+  mp_limb_t m01 = (m | 1) * ((m + 1) >> 1);
+  return m01;
+}
+
+static mp_limb_t
+mul3 (mp_limb_t m)
+{
+  mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
+  mp_limb_t m2 = (m + 2);
+  return m01 * m2;
+}
+
+static mp_limb_t
+mul4 (mp_limb_t m)
+{
+  mp_limb_t m03 = (m + 0) * (m + 3) >> 1;
+  return m03 * (m03 + 1); /* mul2 (m03) ? */
+}
+
+static mp_limb_t
+mul5 (mp_limb_t m)
+{
+  mp_limb_t m03 = (m + 0) * (m + 3) >> 1;
+  mp_limb_t m034 = m03 * (m + 4);
+  return (m03 + 1) * m034;
+}
+
+static mp_limb_t
+mul6 (mp_limb_t m)
+{
+  mp_limb_t m05 = (m + 0) * (m + 5);
+  mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3;
+  return m1234 * (m05 >> 1);
+}
+
+static mp_limb_t
+mul7 (mp_limb_t m)
+{
+  mp_limb_t m05 = (m + 0) * (m + 5);
+  mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3;
+  mp_limb_t m056 = m05 * (m + 6) >> 1;
+  return m1234 * m056;
+}
+
+static mp_limb_t
+mul8 (mp_limb_t m)
+{
+  mp_limb_t m07 = (m + 0) * (m + 7);
+  mp_limb_t m0257 = m07 * (m07 + 10) >> 3;
+  mp_limb_t m1346 = m07 + 9 + m0257;
+  return m0257 * m1346;
+}
+
+/*
+static mp_limb_t
+mul9 (mp_limb_t m)
+{
+  return (m + 8) * mul8 (m) ;
+}
+
+static mp_limb_t
+mul10 (mp_limb_t m)
+{
+  mp_limb_t m09 = (m + 0) * (m + 9);
+  mp_limb_t m18 = (m09 >> 1) + 4;
+  mp_limb_t m0369 = m09 * (m09 + 18) >> 3;
+  mp_limb_t m2457 = m09 * 2 + 35 + m0369;
+  return ((m0369 * m2457) >> 1) * m18;
+}
+*/
+
+typedef mp_limb_t (* mulfunc_t) (mp_limb_t);
+
+static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8 /* ,mul9,mul10 */};
+#define M (numberof(mulfunc))
+
+/* Number of factors-of-2 removed by the corresponding mulN function.  */
+static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6 /*,6,8*/};
+
+#if 1
+/* This variant is inaccurate but share the code with other functions.  */
+#define MAXFACS(max,l)							\
+  do {									\
+    (max) = log_n_max (l);						\
+  } while (0)
+#else
+
+/* This variant is exact(?) but uses a loop.  It takes the 2 removal
+ of mulN into account.  */
+static const unsigned long ftab[] =
+#if GMP_NUMB_BITS == 64
+  /* 1 to 8 factors per iteration */
+  {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x16a09e667),0x32cbfc,0x16a08,0x24c0,0xa11,0x345,0x1ab /*,0xe9,0x8e */};
+#endif
+#if GMP_NUMB_BITS == 32
+  /* 1 to 7 factors per iteration */
+  {0xffffffff,0x16a09,0x7ff,0x168,0x6f,0x3d,0x20 /* ,0x17 */};
+#endif
+
+#define MAXFACS(max,l)							\
+  do {									\
+    int __i;								\
+    for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--)		\
+      ;									\
+    (max) = __i + 1;							\
+  } while (0)
+#endif
+
+/* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis
+   is an odd integer. */
+static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE };
+
+static void
+mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  unsigned nmax, kmax, nmaxnow, numfac;
+  mp_ptr np, kp;
+  mp_size_t nn, kn, alloc;
+  mp_limb_t i, j, t, iii, jjj, cy, dinv;
+  int cnt;
+  mp_size_t maxn;
+  TMP_DECL;
+
+  ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT);
+  TMP_MARK;
+
+  maxn = 1 + n / GMP_NUMB_BITS;    /* absolutely largest result size (limbs) */
+
+  /* FIXME: This allocation might be insufficient, but is usually way too
+     large.  */
+  alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD);
+  alloc = MIN (alloc, (mp_size_t) k) + 1;
+  TMP_ALLOC_LIMBS_2 (np, alloc, kp, SOME_THRESHOLD + 1);
+
+  MAXFACS (nmax, n);
+  ASSERT (nmax <= M);
+  MAXFACS (kmax, k);
+  ASSERT (kmax <= M);
+  ASSERT (k >= M);
+
+  i = n - k + 1;
+
+  np[0] = 1; nn = 1;
+
+  numfac = 1;
+  j = ODD_FACTORIAL_TABLE_LIMIT + 1;
+  jjj = ODD_FACTORIAL_TABLE_MAX;
+  ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX);
+
+  while (1)
+    {
+      kp[0] = jjj;				/* store new factors */
+      kn = 1;
+      t = k - j + 1;
+      kmax = MIN (kmax, t);
+
+      while (kmax != 0 && kn < SOME_THRESHOLD)
+	{
+	  jjj = mulfunc[kmax - 1] (j);
+	  j += kmax;				/* number of factors used */
+	  count_trailing_zeros (cnt, jjj);	/* count low zeros */
+	  jjj >>= cnt;				/* remove remaining low zeros */
+	  cy = mpn_mul_1 (kp, kp, kn, jjj);	/* accumulate new factors */
+	  kp[kn] = cy;
+	  kn += cy != 0;
+	  t = k - j + 1;
+	  kmax = MIN (kmax, t);
+	}
+      numfac = j - numfac;
+
+      while (numfac != 0)
+	{
+	  nmaxnow = MIN (nmax, numfac);
+	  iii = mulfunc[nmaxnow - 1] (i);
+	  i += nmaxnow;				/* number of factors used */
+	  count_trailing_zeros (cnt, iii);	/* count low zeros */
+	  iii >>= cnt;				/* remove remaining low zeros */
+	  cy = mpn_mul_1 (np, np, nn, iii);	/* accumulate new factors */
+	  np[nn] = cy;
+	  nn += cy != 0;
+	  numfac -= nmaxnow;
+	}
+
+      ASSERT (nn < alloc);
+
+      binvert_limb (dinv, kp[0]);
+      nn += (np[nn - 1] >= kp[kn - 1]);
+      nn -= kn;
+      mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
+      mpn_neg (np, np, nn);
+
+      if (kmax == 0)
+	break;
+      numfac = j;
+
+      jjj = mulfunc[kmax - 1] (j);
+      j += kmax;				/* number of factors used */
+      count_trailing_zeros (cnt, jjj);		/* count low zeros */
+      jjj >>= cnt;				/* remove remaining low zeros */
+    }
+
+  /* Put back the right number of factors of 2.  */
+  popc_limb (cnt, n - k);
+  popc_limb (j, k);
+  cnt += j;
+  popc_limb (j, n);
+  cnt -= j;
+  if (cnt != 0)
+    {
+      ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */
+      cy = mpn_lshift (np, np, nn, cnt);
+      np[nn] = cy;
+      nn += cy != 0;
+    }
+
+  nn -= np[nn - 1] == 0;	/* normalisation */
+
+  kp = MPZ_NEWALLOC (r, nn);
+  SIZ(r) = nn;
+  MPN_COPY (kp, np, nn);
+  TMP_FREE;
+}
+
+static void
+mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  unsigned nmax, numfac;
+  mp_ptr rp;
+  mp_size_t rn, alloc;
+  mp_limb_t i, iii, cy;
+  unsigned i2cnt, cnt;
+
+  MAXFACS (nmax, n);
+  nmax = MIN (nmax, M);
+
+  i = n - k + 1;
+
+  i2cnt = __gmp_fac2cnt_table[k / 2 - 1];		/* low zeros count */
+  if (nmax >= k)
+    {
+      MPZ_NEWALLOC (r, 1) [0] = mulfunc[k - 1] (i) * facinv[k - 2] >>
+	(i2cnt - tcnttab[k - 1]);
+      SIZ(r) = 1;
+      return;
+    }
+
+  count_leading_zeros (cnt, (mp_limb_t) n);
+  cnt = GMP_LIMB_BITS - cnt;
+  alloc = cnt * k / GMP_NUMB_BITS + 3;	/* FIXME: ensure rounding is enough. */
+  rp = MPZ_NEWALLOC (r, alloc);
+
+  rp[0] = mulfunc[nmax - 1] (i);
+  rn = 1;
+  i += nmax;				/* number of factors used */
+  i2cnt -= tcnttab[nmax - 1];		/* low zeros count */
+  numfac = k - nmax;
+  do
+    {
+      nmax = MIN (nmax, numfac);
+      iii = mulfunc[nmax - 1] (i);
+      i += nmax;			/* number of factors used */
+      i2cnt -= tcnttab[nmax - 1];	/* update low zeros count */
+      cy = mpn_mul_1 (rp, rp, rn, iii);	/* accumulate new factors */
+      rp[rn] = cy;
+      rn += cy != 0;
+      numfac -= nmax;
+    } while (numfac != 0);
+
+  ASSERT (rn < alloc);
+
+  mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], i2cnt);
+  /* A two-fold, branch-free normalisation is possible :*/
+  /* rn -= rp[rn - 1] == 0; */
+  /* rn -= rp[rn - 1] == 0; */
+  MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+  SIZ(r) = rn;
+}
+
+/* Algorithm:
+
+   Plain and simply multiply things together.
+
+   We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such
+   that k!/2^t is odd).
+
+*/
+
+static mp_limb_t
+bc_bin_uiui (unsigned int n, unsigned int k)
+{
+  return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2])
+    << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1]))
+    & GMP_NUMB_MASK;
+}
+
+/* Algorithm:
+
+   Recursively exploit the relation
+   bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) .
+
+   Values for binomial(k,k>>1) that fit in a limb are precomputed
+   (with inverses).
+*/
+
+/* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] =
+   binomial(i*2,i)/2^t (where t is chosen so that it is odd). */
+static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE };
+
+/* bin2kkinv[i] = bin2kk[i]^-1 mod B */
+static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE };
+
+/* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */
+static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE };
+
+static void
+mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  mp_ptr rp;
+  mp_size_t rn;
+  unsigned long int hk;
+
+  hk = k >> 1;
+
+  if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT)
+    mpz_smallk_bin_uiui (r, n, hk);
+  else
+    mpz_smallkdc_bin_uiui (r, n, hk);
+  k -= hk;
+  n -= hk;
+  if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {
+    mp_limb_t cy;
+    rn = SIZ (r);
+    rp = MPZ_REALLOC (r, rn + 1);
+    cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k));
+    rp [rn] = cy;
+    rn += cy != 0;
+  } else {
+    mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3];
+    mpz_t t;
+
+    ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3;
+    PTR (t) = buffer;
+    if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT)
+      mpz_smallk_bin_uiui (t, n, k);
+    else
+      mpz_smallkdc_bin_uiui (t, n, k);
+    mpz_mul (r, r, t);
+    rp = PTR (r);
+    rn = SIZ (r);
+  }
+
+  mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET],
+		    bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET],
+		    fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk));
+  /* A two-fold, branch-free normalisation is possible :*/
+  /* rn -= rp[rn - 1] == 0; */
+  /* rn -= rp[rn - 1] == 0; */
+  MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+  SIZ(r) = rn;
+}
+
+/* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K).
+ *
+ * Contributed to the GNU project by Marco Bodrato.
+ *
+ * Implementation of the algorithm by P. Goetgheluck, "Computing
+ * Binomial Coefficients", The American Mathematical Monthly, Vol. 94,
+ * No. 4 (April 1987), pp. 360-365.
+ *
+ * Acknowledgment: Peter Luschny did spot the slowness of the previous
+ * code and suggested the reference.
+ */
+
+/* TODO: Remove duplicated constants / macros / static functions...
+ */
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)			\
+  if ((PR) > (MAX_PR)) {					\
+    (VEC)[(I)++] = (PR);					\
+    (PR) = 1;							\
+  }
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
+  do {								\
+    if ((PR) > (MAX_PR)) {					\
+      (VEC)[(I)++] = (PR);					\
+      (PR) = (P);						\
+    } else							\
+      (PR) *= (P);						\
+  } while (0)
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end)			\
+    __max_i = (end);						\
+								\
+    do {							\
+      ++__i;							\
+      if ((*__sieve & __mask) == 0)				\
+	{							\
+	  mp_limb_t prime;					\
+	  prime = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
+  do {								\
+    mp_limb_t __mask, *__sieve, __max_i, __i;			\
+								\
+    __i = (start)-(off);					\
+    __sieve = (sieve) + __i / GMP_LIMB_BITS;			\
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
+    __i += (off);						\
+								\
+    LOOP_ON_SIEVE_CONTINUE(prime,end)
+
+#define LOOP_ON_SIEVE_STOP					\
+	}							\
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
+      __sieve += __mask & 1;					\
+    }  while (__i <= __max_i)
+
+#define LOOP_ON_SIEVE_END					\
+    LOOP_ON_SIEVE_STOP;						\
+  } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if WANT_ASSERT
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+
+/*********************************************************/
+/* Section binomial: fast binomial implementation        */
+/*********************************************************/
+
+#define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I)	\
+  do {							\
+    mp_limb_t __a, __b, __prime, __ma,__mb;		\
+    __prime = (P);					\
+    __a = (N); __b = (K); __mb = 0;			\
+    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);		\
+    do {						\
+      __mb += __b % __prime; __b /= __prime;		\
+      __ma = __a % __prime; __a /= __prime;		\
+      if (__ma < __mb) {				\
+        __mb = 1; (PR) *= __prime;			\
+      } else  __mb = 0;					\
+    } while (__a >= __prime);				\
+  } while (0)
+
+#define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I)	\
+  do {							\
+    mp_limb_t __prime;					\
+    __prime = (P);					\
+    if (((N) % __prime) < ((K) % __prime)) {		\
+      FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I);	\
+    }							\
+  } while (0)
+
+/* Returns an approximation of the sqare root of x.
+ * It gives:
+ *   limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
+ * or
+ *   x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
+ */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+  int s;
+
+  ASSERT (x > 2);
+  count_leading_zeros (s, x);
+  s = (GMP_LIMB_BITS - s) >> 1;
+  return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s));
+}
+
+static void
+mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  mp_limb_t *sieve, *factors, count;
+  mp_limb_t prod, max_prod;
+  mp_size_t j;
+  TMP_DECL;
+
+  ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13);
+  ASSERT (n >= 25);
+
+  TMP_MARK;
+  sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
+
+  count = gmp_primesieve (sieve, n) + 1;
+  factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1);
+
+  max_prod = GMP_NUMB_MAX / n;
+
+  /* Handle primes = 2, 3 separately. */
+  popc_limb (count, n - k);
+  popc_limb (j, k);
+  count += j;
+  popc_limb (j, n);
+  count -= j;
+  prod = CNST_LIMB(1) << count;
+
+  j = 0;
+  COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j);
+
+  /* Accumulate prime factors from 5 to n/2 */
+    {
+      mp_limb_t s;
+
+      s = limb_apprsqrt(n);
+      s = n_to_bit (s);
+      ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
+      ASSERT (s <= n_to_bit (n >> 1));
+      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
+      COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
+      LOOP_ON_SIEVE_STOP;
+
+      ASSERT (max_prod <= GMP_NUMB_MAX / 2);
+      max_prod <<= 1;
+
+      LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n >> 1));
+      SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
+      LOOP_ON_SIEVE_END;
+
+      max_prod >>= 1;
+    }
+
+  /* Store primes from (n-k)+1 to n */
+  ASSERT (n_to_bit (n - k) < n_to_bit (n));
+
+  LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve);
+  FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+  LOOP_ON_SIEVE_END;
+
+  if (LIKELY (j != 0))
+    {
+      factors[j++] = prod;
+      mpz_prodlimbs (r, factors, j);
+    }
+  else
+    {
+      MPZ_NEWALLOC (r, 1)[0] = prod;
+      SIZ (r) = 1;
+    }
+  TMP_FREE;
+}
+
+#undef COUNT_A_PRIME
+#undef SH_COUNT_A_PRIME
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
+
+/*********************************************************/
+/* End of implementation of Goetgheluck's algorithm      */
+/*********************************************************/
+
+void
+mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  if (UNLIKELY (n < k)) {
+    SIZ (r) = 0;
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  } else if (UNLIKELY (n > GMP_NUMB_MAX)) {
+    mpz_t tmp;
+
+    mpz_init_set_ui (tmp, n);
+    mpz_bin_ui (r, tmp, k);
+    mpz_clear (tmp);
+#endif
+  } else {
+    ASSERT (n <= GMP_NUMB_MAX);
+    /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
+    k = MIN (k, n - k);
+    if (k < 2) {
+      MPZ_NEWALLOC (r, 1)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */
+      SIZ(r) = 1;
+    } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */
+      MPZ_NEWALLOC (r, 1)[0] = bc_bin_uiui (n, k);
+      SIZ(r) = 1;
+    } else if (k <= ODD_FACTORIAL_TABLE_LIMIT)
+      mpz_smallk_bin_uiui (r, n, k);
+    else if (BIN_UIUI_ENABLE_SMALLDC &&
+	     k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2)
+      mpz_smallkdc_bin_uiui (r, n, k);
+    else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) &&
+	     k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */
+      mpz_goetgheluck_bin_uiui (r, n, k);
+    else
+      mpz_bdiv_bin_uiui (r, n, k);
+  }
+}