diff options
Diffstat (limited to 'vendor/gmp-6.3.0/mpz/bin_uiui.c')
-rw-r--r-- | vendor/gmp-6.3.0/mpz/bin_uiui.c | 707 |
1 files changed, 707 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpz/bin_uiui.c b/vendor/gmp-6.3.0/mpz/bin_uiui.c new file mode 100644 index 0000000..542d485 --- /dev/null +++ b/vendor/gmp-6.3.0/mpz/bin_uiui.c @@ -0,0 +1,707 @@ +/* mpz_bin_uiui - compute n over k. + +Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. + +Copyright 2010-2012, 2015-2018, 2020 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef BIN_GOETGHELUCK_THRESHOLD +#define BIN_GOETGHELUCK_THRESHOLD 512 +#endif +#ifndef BIN_UIUI_ENABLE_SMALLDC +#define BIN_UIUI_ENABLE_SMALLDC 1 +#endif +#ifndef BIN_UIUI_RECURSIVE_SMALLDC +#define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32) +#endif + +/* Algorithm: + + Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8) + which are then accumulated into mpn numbers. The first inner loop + accumulates divisor factors, the 2nd inner loop accumulates exactly the same + number of dividend factors. We avoid accumulating more for the divisor, + even with its smaller factors, since we else cannot guarantee divisibility. + + Since we know each division will yield an integer, we compute the quotient + using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod + 2^t. + + Improvements: + + (1) An obvious improvement to this code would be to compute mod 2^t + everywhere. Unfortunately, we cannot determine t beforehand, unless we + invoke some approximation, such as Stirling's formula. Of course, we don't + need t to be tight. However, it is not clear that this would help much, + our numbers are kept reasonably small already. + + (2) Compute nmax/kmax semi-accurately, without scalar division or a loop. + Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index, + would make it both reasonably accurate and fast. (We could use a table + stored into a limb, perhaps.) The table should take the removed factors of + 2 into account (those done on-the-fly in mulN). + + (3) The first time in the loop we compute the odd part of a + factorial in kp, we might use oddfac_1 for this task. + */ + +/* This threshold determines how large divisor to accumulate before we call + bdiv. Perhaps we should never call bdiv, and accumulate all we are told, + since we are just basecase code anyway? Presumably, this depends on the + relative speed of the asymptotically fast code and this code. */ +#define SOME_THRESHOLD 20 + +/* Multiply-into-limb functions. These remove factors of 2 on-the-fly. FIXME: + All versions of MAXFACS don't take this 2 removal into account now, meaning + that then, shifting just adds some overhead. (We remove factors from the + completed limb anyway.) */ + +static mp_limb_t +mul1 (mp_limb_t m) +{ + return m; +} + +static mp_limb_t +mul2 (mp_limb_t m) +{ + /* We need to shift before multiplying, to avoid an overflow. */ + mp_limb_t m01 = (m | 1) * ((m + 1) >> 1); + return m01; +} + +static mp_limb_t +mul3 (mp_limb_t m) +{ + mp_limb_t m01 = (m + 0) * (m + 1) >> 1; + mp_limb_t m2 = (m + 2); + return m01 * m2; +} + +static mp_limb_t +mul4 (mp_limb_t m) +{ + mp_limb_t m03 = (m + 0) * (m + 3) >> 1; + return m03 * (m03 + 1); /* mul2 (m03) ? */ +} + +static mp_limb_t +mul5 (mp_limb_t m) +{ + mp_limb_t m03 = (m + 0) * (m + 3) >> 1; + mp_limb_t m034 = m03 * (m + 4); + return (m03 + 1) * m034; +} + +static mp_limb_t +mul6 (mp_limb_t m) +{ + mp_limb_t m05 = (m + 0) * (m + 5); + mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3; + return m1234 * (m05 >> 1); +} + +static mp_limb_t +mul7 (mp_limb_t m) +{ + mp_limb_t m05 = (m + 0) * (m + 5); + mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3; + mp_limb_t m056 = m05 * (m + 6) >> 1; + return m1234 * m056; +} + +static mp_limb_t +mul8 (mp_limb_t m) +{ + mp_limb_t m07 = (m + 0) * (m + 7); + mp_limb_t m0257 = m07 * (m07 + 10) >> 3; + mp_limb_t m1346 = m07 + 9 + m0257; + return m0257 * m1346; +} + +/* +static mp_limb_t +mul9 (mp_limb_t m) +{ + return (m + 8) * mul8 (m) ; +} + +static mp_limb_t +mul10 (mp_limb_t m) +{ + mp_limb_t m09 = (m + 0) * (m + 9); + mp_limb_t m18 = (m09 >> 1) + 4; + mp_limb_t m0369 = m09 * (m09 + 18) >> 3; + mp_limb_t m2457 = m09 * 2 + 35 + m0369; + return ((m0369 * m2457) >> 1) * m18; +} +*/ + +typedef mp_limb_t (* mulfunc_t) (mp_limb_t); + +static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8 /* ,mul9,mul10 */}; +#define M (numberof(mulfunc)) + +/* Number of factors-of-2 removed by the corresponding mulN function. */ +static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6 /*,6,8*/}; + +#if 1 +/* This variant is inaccurate but share the code with other functions. */ +#define MAXFACS(max,l) \ + do { \ + (max) = log_n_max (l); \ + } while (0) +#else + +/* This variant is exact(?) but uses a loop. It takes the 2 removal + of mulN into account. */ +static const unsigned long ftab[] = +#if GMP_NUMB_BITS == 64 + /* 1 to 8 factors per iteration */ + {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x16a09e667),0x32cbfc,0x16a08,0x24c0,0xa11,0x345,0x1ab /*,0xe9,0x8e */}; +#endif +#if GMP_NUMB_BITS == 32 + /* 1 to 7 factors per iteration */ + {0xffffffff,0x16a09,0x7ff,0x168,0x6f,0x3d,0x20 /* ,0x17 */}; +#endif + +#define MAXFACS(max,l) \ + do { \ + int __i; \ + for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--) \ + ; \ + (max) = __i + 1; \ + } while (0) +#endif + +/* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis + is an odd integer. */ +static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE }; + +static void +mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) +{ + unsigned nmax, kmax, nmaxnow, numfac; + mp_ptr np, kp; + mp_size_t nn, kn, alloc; + mp_limb_t i, j, t, iii, jjj, cy, dinv; + int cnt; + mp_size_t maxn; + TMP_DECL; + + ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT); + TMP_MARK; + + maxn = 1 + n / GMP_NUMB_BITS; /* absolutely largest result size (limbs) */ + + /* FIXME: This allocation might be insufficient, but is usually way too + large. */ + alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD); + alloc = MIN (alloc, (mp_size_t) k) + 1; + TMP_ALLOC_LIMBS_2 (np, alloc, kp, SOME_THRESHOLD + 1); + + MAXFACS (nmax, n); + ASSERT (nmax <= M); + MAXFACS (kmax, k); + ASSERT (kmax <= M); + ASSERT (k >= M); + + i = n - k + 1; + + np[0] = 1; nn = 1; + + numfac = 1; + j = ODD_FACTORIAL_TABLE_LIMIT + 1; + jjj = ODD_FACTORIAL_TABLE_MAX; + ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX); + + while (1) + { + kp[0] = jjj; /* store new factors */ + kn = 1; + t = k - j + 1; + kmax = MIN (kmax, t); + + while (kmax != 0 && kn < SOME_THRESHOLD) + { + jjj = mulfunc[kmax - 1] (j); + j += kmax; /* number of factors used */ + count_trailing_zeros (cnt, jjj); /* count low zeros */ + jjj >>= cnt; /* remove remaining low zeros */ + cy = mpn_mul_1 (kp, kp, kn, jjj); /* accumulate new factors */ + kp[kn] = cy; + kn += cy != 0; + t = k - j + 1; + kmax = MIN (kmax, t); + } + numfac = j - numfac; + + while (numfac != 0) + { + nmaxnow = MIN (nmax, numfac); + iii = mulfunc[nmaxnow - 1] (i); + i += nmaxnow; /* number of factors used */ + count_trailing_zeros (cnt, iii); /* count low zeros */ + iii >>= cnt; /* remove remaining low zeros */ + cy = mpn_mul_1 (np, np, nn, iii); /* accumulate new factors */ + np[nn] = cy; + nn += cy != 0; + numfac -= nmaxnow; + } + + ASSERT (nn < alloc); + + binvert_limb (dinv, kp[0]); + nn += (np[nn - 1] >= kp[kn - 1]); + nn -= kn; + mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv); + mpn_neg (np, np, nn); + + if (kmax == 0) + break; + numfac = j; + + jjj = mulfunc[kmax - 1] (j); + j += kmax; /* number of factors used */ + count_trailing_zeros (cnt, jjj); /* count low zeros */ + jjj >>= cnt; /* remove remaining low zeros */ + } + + /* Put back the right number of factors of 2. */ + popc_limb (cnt, n - k); + popc_limb (j, k); + cnt += j; + popc_limb (j, n); + cnt -= j; + if (cnt != 0) + { + ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */ + cy = mpn_lshift (np, np, nn, cnt); + np[nn] = cy; + nn += cy != 0; + } + + nn -= np[nn - 1] == 0; /* normalisation */ + + kp = MPZ_NEWALLOC (r, nn); + SIZ(r) = nn; + MPN_COPY (kp, np, nn); + TMP_FREE; +} + +static void +mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) +{ + unsigned nmax, numfac; + mp_ptr rp; + mp_size_t rn, alloc; + mp_limb_t i, iii, cy; + unsigned i2cnt, cnt; + + MAXFACS (nmax, n); + nmax = MIN (nmax, M); + + i = n - k + 1; + + i2cnt = __gmp_fac2cnt_table[k / 2 - 1]; /* low zeros count */ + if (nmax >= k) + { + MPZ_NEWALLOC (r, 1) [0] = mulfunc[k - 1] (i) * facinv[k - 2] >> + (i2cnt - tcnttab[k - 1]); + SIZ(r) = 1; + return; + } + + count_leading_zeros (cnt, (mp_limb_t) n); + cnt = GMP_LIMB_BITS - cnt; + alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */ + rp = MPZ_NEWALLOC (r, alloc); + + rp[0] = mulfunc[nmax - 1] (i); + rn = 1; + i += nmax; /* number of factors used */ + i2cnt -= tcnttab[nmax - 1]; /* low zeros count */ + numfac = k - nmax; + do + { + nmax = MIN (nmax, numfac); + iii = mulfunc[nmax - 1] (i); + i += nmax; /* number of factors used */ + i2cnt -= tcnttab[nmax - 1]; /* update low zeros count */ + cy = mpn_mul_1 (rp, rp, rn, iii); /* accumulate new factors */ + rp[rn] = cy; + rn += cy != 0; + numfac -= nmax; + } while (numfac != 0); + + ASSERT (rn < alloc); + + mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], i2cnt); + /* A two-fold, branch-free normalisation is possible :*/ + /* rn -= rp[rn - 1] == 0; */ + /* rn -= rp[rn - 1] == 0; */ + MPN_NORMALIZE_NOT_ZERO (rp, rn); + + SIZ(r) = rn; +} + +/* Algorithm: + + Plain and simply multiply things together. + + We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such + that k!/2^t is odd). + +*/ + +static mp_limb_t +bc_bin_uiui (unsigned int n, unsigned int k) +{ + return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2]) + << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1])) + & GMP_NUMB_MASK; +} + +/* Algorithm: + + Recursively exploit the relation + bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) . + + Values for binomial(k,k>>1) that fit in a limb are precomputed + (with inverses). +*/ + +/* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] = + binomial(i*2,i)/2^t (where t is chosen so that it is odd). */ +static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE }; + +/* bin2kkinv[i] = bin2kk[i]^-1 mod B */ +static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE }; + +/* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */ +static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE }; + +static void +mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) +{ + mp_ptr rp; + mp_size_t rn; + unsigned long int hk; + + hk = k >> 1; + + if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT) + mpz_smallk_bin_uiui (r, n, hk); + else + mpz_smallkdc_bin_uiui (r, n, hk); + k -= hk; + n -= hk; + if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { + mp_limb_t cy; + rn = SIZ (r); + rp = MPZ_REALLOC (r, rn + 1); + cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k)); + rp [rn] = cy; + rn += cy != 0; + } else { + mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3]; + mpz_t t; + + ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3; + PTR (t) = buffer; + if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT) + mpz_smallk_bin_uiui (t, n, k); + else + mpz_smallkdc_bin_uiui (t, n, k); + mpz_mul (r, r, t); + rp = PTR (r); + rn = SIZ (r); + } + + mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET], + bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET], + fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk)); + /* A two-fold, branch-free normalisation is possible :*/ + /* rn -= rp[rn - 1] == 0; */ + /* rn -= rp[rn - 1] == 0; */ + MPN_NORMALIZE_NOT_ZERO (rp, rn); + + SIZ(r) = rn; +} + +/* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K). + * + * Contributed to the GNU project by Marco Bodrato. + * + * Implementation of the algorithm by P. Goetgheluck, "Computing + * Binomial Coefficients", The American Mathematical Monthly, Vol. 94, + * No. 4 (April 1987), pp. 360-365. + * + * Acknowledgment: Peter Luschny did spot the slowness of the previous + * code and suggested the reference. + */ + +/* TODO: Remove duplicated constants / macros / static functions... + */ + +/*************************************************************/ +/* Section macros: common macros, for swing/fac/bin (&sieve) */ +/*************************************************************/ + +#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ + if ((PR) > (MAX_PR)) { \ + (VEC)[(I)++] = (PR); \ + (PR) = 1; \ + } + +#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ + do { \ + if ((PR) > (MAX_PR)) { \ + (VEC)[(I)++] = (PR); \ + (PR) = (P); \ + } else \ + (PR) *= (P); \ + } while (0) + +#define LOOP_ON_SIEVE_CONTINUE(prime,end) \ + __max_i = (end); \ + \ + do { \ + ++__i; \ + if ((*__sieve & __mask) == 0) \ + { \ + mp_limb_t prime; \ + prime = id_to_n(__i) + +#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ + do { \ + mp_limb_t __mask, *__sieve, __max_i, __i; \ + \ + __i = (start)-(off); \ + __sieve = (sieve) + __i / GMP_LIMB_BITS; \ + __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ + __i += (off); \ + \ + LOOP_ON_SIEVE_CONTINUE(prime,end) + +#define LOOP_ON_SIEVE_STOP \ + } \ + __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ + __sieve += __mask & 1; \ + } while (__i <= __max_i) + +#define LOOP_ON_SIEVE_END \ + LOOP_ON_SIEVE_STOP; \ + } while (0) + +/*********************************************************/ +/* Section sieve: sieving functions and tools for primes */ +/*********************************************************/ + +#if WANT_ASSERT +static mp_limb_t +bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } +#endif + +/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ +static mp_limb_t +id_to_n (mp_limb_t id) { return id*3+1+(id&1); } + +/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ +static mp_limb_t +n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } + +static mp_size_t +primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } + +/*********************************************************/ +/* Section binomial: fast binomial implementation */ +/*********************************************************/ + +#define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ + do { \ + mp_limb_t __a, __b, __prime, __ma,__mb; \ + __prime = (P); \ + __a = (N); __b = (K); __mb = 0; \ + FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ + do { \ + __mb += __b % __prime; __b /= __prime; \ + __ma = __a % __prime; __a /= __prime; \ + if (__ma < __mb) { \ + __mb = 1; (PR) *= __prime; \ + } else __mb = 0; \ + } while (__a >= __prime); \ + } while (0) + +#define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ + do { \ + mp_limb_t __prime; \ + __prime = (P); \ + if (((N) % __prime) < ((K) % __prime)) { \ + FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \ + } \ + } while (0) + +/* Returns an approximation of the sqare root of x. + * It gives: + * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2 + * or + * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8 + */ +static mp_limb_t +limb_apprsqrt (mp_limb_t x) +{ + int s; + + ASSERT (x > 2); + count_leading_zeros (s, x); + s = (GMP_LIMB_BITS - s) >> 1; + return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s)); +} + +static void +mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) +{ + mp_limb_t *sieve, *factors, count; + mp_limb_t prod, max_prod; + mp_size_t j; + TMP_DECL; + + ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13); + ASSERT (n >= 25); + + TMP_MARK; + sieve = TMP_ALLOC_LIMBS (primesieve_size (n)); + + count = gmp_primesieve (sieve, n) + 1; + factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1); + + max_prod = GMP_NUMB_MAX / n; + + /* Handle primes = 2, 3 separately. */ + popc_limb (count, n - k); + popc_limb (j, k); + count += j; + popc_limb (j, n); + count -= j; + prod = CNST_LIMB(1) << count; + + j = 0; + COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j); + + /* Accumulate prime factors from 5 to n/2 */ + { + mp_limb_t s; + + s = limb_apprsqrt(n); + s = n_to_bit (s); + ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n); + ASSERT (s <= n_to_bit (n >> 1)); + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); + COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); + LOOP_ON_SIEVE_STOP; + + ASSERT (max_prod <= GMP_NUMB_MAX / 2); + max_prod <<= 1; + + LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n >> 1)); + SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); + LOOP_ON_SIEVE_END; + + max_prod >>= 1; + } + + /* Store primes from (n-k)+1 to n */ + ASSERT (n_to_bit (n - k) < n_to_bit (n)); + + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve); + FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); + LOOP_ON_SIEVE_END; + + if (LIKELY (j != 0)) + { + factors[j++] = prod; + mpz_prodlimbs (r, factors, j); + } + else + { + MPZ_NEWALLOC (r, 1)[0] = prod; + SIZ (r) = 1; + } + TMP_FREE; +} + +#undef COUNT_A_PRIME +#undef SH_COUNT_A_PRIME +#undef LOOP_ON_SIEVE_END +#undef LOOP_ON_SIEVE_STOP +#undef LOOP_ON_SIEVE_BEGIN +#undef LOOP_ON_SIEVE_CONTINUE + +/*********************************************************/ +/* End of implementation of Goetgheluck's algorithm */ +/*********************************************************/ + +void +mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) +{ + if (UNLIKELY (n < k)) { + SIZ (r) = 0; +#if BITS_PER_ULONG > GMP_NUMB_BITS + } else if (UNLIKELY (n > GMP_NUMB_MAX)) { + mpz_t tmp; + + mpz_init_set_ui (tmp, n); + mpz_bin_ui (r, tmp, k); + mpz_clear (tmp); +#endif + } else { + ASSERT (n <= GMP_NUMB_MAX); + /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */ + k = MIN (k, n - k); + if (k < 2) { + MPZ_NEWALLOC (r, 1)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */ + SIZ(r) = 1; + } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */ + MPZ_NEWALLOC (r, 1)[0] = bc_bin_uiui (n, k); + SIZ(r) = 1; + } else if (k <= ODD_FACTORIAL_TABLE_LIMIT) + mpz_smallk_bin_uiui (r, n, k); + else if (BIN_UIUI_ENABLE_SMALLDC && + k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2) + mpz_smallkdc_bin_uiui (r, n, k); + else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) && + k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */ + mpz_goetgheluck_bin_uiui (r, n, k); + else + mpz_bdiv_bin_uiui (r, n, k); + } +} |