| author | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:36:36 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:42:26 +0200 | 
| commit | a89a14ef5da44684a16b204e7a70460cc8c4922a (patch) | |
| tree | b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c | |
| parent | 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff) | |
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c')
| -rw-r--r-- | vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c | 368 |
|---|---|---|

1 files changed, 368 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c b/vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c
new file mode 100644
index 0000000..0ef7e03
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/generic/mu_divappr_q.c
@@ -0,0 +1,368 @@
+/* mpn_mu_divappr_q, mpn_preinv_mu_divappr_q.
+
+   Compute Q = floor(N / D) + e.  N is nn limbs, D is dn limbs and must be
+   normalized, and Q must be nn-dn limbs, 0 <= e <= 4.  The requirement that Q
+   is nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us
+   to let N be unmodified during the operation.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_div_qr.c should be edited in sync.
+
+ Things to work on:
+
+  * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+    demonstrated by the fact that the mpn_invertappr function's scratch needs
+    mean that we need to keep a large allocation long after it is needed.
+    Things are worse as mpn_mul_fft does not accept any scratch parameter,
+    which means we'll have a large memory hole while in mpn_mul_fft.  In
+    general, a peak scratch need in the beginning of a function isn't
+    well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+
+static mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t,
+			 mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+static mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int);
+
+mp_limb_t
+mpn_mu_divappr_q (mp_ptr qp,
+		  mp_srcptr np,
+		  mp_size_t nn,
+		  mp_srcptr dp,
+		  mp_size_t dn,
+		  mp_ptr scratch)
+{
+  mp_size_t qn, in;
+  mp_limb_t cy, qh;
+  mp_ptr ip, tp;
+
+  ASSERT (dn > 1);
+
+  qn = nn - dn;
+
+  /* If Q is smaller than D, truncate operands.  */
+  if (qn + 1 < dn)
+    {
+      np += dn - (qn + 1);
+      nn -= dn - (qn + 1);
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  /* Compute the inverse size.  */
+  in = mpn_mu_divappr_q_choose_in (qn, dn, 0);
+  ASSERT (in <= dn);
+
+#if 1
+  /* This alternative inverse computation method gets slightly more accurate
+     results.  FIXMEs: (1) Temp allocation needs not analysed (2) itch function
+     not adapted (3) mpn_invertappr scratch needs not met.  */
+  ip = scratch;
+  tp = scratch + in + 1;
+
+  /* compute an approximate inverse on (in+1) limbs */
+  if (dn == in)
+    {
+      MPN_COPY (tp + 1, dp, in);
+      tp[0] = 1;
+      mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+      MPN_COPY_INCR (ip, ip + 1, in);
+    }
+  else
+    {
+      cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
+      if (UNLIKELY (cy != 0))
+	MPN_ZERO (ip, in);
+      else
+	{
+	  mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+	  MPN_COPY_INCR (ip, ip + 1, in);
+	}
+    }
+#else
+  /* This older inverse computation method gets slightly worse results than the
+     one above.  */
+  ip = scratch;
+  tp = scratch + in;
+
+  /* Compute inverse of D to in+1 limbs, then round to 'in' limbs.  Ideally the
+     inversion function should do this automatically.  */
+  if (dn == in)
+    {
+      tp[in + 1] = 0;
+      MPN_COPY (tp + in + 2, dp, in);
+      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+    }
+  else
+    {
+      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+    }
+  cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+  if (UNLIKELY (cy != 0))
+    MPN_ZERO (tp + 1, in);
+  MPN_COPY (ip, tp + 1, in);
+#endif
+
+  qh = mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
+
+  return qh;
+}
+
+static mp_limb_t
+mpn_preinv_mu_divappr_q (mp_ptr qp,
+			 mp_srcptr np,
+			 mp_size_t nn,
+			 mp_srcptr dp,
+			 mp_size_t dn,
+			 mp_srcptr ip,
+			 mp_size_t in,
+			 mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_limb_t cy, cx, qh;
+  mp_limb_t r;
+  mp_size_t tn, wn;
+
+#define rp           scratch
+#define tp           (scratch + dn)
+#define scratch_out  (scratch + dn + tn)
+
+  qn = nn - dn;
+
+  np += qn;
+  qp += qn;
+
+  qh = mpn_cmp (np, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (rp, np, dp, dn);
+  else
+    MPN_COPY (rp, np, dn);
+
+  if (UNLIKELY (qn == 0))
+    return qh;			/* Degenerate use.  Should we allow this? */
+
+  for (;;) /* The exit condition (qn == 0) is verified in the loop. */
+    {
+      if (qn < in)
+	{
+	  ip += in - qn;
+	  in = qn;
+	}
+      np -= in;
+      qp -= in;
+
+      /* Compute the next block of quotient limbs by multiplying the inverse I
+	 by the upper part of the partial remainder R.  */
+      mpn_mul_n (tp, rp + dn - in, ip, in);		/* mulhi  */
+      cy = mpn_add_n (qp, tp + in, rp + dn - in, in);	/* I's msb implicit */
+      ASSERT_ALWAYS (cy == 0);
+
+      qn -= in;
+      if (qn == 0)
+	break;
+
+      /* Compute the product of the quotient block and the divisor D, to be
+	 subtracted from the partial remainder combined with new limbs from the
+	 dividend N.  We only really need the low dn limbs.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn + 1);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+	      cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+	      cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+	      ASSERT_ALWAYS (cx >= cy);
+	      mpn_incr_u (tp, cx - cy);
+	    }
+	}
+
+      r = rp[dn - in] - tp[dn];
+
+      /* Subtract the product from the partial remainder combined with new
+	 limbs from the dividend N, generating a new partial remainder R.  */
+      if (dn != in)
+	{
+	  cy = mpn_sub_n (tp, np, tp, in);	/* get next 'in' limbs from N */
+	  cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+	  MPN_COPY (rp, tp, dn);		/* FIXME: try to avoid this */
+	}
+      else
+	{
+	  cy = mpn_sub_n (rp, np, tp, in);	/* get next 'in' limbs from N */
+	}
+
+      STAT (int i; int err = 0;
+	    static int errarr[5]; static int err_rec; static int tot);
+
+      /* Check the remainder R and adjust the quotient as needed.  */
+      r -= cy;
+      while (r != 0)
+	{
+	  /* We loop 0 times with about 69% probability, 1 time with about 31%
+	     probability, 2 times with about 0.6% probability, if inverse is
+	     computed as recommended.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  r -= cy;
+	  STAT (err++);
+	}
+      if (mpn_cmp (rp, dp, dn) >= 0)
+	{
+	  /* This is executed with about 76% probability.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  STAT (err++);
+	}
+
+      STAT (
+	    tot++;
+	    errarr[err]++;
+	    if (err > err_rec)
+	      err_rec = err;
+	    if (tot % 0x10000 == 0)
+	      {
+		for (i = 0; i <= err_rec; i++)
+		  printf ("  %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+		printf ("\n");
+	      }
+	    );
+    }
+
+  /* FIXME: We should perhaps be somewhat more elegant in our rounding of the
+     quotient.  For now, just make sure the returned quotient is >= the real
+     quotient; add 3 with saturating arithmetic.  */
+  qn = nn - dn;
+  cy += mpn_add_1 (qp, qp, qn, 3);
+  if (cy != 0)
+    {
+      if (qh != 0)
+	{
+	  /* Return a quotient of just 1-bits, with qh set.  */
+	  mp_size_t i;
+	  for (i = 0; i < qn; i++)
+	    qp[i] = GMP_NUMB_MAX;
+	}
+      else
+	{
+	  /* Propagate carry into qh.  */
+	  qh = 1;
+	}
+    }
+
+  return qh;
+}
+
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
+   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+   (c) qn < dn/3:       in = qn
+   In all cases we have in <= dn.
+ */
+static mp_size_t
+mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+  mp_size_t in;
+
+  if (k == 0)
+    {
+      mp_size_t b;
+      if (qn > dn)
+	{
+	  /* Compute an inverse size that is a nice partition of the quotient.  */
+	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+	}
+      else if (3 * qn > dn)
+	{
+	  in = (qn - 1) / 2 + 1;	/* b = 2 */
+	}
+      else
+	{
+	  in = (qn - 1) / 1 + 1;	/* b = 1 */
+	}
+    }
+  else
+    {
+      mp_size_t xn;
+      xn = MIN (dn, qn);
+      in = (xn - 1) / k + 1;
+    }
+
+  return in;
+}
+
+mp_size_t
+mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t qn, in, itch_local, itch_out, itch_invapp;
+
+  qn = nn - dn;
+  if (qn + 1 < dn)
+    {
+      dn = qn + 1;
+    }
+  in = mpn_mu_divappr_q_choose_in (qn, dn, mua_k);
+
+  itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+  itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+  itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
+
+  ASSERT (dn + itch_local + itch_out >= itch_invapp);
+  return in + MAX (dn + itch_local + itch_out, itch_invapp);
+}
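The header comment of the added file specifies the contract Q = floor(N / D) + e with 0 <= e <= 4: the returned quotient may overshoot the exact one by a small bounded amount, and the caller is expected to correct it. A minimal single-word sketch of that caller-side correction, not part of this commit; `divappr_toy` is a hypothetical stand-in that only mimics the bounded overshoot:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model of the documented interface: a division that may return the
   exact quotient plus an excess e, 0 <= e <= 4, and the fix-up a caller
   applies.  This is NOT the GMP code path.  */
static uint64_t
divappr_toy (uint64_t n, uint64_t d, unsigned e)
{
  return n / d + e;		/* exact quotient plus a bounded excess */
}

int
main (void)
{
  uint64_t n = 1000003, d = 97;
  for (unsigned e = 0; e <= 4; e++)
    {
      uint64_t q = divappr_toy (n, d, e);
      while (q * d > n)		/* step the over-estimate back down */
	q--;
      assert (q == n / d);	/* exact quotient recovered */
    }
  puts ("over-estimates of at most 4 corrected");
  return 0;
}
```

The one-sided error is what lets mpn_preinv_mu_divappr_q saturate upward ("add 3 with saturating arithmetic") instead of tracking the remainder exactly.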
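For context on the comment contrasting this routine with "the standard Barrett algorithm": Barrett division precomputes a truncated reciprocal once and then obtains each quotient from a multiply, a shift, and a tiny correction. A scalar sketch under the assumption of a 32-bit dividend, illustration only; the GMP routine works limb-wise and deliberately uses a shorter inverse than this:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Scalar Barrett sketch: inv = floor(2^32 / d) is computed once, then each
   quotient needs one multiply, one shift and at most one fix-up.  */
int
main (void)
{
  uint32_t d = 12345;				/* divisor, d >= 1 */
  uint64_t inv = ((uint64_t) 1 << 32) / d;	/* truncated reciprocal */

  for (uint32_t n = 0; n < 3000000; n += 997)
    {
      uint64_t q = ((uint64_t) n * inv) >> 32;	/* floor(n/d) or one less */
      if (n - q * d >= d)			/* at most one correction */
	q++;
      assert (q == n / d);
    }
  puts ("Barrett sketch: all quotients exact");
  return 0;
}
```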
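The k = 0 case analysis in mpn_mu_divappr_q_choose_in can be checked with plain integer arithmetic. A standalone sketch of the same selection logic, using the ceil(x/y) = (x - 1)/y + 1 rewriting from the source; the values in main are arbitrary sample sizes:

```c
#include <stdio.h>

/* Restatement of the k == 0 branch of mpn_mu_divappr_q_choose_in.  */
static long
choose_in (long qn, long dn)
{
  if (qn > dn)
    {
      long b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
      return (qn - 1) / b + 1;		/* ceil(qn/b) */
    }
  else if (3 * qn > dn)
    return (qn - 1) / 2 + 1;		/* two blocks */
  else
    return qn;				/* one block */
}

int
main (void)
{
  printf ("%ld\n", choose_in (1000, 300));	/* b = 4, in = ceil(1000/4) = 250 */
  printf ("%ld\n", choose_in (150, 300));	/* in = ceil(150/2) = 75 */
  printf ("%ld\n", choose_in (50, 300));	/* in = qn = 50 */
  return 0;
}
```

In case (a) the block count b is at least qn/dn, so in = ceil(qn/b) <= dn; in cases (b) and (c) qn <= dn already, which is the "in <= dn" invariant the ASSERT in mpn_mu_divappr_q relies on.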