diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:36:36 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:42:26 +0200 |
commit | a89a14ef5da44684a16b204e7a70460cc8c4922a (patch) | |
tree | b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/generic/mulmid.c | |
parent | 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff) |
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/generic/mulmid.c')
-rw-r--r-- | vendor/gmp-6.3.0/mpn/generic/mulmid.c | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/generic/mulmid.c b/vendor/gmp-6.3.0/mpn/generic/mulmid.c new file mode 100644 index 0000000..f35c5fb --- /dev/null +++ b/vendor/gmp-6.3.0/mpn/generic/mulmid.c @@ -0,0 +1,255 @@ +/* mpn_mulmid -- middle product + + Contributed by David Harvey. + + THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY + SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 2011 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + + +#include "gmp-impl.h" + + +#define CHUNK (200 + MULMID_TOOM42_THRESHOLD) + + +void +mpn_mulmid (mp_ptr rp, + mp_srcptr ap, mp_size_t an, + mp_srcptr bp, mp_size_t bn) +{ + mp_size_t rn, k; + mp_ptr scratch, temp; + + ASSERT (an >= bn); + ASSERT (bn >= 1); + ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an)); + ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn)); + + if (bn < MULMID_TOOM42_THRESHOLD) + { + /* region not tall enough to make toom42 worthwhile for any portion */ + + if (an < CHUNK) + { + /* region not too wide either, just call basecase directly */ + mpn_mulmid_basecase (rp, ap, an, bp, bn); + return; + } + + /* Region quite wide. For better locality, use basecase on chunks: + + AAABBBCC.. + .AAABBBCC. + ..AAABBBCC + */ + + k = CHUNK - bn + 1; /* number of diagonals per chunk */ + + /* first chunk (marked A in the above diagram) */ + mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn); + + /* remaining chunks (B, C, etc) */ + an -= k; + + while (an >= CHUNK) + { + mp_limb_t t0, t1, cy; + ap += k, rp += k; + t0 = rp[0], t1 = rp[1]; + mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn); + ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */ + MPN_INCR_U (rp + 1, k + 1, t1 + cy); + an -= k; + } + + if (an >= bn) + { + /* last remaining chunk */ + mp_limb_t t0, t1, cy; + ap += k, rp += k; + t0 = rp[0], t1 = rp[1]; + mpn_mulmid_basecase (rp, ap, an, bp, bn); + ADDC_LIMB (cy, rp[0], rp[0], t0); + MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy); + } + + return; + } + + /* region is tall enough for toom42 */ + + rn = an - bn + 1; + + if (rn < MULMID_TOOM42_THRESHOLD) + { + /* region not wide enough to make toom42 worthwhile for any portion */ + + TMP_DECL; + + if (bn < CHUNK) + { + /* region not too tall either, just call basecase directly */ + mpn_mulmid_basecase (rp, ap, an, bp, bn); + return; + } + + /* Region quite tall. For better locality, use basecase on chunks: + + AAAAA.... + .AAAAA... + ..BBBBB.. + ...BBBBB. + ....CCCCC + */ + + TMP_MARK; + + temp = TMP_ALLOC_LIMBS (rn + 2); + + /* first chunk (marked A in the above diagram) */ + bp += bn - CHUNK, an -= bn - CHUNK; + mpn_mulmid_basecase (rp, ap, an, bp, CHUNK); + + /* remaining chunks (B, C, etc) */ + bn -= CHUNK; + + while (bn >= CHUNK) + { + ap += CHUNK, bp -= CHUNK; + mpn_mulmid_basecase (temp, ap, an, bp, CHUNK); + mpn_add_n (rp, rp, temp, rn + 2); + bn -= CHUNK; + } + + if (bn) + { + /* last remaining chunk */ + ap += CHUNK, bp -= bn; + mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn); + mpn_add_n (rp, rp, temp, rn + 2); + } + + TMP_FREE; + return; + } + + /* we're definitely going to use toom42 somewhere */ + + if (bn > rn) + { + /* slice region into chunks, use toom42 on all chunks except possibly + the last: + + AA.... + .AA... + ..BB.. + ...BB. + ....CC + */ + + TMP_DECL; + TMP_MARK; + + temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn)); + scratch = temp + rn + 2; + + /* first chunk (marked A in the above diagram) */ + bp += bn - rn; + mpn_toom42_mulmid (rp, ap, bp, rn, scratch); + + /* remaining chunks (B, C, etc) */ + bn -= rn; + + while (bn >= rn) + { + ap += rn, bp -= rn; + mpn_toom42_mulmid (temp, ap, bp, rn, scratch); + mpn_add_n (rp, rp, temp, rn + 2); + bn -= rn; + } + + if (bn) + { + /* last remaining chunk */ + ap += rn, bp -= bn; + mpn_mulmid (temp, ap, rn + bn - 1, bp, bn); + mpn_add_n (rp, rp, temp, rn + 2); + } + + TMP_FREE; + } + else + { + /* slice region into chunks, use toom42 on all chunks except possibly + the last: + + AAABBBCC.. + .AAABBBCC. + ..AAABBBCC + */ + + TMP_DECL; + TMP_MARK; + + scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn)); + + /* first chunk (marked A in the above diagram) */ + mpn_toom42_mulmid (rp, ap, bp, bn, scratch); + + /* remaining chunks (B, C, etc) */ + rn -= bn; + + while (rn >= bn) + { + mp_limb_t t0, t1, cy; + ap += bn, rp += bn; + t0 = rp[0], t1 = rp[1]; + mpn_toom42_mulmid (rp, ap, bp, bn, scratch); + ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */ + MPN_INCR_U (rp + 1, bn + 1, t1 + cy); + rn -= bn; + } + + TMP_FREE; + + if (rn) + { + /* last remaining chunk */ + mp_limb_t t0, t1, cy; + ap += bn, rp += bn; + t0 = rp[0], t1 = rp[1]; + mpn_mulmid (rp, ap, rn + bn - 1, bp, bn); + ADDC_LIMB (cy, rp[0], rp[0], t0); + MPN_INCR_U (rp + 1, rn + 1, t1 + cy); + } + } +} |