about summary refs log tree commit diff
path: root/vendor/gmp-6.3.0/mpn/x86_64/k10
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-06-21 23:36:36 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-06-21 23:42:26 +0200
commit a89a14ef5da44684a16b204e7a70460cc8c4922a (patch)
tree b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/x86_64/k10
parent 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff)
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/x86_64/k10')
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/gcd_11.asm 37
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/gcd_22.asm 142
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/gmp-mparam.h 248
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/hamdist.asm 109
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/lshift.asm 37
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/lshiftc.asm 37
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/popcount.asm 138
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/rshift.asm 37
-rw-r--r-- vendor/gmp-6.3.0/mpn/x86_64/k10/sec_tabselect.asm 37
9 files changed, 822 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/gcd_11.asm b/vendor/gmp-6.3.0/mpn/x86_64/k10/gcd_11.asm
new file mode 100644
index 0000000..4723093
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/gcd_11.asm
@@ -0,0 +1,37 @@
+dnl AMD64 mpn_gcd_11.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+C This file has no instructions of its own: it includes x86_64/core2/gcd_11.asm.
+MULFUNC_PROLOGUE(mpn_gcd_11)
+include_mpn(`x86_64/core2/gcd_11.asm')
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/gcd_22.asm b/vendor/gmp-6.3.0/mpn/x86_64/k10/gcd_22.asm
new file mode 100644
index 0000000..f58b4cc
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/gcd_22.asm
@@ -0,0 +1,142 @@
+dnl AMD64 mpn_gcd_22. Assumes useful bsf, useless shrd, no tzcnt, no shlx.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit
+C AMD K8,K9 ?
+C AMD K10 7.4
+C AMD bd1 9.9
+C AMD bd2 ?
+C AMD bd3 ?
+C AMD bd4 ?
+C AMD bt1 ?
+C AMD bt2 ?
+C AMD zn1 ?
+C AMD zn2 ?
+C Intel P4 ?
+C Intel CNR ?
+C Intel PNR ?
+C Intel NHM 9.2
+C Intel WSM 9.0
+C Intel SBR ?
+C Intel IBR ?
+C Intel HWL ?
+C Intel BWL ?
+C Intel SKL ?
+C Intel atom ?
+C Intel SLM ?
+C Intel GLM ?
+C Intel GLM+ ?
+C VIA nano ?
+
+
+define(`u1', `%rdi') C high limb of u
+define(`u0', `%rsi') C low limb of u
+define(`v1', `%rdx') C high limb of v
+define(`v0_param', `%rcx') C low limb of v as passed in
+C v0 is copied from rcx to rax at entry, which frees rcx to hold the shift count.
+define(`v0', `%rax')
+define(`cnt', `%rcx')
+C Scratch: s1:s0 keep a copy of u, t1:t0 receive v - u.
+define(`s0', `%r8')
+define(`s1', `%r9')
+define(`t0', `%r10')
+define(`t1', `%r11')
+
+dnl ABI_SUPPORT(DOS64) C returns mp_double_limb_t in memory
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_gcd_22)
+ FUNC_ENTRY(4)
+ mov v0_param, v0 C v0 now lives in rax, rcx becomes cnt
+
+ ALIGN(16)
+L(top): mov v0, t0
+ sub u0, t0 C t0 = v0 - u0, the borrow feeds the sbb below
+ jz L(lowz) C jump when low limb result = 0
+ mov v1, t1
+ sbb u1, t1 C t1:t0 = v - u
+
+ mov u0, s0 C s1:s0 = u, saved before u is overwritten
+ mov u1, s1
+
+ bsf t0, cnt C cnt = index of the lowest set bit of t0
+
+ sub v0, u0
+ sbb v1, u1 C u1:u0 = u - v, carry set when u < v
+
+L(bck): cmovc t0, u0 C u = |u - v|
+ cmovnc u1, t1 C u = |u - v|
+ cmovc s0, v0 C v = min(u,v)
+ cmovc s1, v1 C v = min(u,v)
+
+ shr R8(cnt), u0 C low limb >> cnt
+ mov t1, u1 C t1 holds the high limb of |u - v| in both cases
+ shr R8(cnt), u1 C high limb >> cnt
+ neg cnt C only the low 6 bits of the count are used, so this shifts by 64 - cnt
+ shl R8(cnt), t1 C bits of the high limb that drop into the low limb
+ or t1, u0
+
+ test u1, u1
+ jnz L(top) C loop while either high limb is nonzero
+ test v1, v1
+ jnz L(top)
+
+L(gcd_11):
+ mov v0, %rdi C first argument for mpn_gcd_11, u0 is already in rsi
+C mov u0, %rsi
+ TCALL( mpn_gcd_11)
+
+L(lowz):C We come here when v0 - u0 = 0
+ C 1. If v1 - u1 = 0, then gcd is u = v.
+ C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+ mov v1, t0
+ sub u1, t0 C t0 = v1 - u1
+ je L(end)
+
+ xor t1, t1 C t1 = 0, it stays zero through the cmovnc at L(bck) because u1 is zeroed below
+ mov u0, s0
+ mov u1, s1 C s1:s0 = u, for the min(u,v) selection at L(bck)
+ bsf t0, cnt C cnt = index of the lowest set bit of v1 - u1
+ mov u1, u0
+ xor u1, u1 C u1 = 0
+ sub v1, u0 C u0 = u1 - v1 using the old u1, carry set when u1 < v1
+ jmp L(bck)
+
+L(end): C mov v0, %rax
+ C mov v1, %rdx
+ FUNC_EXIT()
+ ret C v0 and v1 are already in rax and rdx
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/gmp-mparam.h b/vendor/gmp-6.3.0/mpn/x86_64/k10/gmp-mparam.h
new file mode 100644
index 0000000..349bace
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/gmp-mparam.h
@@ -0,0 +1,248 @@
+/* AMD K10 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+/* Disabled by "#if 0": would send the in-place case (rp == up) of mpn_sublsh_n to mpn_submul_1 with multiplier 1 << c. */
+#if 0
+#undef mpn_sublsh_n
+#define mpn_sublsh_n(rp,up,vp,n,c) \
+ (((rp) == (up)) ? mpn_submul_1 (rp, vp, n, CNST_LIMB(1) << (c)) \
+ : MPN(mpn_sublsh_n)(rp,up,vp,n,c))
+#endif
+
+/* 3200-3600 MHz K10 Thuban */
+/* FFT tuning limit = 427,161,280 */
+/* Generated by tuneup.c, 2019-10-22, gcc 8.3 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 17
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 28
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1_NORM_THRESHOLD 1
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 15
+
+#define DIV_1_VS_MUL_1_PERCENT 324
+
+#define MUL_TOOM22_THRESHOLD 27
+#define MUL_TOOM33_THRESHOLD 81
+#define MUL_TOOM44_THRESHOLD 232
+#define MUL_TOOM6H_THRESHOLD 363
+#define MUL_TOOM8H_THRESHOLD 478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 155
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 145
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 160
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 142
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 117
+#define SQR_TOOM4_THRESHOLD 280
+#define SQR_TOOM6_THRESHOLD 446
+#define SQR_TOOM8_THRESHOLD 547
+
+#define MULMID_TOOM42_THRESHOLD 34
+
+#define MULMOD_BNM1_THRESHOLD 15
+#define SQRMOD_BNM1_THRESHOLD 17
+
+#define MUL_FFT_MODF_THRESHOLD 530 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 530, 5}, { 24, 6}, { 13, 5}, { 27, 6}, \
+ { 27, 7}, { 14, 6}, { 29, 7}, { 15, 6}, \
+ { 31, 7}, { 29, 8}, { 15, 7}, { 32, 8}, \
+ { 17, 7}, { 36, 8}, { 19, 7}, { 39, 8}, \
+ { 21, 7}, { 43, 8}, { 23, 7}, { 47, 8}, \
+ { 25, 7}, { 51, 8}, { 29, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \
+ { 51, 9}, { 27, 8}, { 55,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 35, 8}, { 71, 9}, \
+ { 39, 8}, { 81, 9}, { 43,10}, { 23, 9}, \
+ { 55,11}, { 15,10}, { 31, 9}, { 71,10}, \
+ { 39, 9}, { 87,10}, { 47, 9}, { 99,10}, \
+ { 55,11}, { 31,10}, { 87,11}, { 47,10}, \
+ { 111,12}, { 31,11}, { 63,10}, { 143,11}, \
+ { 79,10}, { 167,11}, { 95,10}, { 191,11}, \
+ { 111,12}, { 63,11}, { 143,10}, { 287,11}, \
+ { 159,12}, { 95,11}, { 207,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
+ { 543,11}, { 287,12}, { 159,11}, { 319,10}, \
+ { 639,11}, { 335,10}, { 671,11}, { 351,10}, \
+ { 703,12}, { 191,11}, { 383,10}, { 767,11}, \
+ { 415,12}, { 223,11}, { 447,13}, { 127,12}, \
+ { 255,11}, { 543,12}, { 287,11}, { 575,10}, \
+ { 1151,11}, { 607,12}, { 319,11}, { 671,12}, \
+ { 351,11}, { 703,13}, { 191,12}, { 383,11}, \
+ { 767,12}, { 415,11}, { 831,12}, { 447,14}, \
+ { 127,13}, { 255,12}, { 543,11}, { 1087,12}, \
+ { 575,11}, { 1151,12}, { 607,13}, { 319,12}, \
+ { 703,11}, { 1407,12}, { 735,13}, { 383,12}, \
+ { 831,13}, { 447,12}, { 959,14}, { 255,13}, \
+ { 511,12}, { 1087,13}, { 575,12}, { 1215,13}, \
+ { 639,12}, { 1343,13}, { 703,12}, { 1471,14}, \
+ { 383,13}, { 767,12}, { 1535,13}, { 831,12}, \
+ { 1663,13}, { 959,14}, { 511,13}, { 1087,12}, \
+ { 2175,13}, { 1215,14}, { 639,13}, { 1471,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1855,15}, \
+ { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
+ { 2431,14}, { 1279,13}, { 2559,14}, { 1407,15}, \
+ { 767,14}, { 1791,16}, { 511,15}, { 1023,14}, \
+ { 2431,15}, { 1279,14}, { 2943,15}, { 1535,14}, \
+ { 3199,15}, { 1791,14}, { 3583,16}, { 1023,15}, \
+ { 2047,14}, { 4223,15}, { 2303,14}, { 4863,15}, \
+ { 2559,14}, { 5247,15}, { 2815,16}, { 1535,15}, \
+ { 3071,14}, { 6271,15}, { 3327,14}, { 6911,15}, \
+ { 3583,17}, { 1023,16}, { 2047,15}, { 4351,14}, \
+ { 8959,15}, { 4863,16}, { 2559,15}, { 5887,14}, \
+ { 11775,16}, { 3071,15}, { 6911,16}, { 3583,15}, \
+ { 7167,17}, { 2047,16}, { 4095,15}, { 8959,16}, \
+ { 4607,15}, { 9983,16}, { 5631,15}, { 11775,17}, \
+ { 3071,16}, { 6143,15}, { 12543,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 207 /* number of entries in MUL_FFT_TABLE3 */
+#define MUL_FFT_THRESHOLD 7552
+
+#define SQR_FFT_MODF_THRESHOLD 476 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 476, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 29, 7}, { 28, 8}, \
+ { 15, 7}, { 32, 8}, { 17, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 21, 7}, { 43, 8}, \
+ { 23, 7}, { 47, 8}, { 29, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \
+ { 49, 9}, { 27, 8}, { 55,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 43,10}, { 23, 9}, \
+ { 55,11}, { 15,10}, { 31, 9}, { 67,10}, \
+ { 39, 9}, { 83,10}, { 47, 9}, { 95,10}, \
+ { 55,11}, { 31,10}, { 79,11}, { 47,10}, \
+ { 103,12}, { 31,11}, { 63,10}, { 135,11}, \
+ { 79,10}, { 167,11}, { 111,12}, { 63,11}, \
+ { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319,12}, { 95,11}, \
+ { 191,10}, { 383, 9}, { 767,10}, { 399,13}, \
+ { 63,12}, { 127,11}, { 255,10}, { 511,11}, \
+ { 271,10}, { 543,11}, { 287,10}, { 575,12}, \
+ { 159,11}, { 319,10}, { 639,11}, { 335,10}, \
+ { 671,11}, { 351,10}, { 703,11}, { 367,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,12}, { 223,11}, { 447,13}, { 127,12}, \
+ { 255,11}, { 511,10}, { 1023,11}, { 543,12}, \
+ { 287,11}, { 575,10}, { 1151,11}, { 607,12}, \
+ { 319,11}, { 639,10}, { 1279,11}, { 671,12}, \
+ { 351,11}, { 703,10}, { 1407,13}, { 191,12}, \
+ { 383,11}, { 799,12}, { 415,11}, { 831,12}, \
+ { 447,14}, { 127,13}, { 255,12}, { 511,11}, \
+ { 1023,12}, { 543,11}, { 1087,12}, { 575,11}, \
+ { 1151,12}, { 607,13}, { 319,12}, { 639,11}, \
+ { 1279,12}, { 671,11}, { 1343,12}, { 703,11}, \
+ { 1407,12}, { 735,13}, { 383,12}, { 767,11}, \
+ { 1535,12}, { 831,11}, { 1663,13}, { 447,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1087,13}, \
+ { 575,12}, { 1215,13}, { 639,12}, { 1343,13}, \
+ { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \
+ { 1535,13}, { 831,12}, { 1727,13}, { 895,12}, \
+ { 1791,13}, { 959,15}, { 255,14}, { 511,13}, \
+ { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \
+ { 1471,14}, { 767,13}, { 1727,14}, { 895,13}, \
+ { 1791,15}, { 511,14}, { 1023,13}, { 2175,14}, \
+ { 1151,13}, { 2303,14}, { 1279,13}, { 2559,14}, \
+ { 1407,15}, { 767,14}, { 1791,16}, { 511,15}, \
+ { 1023,14}, { 2303,15}, { 1279,14}, { 2815,15}, \
+ { 1535,14}, { 3199,15}, { 1791,16}, { 1023,15}, \
+ { 2047,14}, { 4223,15}, { 2303,14}, { 4863,15}, \
+ { 2559,14}, { 5247,15}, { 2815,16}, { 1535,15}, \
+ { 3071,14}, { 6271,15}, { 3327,14}, { 6911,17}, \
+ { 1023,16}, { 2047,15}, { 4351,14}, { 8959,15}, \
+ { 4863,16}, { 2559,15}, { 5887,14}, { 11775,16}, \
+ { 3071,15}, { 6911,16}, { 3583,15}, { 7679,17}, \
+ { 2047,16}, { 4095,15}, { 8959,16}, { 4607,15}, \
+ { 9983,16}, { 5119,15}, { 10495,16}, { 5631,15}, \
+ { 11775,17}, { 3071,16}, { 6143,15}, { 12287,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 224 /* number of entries in SQR_FFT_TABLE3 */
+#define SQR_FFT_THRESHOLD 5568
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 61
+#define MULLO_MUL_N_THRESHOLD 14281
+#define SQRLO_BASECASE_THRESHOLD 9
+#define SQRLO_DC_THRESHOLD 0 /* never mpn_sqrlo_basecase */
+#define SQRLO_SQR_THRESHOLD 10950
+
+#define DC_DIV_QR_THRESHOLD 54
+#define DC_DIVAPPR_Q_THRESHOLD 238
+#define DC_BDIV_QR_THRESHOLD 54
+#define DC_BDIV_Q_THRESHOLD 42
+
+#define INV_MULMOD_BNM1_THRESHOLD 54
+#define INV_NEWTON_THRESHOLD 252
+#define INV_APPR_THRESHOLD 230
+
+#define BINV_NEWTON_THRESHOLD 327
+#define REDC_1_TO_REDC_2_THRESHOLD 25
+#define REDC_2_TO_REDC_N_THRESHOLD 67
+
+#define MU_DIV_QR_THRESHOLD 1620
+#define MU_DIVAPPR_Q_THRESHOLD 1620
+#define MUPI_DIV_QR_THRESHOLD 104
+#define MU_BDIV_QR_THRESHOLD 1528
+#define MU_BDIV_Q_THRESHOLD 1652
+
+#define POWM_SEC_TABLE 1,22,321,473,2144
+
+#define GET_STR_DC_THRESHOLD 15
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 248
+#define SET_STR_PRECOMPUTE_THRESHOLD 1304
+
+#define FAC_DSC_THRESHOLD 470
+#define FAC_ODD_THRESHOLD 25
+
+#define MATRIX22_STRASSEN_THRESHOLD 17
+#define HGCD2_DIV1_METHOD 5 /* 8.38% faster than 4 */
+#define HGCD_THRESHOLD 115
+#define HGCD_APPR_THRESHOLD 146
+#define HGCD_REDUCE_THRESHOLD 3524
+#define GCD_DC_THRESHOLD 535
+#define GCDEXT_DC_THRESHOLD 460
+#define JACOBI_BASE_METHOD 1 /* 0.90% faster than 4 */
+
+/* Tuneup completed successfully, took 448763 seconds */
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/hamdist.asm b/vendor/gmp-6.3.0/mpn/x86_64/k10/hamdist.asm
new file mode 100644
index 0000000..f70494a
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/hamdist.asm
@@ -0,0 +1,109 @@
+dnl AMD64 mpn_hamdist -- hamming distance.
+
+dnl Copyright 2008, 2010-2012, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 -
+C AMD K10 2.0 =
+C AMD bd1 ~4.4 =
+C AMD bd2 ~4.4 =
+C AMD bd3
+C AMD bd4
+C AMD bobcat 7.55 =
+C AMD jaguar 2.52 -
+C Intel P4 -
+C Intel core2 -
+C Intel NHM 2.03 +
+C Intel SBR 2.01 +
+C Intel IBR 1.96 +
+C Intel HWL 1.64 =
+C Intel BWL 1.56 -
+C Intel SKL 1.52 =
+C Intel atom
+C Intel SLM 3.0 -
+C VIA nano
+
+define(`ap', `%rdi')
+define(`bp', `%rsi')
+define(`n', `%rdx')
+C ap, bp: the two limb arrays that get XORed. n: limb count. NOTE(review): limb 0 is read unconditionally, so n >= 1 looks required.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_hamdist)
+ FUNC_ENTRY(3)
+ mov (ap), %r8
+ xor (bp), %r8 C r8 = a[0] ^ b[0]
+
+ lea (ap,n,8), ap C point at A operand end
+ lea (bp,n,8), bp C point at B operand end
+ neg n C n now runs from -n up towards zero
+
+ test $1, R8(n) C negation keeps the parity of n
+ jz L(2) C even limb count: start with two limbs
+
+L(1): .byte 0xf3,0x49,0x0f,0xb8,0xc0 C popcnt %r8, %rax
+ xor R32(%r10), R32(%r10) C r10 = 0, because L(top) begins by adding r10 into rax
+ inc n
+ js L(top) C limbs remain
+ FUNC_EXIT()
+ ret
+
+ ALIGN(16)
+L(2): mov 8(ap,n,8), %r9
+ .byte 0xf3,0x49,0x0f,0xb8,0xc0 C popcnt %r8, %rax
+ xor 8(bp,n,8), %r9 C r9 = a[1] ^ b[1]
+ .byte 0xf3,0x4d,0x0f,0xb8,0xd1 C popcnt %r9, %r10
+ add $2, n
+ js L(top) C limbs remain
+ lea (%r10, %rax), %rax C no limbs left: result = rax + r10
+ FUNC_EXIT()
+ ret
+
+ ALIGN(16)
+L(top): mov (ap,n,8), %r8
+ lea (%r10, %rax), %rax C fold in the count left pending in r10
+ mov 8(ap,n,8), %r9
+ xor (bp,n,8), %r8
+ xor 8(bp,n,8), %r9
+ .byte 0xf3,0x49,0x0f,0xb8,0xc8 C popcnt %r8, %rcx
+ lea (%rcx, %rax), %rax
+ .byte 0xf3,0x4d,0x0f,0xb8,0xd1 C popcnt %r9, %r10
+ add $2, n C two limbs per iteration
+ js L(top)
+
+ lea (%r10, %rax), %rax C add the final pending count
+ FUNC_EXIT()
+ ret
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/lshift.asm b/vendor/gmp-6.3.0/mpn/x86_64/k10/lshift.asm
new file mode 100644
index 0000000..cadf9b9
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/lshift.asm
@@ -0,0 +1,37 @@
+dnl X86-64 mpn_lshift optimised for AMD K10.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+C This file has no instructions of its own: it includes x86_64/fastsse/lshift-movdqu2.asm.
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86_64/fastsse/lshift-movdqu2.asm')
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/lshiftc.asm b/vendor/gmp-6.3.0/mpn/x86_64/k10/lshiftc.asm
new file mode 100644
index 0000000..48a92e5
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/lshiftc.asm
@@ -0,0 +1,37 @@
+dnl X86-64 mpn_lshiftc optimised for AMD K10.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+C This file has no instructions of its own: it includes x86_64/fastsse/lshiftc-movdqu2.asm.
+MULFUNC_PROLOGUE(mpn_lshiftc)
+include_mpn(`x86_64/fastsse/lshiftc-movdqu2.asm')
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/popcount.asm b/vendor/gmp-6.3.0/mpn/x86_64/k10/popcount.asm
new file mode 100644
index 0000000..3814aea
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/popcount.asm
@@ -0,0 +1,138 @@
+dnl AMD64 mpn_popcount -- population count.
+
+dnl Copyright 2008, 2010-2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 n/a
+C AMD K10 1.125
+C Intel P4 n/a
+C Intel core2 n/a
+C Intel corei 1.25
+C Intel atom n/a
+C VIA nano n/a
+
+C * The zero-offset of popcount is misassembled to the offset-less form, which
+C is one byte shorter and therefore will mess up the switching code.
+C * The outdated gas used in FreeBSD and NetBSD cannot handle the POPCNT insn,
+C which is the main reason for our usage of '.byte'.
+
+C TODO
+C * Improve switching code, the current code sucks.
+
+define(`up', `%rdi')
+define(`n', `%rsi')
+C up: limb array, n: limb count.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_popcount)
+ FUNC_ENTRY(2)
+
+ifelse(1,1,`
+ lea (up,n,8), up C up = end of the operand
+
+C mov R32(n), R32(%rcx)
+C neg R32(%rcx)
+ imul $-1, R32(n), R32(%rcx)
+ and $8-1, R32(%rcx) C rcx = (-n) mod 8 = number of slots skipped on the first pass
+
+ neg n
+
+ mov R32(%rcx), R32(%rax)
+ neg %rax C rax = -rcx
+ lea (up,%rax,8),up C move up back by rcx limbs so the entry slot reads limb 0
+
+ xor R32(%rax), R32(%rax) C rax = running bit count
+
+ lea (%rcx,%rcx,4), %rcx C rcx = 5 * rcx
+
+ lea L(top)(%rip), %rdx
+ lea (%rdx,%rcx,2), %rdx C rdx = L(top) + 10 * slots, the arithmetic assumes 10 bytes per slot
+ jmp *%rdx
+',`
+ lea (up,n,8), up C this branch is never selected: ifelse(1,1,...) always takes the first one
+
+ mov R32(n), R32(%rcx)
+ neg R32(%rcx)
+ and $8-1, R32(%rcx)
+
+ neg n
+
+ mov R32(%rcx), R32(%rax)
+ shl $3, R32(%rax)
+ sub %rax, up
+
+ xor R32(%rax), R32(%rax)
+
+C add R32(%rcx), R32(%rcx) C 2x
+C lea (%rcx,%rcx,4), %rcx C 10x
+ imul $10, R32(%rcx)
+
+ lea L(top)(%rip), %rdx
+ add %rcx, %rdx
+ jmp *%rdx
+')
+
+ ALIGN(32)
+L(top):
+C 0 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x00 C popcnt 0(up,n,8), %r8
+ add %r8, %rax
+C 7 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x08 C popcnt 8(up,n,8), %r9
+ add %r9, %rax
+C 6 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x10 C popcnt 16(up,n,8), %r8
+ add %r8, %rax
+C 5 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x18 C popcnt 24(up,n,8), %r9
+ add %r9, %rax
+C 4 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x20 C popcnt 32(up,n,8), %r8
+ add %r8, %rax
+C 3 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x28 C popcnt 40(up,n,8), %r9
+ add %r9, %rax
+C 2 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x30 C popcnt 48(up,n,8), %r8
+ add %r8, %rax
+C 1 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x38 C popcnt 56(up,n,8), %r9
+ add %r9, %rax
+
+ add $8, n C eight limbs per full pass
+ js L(top) C repeat while n is still negative
+ FUNC_EXIT()
+ ret
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/rshift.asm b/vendor/gmp-6.3.0/mpn/x86_64/k10/rshift.asm
new file mode 100644
index 0000000..249051a
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/rshift.asm
@@ -0,0 +1,37 @@
+dnl X86-64 mpn_rshift optimised for AMD K10.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+C This file has no instructions of its own: it includes x86_64/fastsse/rshift-movdqu2.asm.
+MULFUNC_PROLOGUE(mpn_rshift)
+include_mpn(`x86_64/fastsse/rshift-movdqu2.asm')
diff --git a/vendor/gmp-6.3.0/mpn/x86_64/k10/sec_tabselect.asm b/vendor/gmp-6.3.0/mpn/x86_64/k10/sec_tabselect.asm
new file mode 100644
index 0000000..e436034
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/x86_64/k10/sec_tabselect.asm
@@ -0,0 +1,37 @@
+dnl X86-64 mpn_sec_tabselect.
+
+dnl Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+C This file has no instructions of its own: it includes x86_64/fastsse/sec_tabselect.asm.
+MULFUNC_PROLOGUE(mpn_sec_tabselect)
+include_mpn(`x86_64/fastsse/sec_tabselect.asm')