Diffstat (limited to 'vendor/gmp-6.3.0/mpn/pa64')
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/README           |  78
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/addmul_1.asm     | 693
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/aors_n.asm       | 130
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/aorslsh1_n.asm   | 228
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/gmp-mparam.h     | 247
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/lshift.asm       | 114
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/mul_1.asm        | 646
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/rshift.asm       | 111
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/sqr_diagonal.asm | 191
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/submul_1.asm     | 700
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/udiv.asm         | 125
-rw-r--r--  vendor/gmp-6.3.0/mpn/pa64/umul.asm         |  97
12 files changed, 3360 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/pa64/README b/vendor/gmp-6.3.0/mpn/pa64/README
new file mode 100644
index 0000000..a51ce02
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/README
@@ -0,0 +1,78 @@
+Copyright 1999, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/.
+
+
+
+
+This directory contains mpn functions for 64-bit PA-RISC 2.0.
+
+PIPELINE SUMMARY
+
+The PA8x00 processors have an orthogonal 4-way out-of-order pipeline. Each
+cycle two ALU operations and two MEM operations can issue, but just one of the
+MEM operations may be a store. The two ALU operations can be almost any
+combination of non-memory operations.  Unlike on most other processors,
+integer and fp operations are treated completely equally here; both count
+simply as ALU operations.
+
+Unfortunately, some operations cause hiccups in the pipeline.  Combining
+carry-consuming operations like ADD,DC with operations that do not set
+carry, such as ADD,L, causes long delays.  Skip operations also seem to
+cause hiccups.  If several ADD,DC are issued consecutively, or if a plain
+carry-generating ADD feeds an ADD,DC, stalling does not occur.  We can
+effectively issue two ADD,DC operations/cycle.
+
+Latency scheduling is not as important as making sure to have a mix of ALU and
+MEM operations, but for full pipeline utilization, it is still a good idea to
+do some amount of latency scheduling.
+
+As on all other processors, RAW memory scheduling is critically important.
+Since integer multiplication takes place in the floating-point unit, products
+have to pass through memory on their way back to the integer registers, so
+the GMP code needs to handle this problem frequently.
+
+STATUS
+
+* mpn_lshift and mpn_rshift run at 1.5 cycles/limb on PA8000 and at 1.0
+ cycles/limb on PA8500. With latency scheduling, the numbers could
+ probably be improved to 1.0 cycles/limb for all PA8x00 chips.
+
+* mpn_add_n and mpn_sub_n run at 2.0 cycles/limb on PA8000 and at about
+ 1.6875 cycles/limb on PA8500. With latency scheduling, this could
+ probably be improved to get close to 1.5 cycles/limb. A problem is the
+ stalling of carry-inputting instructions after instructions that do not
+ write to carry.
+
+* mpn_mul_1, mpn_addmul_1, and mpn_submul_1 run at between 5.625 and 6.375
+  cycles/limb on PA8500 and later, and about a cycle/limb slower on older
+  chips.  The code uses ADD,DC for adjacent limbs, and relies heavily on
+  reordering.
+
+
+REFERENCES
+
+Hewlett Packard, "64-Bit Runtime Architecture for PA-RISC 2.0", version 3.3,
+October 1997.
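
For orientation before the assembly that follows: mpn_addmul_1 (the next file)
multiplies an n-limb vector by a single limb, adds the product into a result
vector, and returns the final carry limb.  Below is a minimal portable C sketch
of that operation; it assumes 64-bit limbs and a compiler that provides
unsigned __int128, which the assembly code here of course does not use.

typedef unsigned long long u64;

/* Sketch only: rp[] += up[] * v over n limbs, returning the carry limb. */
u64
addmul_1_sketch (u64 *rp, const u64 *up, long n, u64 v)
{
  u64 carry = 0;
  for (long i = 0; i < n; i++)
    {
      /* 64x64->128 product plus the old limb plus the running carry;
         this cannot overflow 128 bits.  */
      unsigned __int128 t = (unsigned __int128) up[i] * v + rp[i] + carry;
      rp[i] = (u64) t;
      carry = (u64) (t >> 64);
    }
  return carry;
}
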
diff --git a/vendor/gmp-6.3.0/mpn/pa64/addmul_1.asm b/vendor/gmp-6.3.0/mpn/pa64/addmul_1.asm
new file mode 100644
index 0000000..2cb9af9
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/addmul_1.asm
@@ -0,0 +1,693 @@
+dnl HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 8000,8200: 7
+C 8500,8600,8700: 6.375
+
+C The feed-in and wind-down code has not yet been scheduled. Many cycles
+C could be saved there per call.
+
+C DESCRIPTION:
+C The main loop "BIG" is 4-way unrolled, mainly to allow
+C effective use of ADD,DC.  Delays in moving data via the cache from the FP
+C registers to the IU registers have demanded a deep software pipeline and
+C a lot of stack slots for partial products in flight.
+C
+C CODE STRUCTURE:
+C save-some-registers
+C do 0, 1, 2, or 3 limbs
+C if done, restore-some-regs and return
+C save-many-regs
+C do 4, 8, ... limbs
+C restore-all-regs
+
+C STACK LAYOUT:
+C HP-PA stack grows upwards. We could allocate 8 fewer slots by using the
+C slots marked FREE, as well as some slots in the caller's "frame marker".
+C
+C -00 <- r30
+C -08 FREE
+C -10 tmp
+C -18 tmp
+C -20 tmp
+C -28 tmp
+C -30 tmp
+C -38 tmp
+C -40 tmp
+C -48 tmp
+C -50 tmp
+C -58 tmp
+C -60 tmp
+C -68 tmp
+C -70 tmp
+C -78 tmp
+C -80 tmp
+C -88 tmp
+C -90 FREE
+C -98 FREE
+C -a0 FREE
+C -a8 FREE
+C -b0 r13
+C -b8 r12
+C -c0 r11
+C -c8 r10
+C -d0 r9
+C -d8 r8
+C -e0 r7
+C -e8 r6
+C -f0 r5
+C -f8 r4
+C -100 r3
+C Previous frame:
+C [unused area]
+C -38/-138 vlimb home slot. For 2.0N, the vlimb arg will arrive here.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS:
+define(`rp',`%r26') C
+define(`up',`%r25') C
+define(`n',`%r24') C
+define(`vlimb',`%r23') C
+
+define(`climb',`%r23') C
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(mpn_addmul_1)
+
+ifdef(`HAVE_ABI_2_0w',
+` std vlimb, -0x38(%r30) C store vlimb into "home" slot
+')
+ std,ma %r3, 0x100(%r30)
+ std %r4, -0xf8(%r30)
+ std %r5, -0xf0(%r30)
+ ldo 0(%r0), climb C clear climb
+ fldd -0x138(%r30), %fr8 C put vlimb in fp register
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19') C
+
+define(`m032',`%r20') C
+define(`m096',`%r21') C
+
+define(`p000a',`%r22') C
+define(`p064a',`%r29') C
+
+define(`s000',`%r31') C
+
+define(`ma000',`%r4') C
+define(`ma064',`%r20') C
+
+define(`r000',`%r3') C
+
+ extrd,u n, 63, 2, %r5
+ cmpb,= %r5, %r0, L(BIG)
+ nop
+
+ fldd 0(up), %fr4
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ addib,<> -1, %r5, L(two_or_more)
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+LDEF(one)
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldd -0x80(%r30), p000a
+ b L(0_one_out)
+ ldd -0x68(%r30), p064a
+
+LDEF(two_or_more)
+ fldd 0(up), %fr4
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ ldd -0x78(%r30), p032a1
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ ldd -0x70(%r30), p032a2
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ ldd -0x80(%r30), p000a
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ ldd -0x68(%r30), p064a
+ addib,<> -1, %r5, L(three_or_more)
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+LDEF(two)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ ldd 0(rp), r000
+ b L(0_two_out)
+ depd m096, 31, 32, ma064
+
+LDEF(three_or_more)
+ fldd 0(up), %fr4
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ ldd 0(rp), r000
+C addib,= -1, %r5, L(0_out)
+ depd m096, 31, 32, ma064
+LDEF(loop0)
+C xmpyu %fr8R, %fr4L, %fr22
+C xmpyu %fr8L, %fr4R, %fr23
+C ldd -0x78(%r30), p032a1
+C fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+C
+C xmpyu %fr8R, %fr4R, %fr24
+C xmpyu %fr8L, %fr4L, %fr25
+C ldd -0x70(%r30), p032a2
+C fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+C
+C ldo 8(rp), rp
+C add climb, p000a, s000
+C ldd -0x80(%r30), p000a
+C fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+C
+C add,dc p064a, %r0, climb
+C ldo 8(up), up
+C ldd -0x68(%r30), p064a
+C fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+C
+C add ma000, s000, s000
+C add,dc ma064, climb, climb
+C fldd 0(up), %fr4
+C
+C add r000, s000, s000
+C add,dc %r0, climb, climb
+C std s000, -8(rp)
+C
+C add p032a1, p032a2, m032
+C add,dc %r0, %r0, m096
+C
+C depd,z m032, 31, 32, ma000
+C extrd,u m032, 31, 32, ma064
+C ldd 0(rp), r000
+C addib,<> -1, %r5, L(loop0)
+C depd m096, 31, 32, ma064
+LDEF(0_out)
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ ldd -0x78(%r30), p032a1
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ ldd -0x70(%r30), p032a2
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ ldo 8(rp), rp
+ add climb, p000a, s000
+ ldd -0x80(%r30), p000a
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ add,dc p064a, %r0, climb
+ ldd -0x68(%r30), p064a
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ add r000, s000, s000
+ add,dc %r0, climb, climb
+ std s000, -8(rp)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ ldd 0(rp), r000
+ depd m096, 31, 32, ma064
+LDEF(0_two_out)
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldo 8(rp), rp
+ add climb, p000a, s000
+ ldd -0x80(%r30), p000a
+ add,dc p064a, %r0, climb
+ ldd -0x68(%r30), p064a
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ add r000, s000, s000
+ add,dc %r0, climb, climb
+ std s000, -8(rp)
+LDEF(0_one_out)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ ldd 0(rp), r000
+ depd m096, 31, 32, ma064
+
+ add climb, p000a, s000
+ add,dc p064a, %r0, climb
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ add r000, s000, s000
+ add,dc %r0, climb, climb
+ std s000, 0(rp)
+
+ cmpib,>= 4, n, L(done)
+ ldo 8(rp), rp
+
+C 4-way unrolled code.
+
+LDEF(BIG)
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19') C
+define(`p096b1',`%r20') C
+define(`p096b2',`%r21') C
+define(`p160c1',`%r22') C
+define(`p160c2',`%r29') C
+define(`p224d1',`%r31') C
+define(`p224d2',`%r3') C
+ C
+define(`m032',`%r4') C
+define(`m096',`%r5') C
+define(`m160',`%r6') C
+define(`m224',`%r7') C
+define(`m288',`%r8') C
+ C
+define(`p000a',`%r1') C
+define(`p064a',`%r19') C
+define(`p064b',`%r20') C
+define(`p128b',`%r21') C
+define(`p128c',`%r22') C
+define(`p192c',`%r29') C
+define(`p192d',`%r31') C
+define(`p256d',`%r3') C
+ C
+define(`s000',`%r10') C
+define(`s064',`%r11') C
+define(`s128',`%r12') C
+define(`s192',`%r13') C
+ C
+define(`ma000',`%r9') C
+define(`ma064',`%r4') C
+define(`ma128',`%r5') C
+define(`ma192',`%r6') C
+define(`ma256',`%r7') C
+ C
+define(`r000',`%r1') C
+define(`r064',`%r19') C
+define(`r128',`%r20') C
+define(`r192',`%r21') C
+
+ std %r6, -0xe8(%r30)
+ std %r7, -0xe0(%r30)
+ std %r8, -0xd8(%r30)
+ std %r9, -0xd0(%r30)
+ std %r10, -0xc8(%r30)
+ std %r11, -0xc0(%r30)
+ std %r12, -0xb8(%r30)
+ std %r13, -0xb0(%r30)
+
+ifdef(`HAVE_ABI_2_0w',
+` extrd,u n, 61, 62, n C right shift 2
+',` extrd,u n, 61, 30, n C right shift 2, zero extend
+')
+
+LDEF(4_or_more)
+ fldd 0(up), %fr4
+ fldd 8(up), %fr5
+ fldd 16(up), %fr6
+ fldd 24(up), %fr7
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ xmpyu %fr8R, %fr5L, %fr24
+ xmpyu %fr8L, %fr5R, %fr25
+ xmpyu %fr8R, %fr6L, %fr26
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr7L, %fr28
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ xmpyu %fr8R, %fr4R, %fr30
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+ xmpyu %fr8R, %fr5R, %fr22
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+ xmpyu %fr8R, %fr6R, %fr24
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+ xmpyu %fr8R, %fr7R, %fr26
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ addib,<> -1, n, L(8_or_more)
+ xmpyu %fr8L, %fr7L, %fr27
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldd -0x38(%r30), p096b1
+ ldd -0x30(%r30), p096b2
+ ldd -0x58(%r30), p160c1
+ ldd -0x50(%r30), p160c2
+ ldd -0x18(%r30), p224d1
+ ldd -0x10(%r30), p224d2
+ b L(end1)
+ nop
+
+LDEF(8_or_more)
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ ldo 32(up), up
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ fldd 0(up), %fr4
+ fldd 8(up), %fr5
+ fldd 16(up), %fr6
+ fldd 24(up), %fr7
+ xmpyu %fr8R, %fr4L, %fr22
+ ldd -0x78(%r30), p032a1
+ xmpyu %fr8L, %fr4R, %fr23
+ xmpyu %fr8R, %fr5L, %fr24
+ ldd -0x70(%r30), p032a2
+ xmpyu %fr8L, %fr5R, %fr25
+ xmpyu %fr8R, %fr6L, %fr26
+ ldd -0x38(%r30), p096b1
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr7L, %fr28
+ ldd -0x30(%r30), p096b2
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ xmpyu %fr8R, %fr4R, %fr30
+ ldd -0x58(%r30), p160c1
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+ xmpyu %fr8R, %fr5R, %fr22
+ ldd -0x50(%r30), p160c2
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+ xmpyu %fr8R, %fr6R, %fr24
+ ldd -0x18(%r30), p224d1
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+ xmpyu %fr8R, %fr7R, %fr26
+ ldd -0x10(%r30), p224d2
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ addib,= -1, n, L(end2)
+ xmpyu %fr8L, %fr7L, %fr27
+LDEF(loop)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ ldo 32(up), up
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+
+ add,dc p064a, p064b, s064
+ ldd 0(rp), r000
+ add,dc p128b, p128c, s128
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+
+ add,dc p192c, p192d, s192
+ ldd 8(rp), r064
+ add,dc p256d, %r0, climb
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+
+ ldd 16(rp), r128
+ add ma000, s000, s000 C accum mid 0
+ ldd 24(rp), r192
+ add,dc ma064, s064, s064 C accum mid 1
+
+ add,dc ma128, s128, s128 C accum mid 2
+ fldd 0(up), %fr4
+ add,dc ma192, s192, s192 C accum mid 3
+ fldd 8(up), %fr5
+
+ add,dc ma256, climb, climb
+ fldd 16(up), %fr6
+ add r000, s000, s000 C accum rlimb 0
+ fldd 24(up), %fr7
+
+ add,dc r064, s064, s064 C accum rlimb 1
+ add,dc r128, s128, s128 C accum rlimb 2
+ std s000, 0(rp)
+
+ add,dc r192, s192, s192 C accum rlimb 3
+ add,dc %r0, climb, climb
+ std s064, 8(rp)
+
+ xmpyu %fr8R, %fr4L, %fr22
+ ldd -0x78(%r30), p032a1
+ xmpyu %fr8L, %fr4R, %fr23
+ std s128, 16(rp)
+
+ xmpyu %fr8R, %fr5L, %fr24
+ ldd -0x70(%r30), p032a2
+ xmpyu %fr8L, %fr5R, %fr25
+ std s192, 24(rp)
+
+ xmpyu %fr8R, %fr6L, %fr26
+ ldd -0x38(%r30), p096b1
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+
+ xmpyu %fr8R, %fr7L, %fr28
+ ldd -0x30(%r30), p096b2
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+
+ xmpyu %fr8R, %fr4R, %fr30
+ ldd -0x58(%r30), p160c1
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+
+ xmpyu %fr8R, %fr5R, %fr22
+ ldd -0x50(%r30), p160c2
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+
+ xmpyu %fr8R, %fr6R, %fr24
+ ldd -0x18(%r30), p224d1
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+
+ xmpyu %fr8R, %fr7R, %fr26
+ ldd -0x10(%r30), p224d2
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ xmpyu %fr8L, %fr7L, %fr27
+
+ addib,<> -1, n, L(loop)
+ ldo 32(rp), rp
+
+LDEF(end2)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ add,dc p064a, p064b, s064
+ ldd 0(rp), r000
+ add,dc p128b, p128c, s128
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ add,dc p192c, p192d, s192
+ ldd 8(rp), r064
+ add,dc p256d, %r0, climb
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ ldd 16(rp), r128
+ add ma000, s000, s000 C accum mid 0
+ ldd 24(rp), r192
+ add,dc ma064, s064, s064 C accum mid 1
+ add,dc ma128, s128, s128 C accum mid 2
+ add,dc ma192, s192, s192 C accum mid 3
+ add,dc ma256, climb, climb
+ add r000, s000, s000 C accum rlimb 0
+ add,dc r064, s064, s064 C accum rlimb 1
+ add,dc r128, s128, s128 C accum rlimb 2
+ std s000, 0(rp)
+ add,dc r192, s192, s192 C accum rlimb 3
+ add,dc %r0, climb, climb
+ std s064, 8(rp)
+ ldd -0x78(%r30), p032a1
+ std s128, 16(rp)
+ ldd -0x70(%r30), p032a2
+ std s192, 24(rp)
+ ldd -0x38(%r30), p096b1
+ ldd -0x30(%r30), p096b2
+ ldd -0x58(%r30), p160c1
+ ldd -0x50(%r30), p160c2
+ ldd -0x18(%r30), p224d1
+ ldd -0x10(%r30), p224d2
+ ldo 32(rp), rp
+
+LDEF(end1)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ add,dc p064a, p064b, s064
+ ldd 0(rp), r000
+ add,dc p128b, p128c, s128
+ add,dc p192c, p192d, s192
+ ldd 8(rp), r064
+ add,dc p256d, %r0, climb
+ ldd 16(rp), r128
+ add ma000, s000, s000 C accum mid 0
+ ldd 24(rp), r192
+ add,dc ma064, s064, s064 C accum mid 1
+ add,dc ma128, s128, s128 C accum mid 2
+ add,dc ma192, s192, s192 C accum mid 3
+ add,dc ma256, climb, climb
+ add r000, s000, s000 C accum rlimb 0
+ add,dc r064, s064, s064 C accum rlimb 1
+ add,dc r128, s128, s128 C accum rlimb 2
+ std s000, 0(rp)
+ add,dc r192, s192, s192 C accum rlimb 3
+ add,dc %r0, climb, climb
+ std s064, 8(rp)
+ std s128, 16(rp)
+ std s192, 24(rp)
+
+ ldd -0xb0(%r30), %r13
+ ldd -0xb8(%r30), %r12
+ ldd -0xc0(%r30), %r11
+ ldd -0xc8(%r30), %r10
+ ldd -0xd0(%r30), %r9
+ ldd -0xd8(%r30), %r8
+ ldd -0xe0(%r30), %r7
+ ldd -0xe8(%r30), %r6
+LDEF(done)
+ifdef(`HAVE_ABI_2_0w',
+` copy climb, %r28
+',` extrd,u climb, 63, 32, %r29
+ extrd,u climb, 31, 32, %r28
+')
+ ldd -0xf0(%r30), %r5
+ ldd -0xf8(%r30), %r4
+ bve (%r2)
+ ldd,mb -0x100(%r30), %r3
+EPILOGUE(mpn_addmul_1)
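
The loop above builds each 64x64-bit limb product from four 32x32->64-bit
xmpyu multiplies done in the FP unit; the "low", "mid" and "high" product
comments mark those partial products in their stack slots, and the ADD,DC
chains recombine them.  A hedged C sketch of that recombination follows (an
illustration of the technique only, not the GMP code; 64-bit limbs assumed).

typedef unsigned long long u64;

/* 64x64->128 multiply assembled from four 32x32->64 partial products,
   the same split the xmpyu instructions implement.  Sketch only. */
void
umul_ppmm_sketch (u64 u, u64 v, u64 *hi, u64 *lo)
{
  u64 ul = u & 0xffffffffULL, uh = u >> 32;
  u64 vl = v & 0xffffffffULL, vh = v >> 32;

  u64 p_low  = ul * vl;             /* "low product"             */
  u64 p_mid1 = ul * vh;             /* the two "mid products"    */
  u64 p_mid2 = uh * vl;
  u64 p_high = uh * vh;             /* "high product"            */

  u64 mid = p_mid1 + p_mid2;        /* may carry out of 64 bits  */
  u64 mid_cy = mid < p_mid1;        /* that carry (ADD,DC's job) */

  *lo = p_low + (mid << 32);
  *hi = p_high + (mid >> 32) + (mid_cy << 32) + (*lo < p_low);
}

Doing the multiplies in the FP unit is also why the partial products make a
round trip through the stack: as the README notes, that data has to move
through memory before the integer ALUs can combine it.
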
diff --git a/vendor/gmp-6.3.0/mpn/pa64/aors_n.asm b/vendor/gmp-6.3.0/mpn/pa64/aors_n.asm
new file mode 100644
index 0000000..ab4536f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/aors_n.asm
@@ -0,0 +1,130 @@
+dnl HP-PA 2.0 mpn_add_n, mpn_sub_n
+
+dnl Copyright 1997, 2000, 2002, 2003, 2009, 2010 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500. It
+dnl should be possible to reach the cache-bandwidth limit of 1.5 cycles/limb,
+dnl at least on the PA8500.  The problem now is stalling of the first ADD,DC
+dnl after LDO, where the processor gets confused about where the carry comes
+dnl from.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`vp',`%r24')
+define(`n',`%r23')
+
+ifdef(`OPERATION_add_n', `
+ define(ADCSBC, `add,dc')
+ define(INITCY, `addi -1,%r22,%r0')
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(ADCSBC, `sub,db')
+ define(INITCY, `subi 0,%r22,%r0')
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(func_nc)
+ifdef(`HAVE_ABI_2_0w',
+` b L(com)
+ nop
+',` b L(com)
+ ldw -52(%r30), %r22
+')
+EPILOGUE()
+PROLOGUE(func)
+ ldi 0, %r22
+LDEF(com)
+ sub %r0, n, %r21
+ depw,z %r21, 30, 3, %r28 C r28 = 2 * (-n & 7)
+ depw,z %r21, 28, 3, %r21 C r21 = 8 * (-n & 7)
+ sub up, %r21, up C offset up
+ sub vp, %r21, vp C offset vp
+ sub rp, %r21, rp C offset rp
+ blr %r28, %r0 C branch into loop
+ INITCY
+
+LDEF(loop)
+ ldd 0(up), %r20
+ ldd 0(vp), %r31
+ ADCSBC %r20, %r31, %r20
+ std %r20, 0(rp)
+LDEF(7) ldd 8(up), %r21
+ ldd 8(vp), %r19
+ ADCSBC %r21, %r19, %r21
+ std %r21, 8(rp)
+LDEF(6) ldd 16(up), %r20
+ ldd 16(vp), %r31
+ ADCSBC %r20, %r31, %r20
+ std %r20, 16(rp)
+LDEF(5) ldd 24(up), %r21
+ ldd 24(vp), %r19
+ ADCSBC %r21, %r19, %r21
+ std %r21, 24(rp)
+LDEF(4) ldd 32(up), %r20
+ ldd 32(vp), %r31
+ ADCSBC %r20, %r31, %r20
+ std %r20, 32(rp)
+LDEF(3) ldd 40(up), %r21
+ ldd 40(vp), %r19
+ ADCSBC %r21, %r19, %r21
+ std %r21, 40(rp)
+LDEF(2) ldd 48(up), %r20
+ ldd 48(vp), %r31
+ ADCSBC %r20, %r31, %r20
+ std %r20, 48(rp)
+LDEF(1) ldd 56(up), %r21
+ ldd 56(vp), %r19
+ ADCSBC %r21, %r19, %r21
+ ldo 64(up), up
+ std %r21, 56(rp)
+ ldo 64(vp), vp
+ addib,> -8, n, L(loop)
+ ldo 64(rp), rp
+
+ add,dc %r0, %r0, %r29
+ifdef(`OPERATION_sub_n',`
+ subi 1, %r29, %r29
+')
+ bve (%r2)
+ifdef(`HAVE_ABI_2_0w',
+` copy %r29, %r28
+',` ldi 0, %r28
+')
+EPILOGUE()
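
aors_n.asm above expands, via the ADCSBC macro, into mpn_add_n/mpn_add_nc and
mpn_sub_n/mpn_sub_nc.  Here is a minimal C sketch of the add flavor and its
limb-to-limb carry chain, i.e. the work the ADD,DC instructions do; it assumes
64-bit limbs and is an illustration only.

typedef unsigned long long u64;

/* Sketch only: rp[] = up[] + vp[] over n limbs, returning the final carry. */
u64
add_n_sketch (u64 *rp, const u64 *up, const u64 *vp, long n)
{
  u64 cy = 0;
  for (long i = 0; i < n; i++)
    {
      u64 s = up[i] + vp[i];
      u64 c1 = s < up[i];          /* carry from the limb addition   */
      u64 r = s + cy;
      u64 c2 = r < s;              /* carry from adding the old cy   */
      rp[i] = r;
      cy = c1 | c2;                /* at most one of them can be set */
    }
  return cy;
}
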
diff --git a/vendor/gmp-6.3.0/mpn/pa64/aorslsh1_n.asm b/vendor/gmp-6.3.0/mpn/pa64/aorslsh1_n.asm
new file mode 100644
index 0000000..2a55dde
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/aorslsh1_n.asm
@@ -0,0 +1,228 @@
+dnl PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
+
+dnl Copyright 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 8000,8200: 2
+C 8500,8600,8700: 1.75
+
+C TODO
+C * Write special feed-in code for each (n mod 8). (See the ia64 code.)
+C * Try to make this run at closer to 1.5 c/l.
+C * Set up register aliases (define(`u0',`%r19')).
+C * Explicitly align loop.
+
+dnl INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`vp',`%r24')
+define(`n',`%r23')
+
+ifdef(`OPERATION_addlsh1_n',`
+ define(ADCSBC, `add,dc')
+ define(INITC, `ldi 0,')
+ define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+ define(ADCSBC, `sub,db')
+ define(INITC, `ldi 1,')
+ define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ifdef(`HAVE_ABI_2_0w',`
+ define(LEVEL, `.level 2.0w')
+ define(RETREG, `%r28')
+ define(CLRRET1, `dnl')
+')
+ifdef(`HAVE_ABI_2_0n',`
+ define(LEVEL, `.level 2.0')
+ define(RETREG, `%r29')
+ define(CLRRET1, `ldi 0, %r28')
+')
+
+ LEVEL
+PROLOGUE(func)
+ std,ma %r3, 0x100(%r30) C save reg
+
+ INITC %r1 C init saved cy
+
+C Primitive code for the first (n mod 8) limbs:
+ extrd,u n, 63, 3, %r22 C count for loop0
+ comib,= 0, %r22, L(unrolled) C skip loop0?
+ copy %r0, %r28
+LDEF(loop0)
+ ldd 0(vp), %r21
+ ldo 8(vp), vp
+ ldd 0(up), %r19
+ ldo 8(up), up
+ shrpd %r21, %r28, 63, %r31
+ addi -1, %r1, %r0 C restore cy
+ ADCSBC %r19, %r31, %r29
+ std %r29, 0(rp)
+ add,dc %r0, %r0, %r1 C save cy
+ copy %r21, %r28
+ addib,> -1, %r22, L(loop0)
+ ldo 8(rp), rp
+
+ addib,>= -8, n, L(unrolled)
+ addi -1, %r1, %r0 C restore cy
+
+ shrpd %r0, %r28, 63, %r28
+ ADCSBC %r0, %r28, RETREG
+ifdef(`OPERATION_sublsh1_n',
+` sub %r0, RETREG, RETREG')
+ CLRRET1
+
+ bve (%r2)
+ ldd,mb -0x100(%r30), %r3
+
+
+LDEF(unrolled)
+ std %r4, -0xf8(%r30) C save reg
+ ldd 0(vp), %r4
+ std %r5, -0xf0(%r30) C save reg
+ ldd 8(vp), %r5
+ std %r6, -0xe8(%r30) C save reg
+ ldd 16(vp), %r6
+ std %r7, -0xe0(%r30) C save reg
+
+ ldd 24(vp), %r7
+ shrpd %r4, %r28, 63, %r31
+ std %r8, -0xd8(%r30) C save reg
+ ldd 32(vp), %r8
+ shrpd %r5, %r4, 63, %r4
+ std %r9, -0xd0(%r30) C save reg
+ ldd 40(vp), %r9
+ shrpd %r6, %r5, 63, %r5
+ ldd 48(vp), %r3
+ shrpd %r7, %r6, 63, %r6
+ ldd 56(vp), %r28
+ shrpd %r8, %r7, 63, %r7
+ ldd 0(up), %r19
+ shrpd %r9, %r8, 63, %r8
+ ldd 8(up), %r20
+ shrpd %r3, %r9, 63, %r9
+ ldd 16(up), %r21
+ shrpd %r28, %r3, 63, %r3
+ ldd 24(up), %r22
+
+ nop C alignment FIXME
+ addib,<= -8, n, L(end)
+ addi -1, %r1, %r0 C restore cy
+LDEF(loop)
+ ADCSBC %r19, %r31, %r29
+ ldd 32(up), %r19
+ std %r29, 0(rp)
+ ADCSBC %r20, %r4, %r29
+ ldd 40(up), %r20
+ std %r29, 8(rp)
+ ADCSBC %r21, %r5, %r29
+ ldd 48(up), %r21
+ std %r29, 16(rp)
+ ADCSBC %r22, %r6, %r29
+ ldd 56(up), %r22
+ std %r29, 24(rp)
+ ADCSBC %r19, %r7, %r29
+ ldd 64(vp), %r4
+ std %r29, 32(rp)
+ ADCSBC %r20, %r8, %r29
+ ldd 72(vp), %r5
+ std %r29, 40(rp)
+ ADCSBC %r21, %r9, %r29
+ ldd 80(vp), %r6
+ std %r29, 48(rp)
+ ADCSBC %r22, %r3, %r29
+ std %r29, 56(rp)
+
+ add,dc %r0, %r0, %r1 C save cy
+
+ ldd 88(vp), %r7
+ shrpd %r4, %r28, 63, %r31
+ ldd 96(vp), %r8
+ shrpd %r5, %r4, 63, %r4
+ ldd 104(vp), %r9
+ shrpd %r6, %r5, 63, %r5
+ ldd 112(vp), %r3
+ shrpd %r7, %r6, 63, %r6
+ ldd 120(vp), %r28
+ shrpd %r8, %r7, 63, %r7
+ ldd 64(up), %r19
+ shrpd %r9, %r8, 63, %r8
+ ldd 72(up), %r20
+ shrpd %r3, %r9, 63, %r9
+ ldd 80(up), %r21
+ shrpd %r28, %r3, 63, %r3
+ ldd 88(up), %r22
+
+ ldo 64(vp), vp
+ ldo 64(rp), rp
+ ldo 64(up), up
+ addib,> -8, n, L(loop)
+ addi -1, %r1, %r0 C restore cy
+LDEF(end)
+ ADCSBC %r19, %r31, %r29
+ ldd 32(up), %r19
+ std %r29, 0(rp)
+ ADCSBC %r20, %r4, %r29
+ ldd 40(up), %r20
+ std %r29, 8(rp)
+ ADCSBC %r21, %r5, %r29
+ ldd 48(up), %r21
+ std %r29, 16(rp)
+ ADCSBC %r22, %r6, %r29
+ ldd 56(up), %r22
+ std %r29, 24(rp)
+ ADCSBC %r19, %r7, %r29
+ ldd -0xf8(%r30), %r4 C restore reg
+ std %r29, 32(rp)
+ ADCSBC %r20, %r8, %r29
+ ldd -0xf0(%r30), %r5 C restore reg
+ std %r29, 40(rp)
+ ADCSBC %r21, %r9, %r29
+ ldd -0xe8(%r30), %r6 C restore reg
+ std %r29, 48(rp)
+ ADCSBC %r22, %r3, %r29
+ ldd -0xe0(%r30), %r7 C restore reg
+ std %r29, 56(rp)
+
+ shrpd %r0, %r28, 63, %r28
+ ldd -0xd8(%r30), %r8 C restore reg
+ ADCSBC %r0, %r28, RETREG
+ifdef(`OPERATION_sublsh1_n',
+` sub %r0, RETREG, RETREG')
+ CLRRET1
+
+ ldd -0xd0(%r30), %r9 C restore reg
+ bve (%r2)
+ ldd,mb -0x100(%r30), %r3 C restore reg
+EPILOGUE()
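
The routine above computes rp[] = up[] +- (vp[] << 1), where the shift runs
across the whole vp operand; the shrpd instructions form each shifted limb
from two adjacent vp limbs.  A minimal C sketch of the addlsh1_n flavor is
given below (the sublsh1_n case subtracts instead); 64-bit limbs assumed,
illustration only.

typedef unsigned long long u64;

/* Sketch only: rp[] = up[] + 2*vp[], returning the carry limb (0, 1 or 2). */
u64
addlsh1_n_sketch (u64 *rp, const u64 *up, const u64 *vp, long n)
{
  u64 cy = 0;        /* addition carry, 0 or 1                  */
  u64 vprev = 0;     /* bit shifted out of the previous vp limb */
  for (long i = 0; i < n; i++)
    {
      u64 shifted = (vp[i] << 1) | vprev;   /* what shrpd computes */
      vprev = vp[i] >> 63;
      u64 s = up[i] + shifted;
      u64 c1 = s < up[i];
      u64 r = s + cy;
      u64 c2 = r < s;
      rp[i] = r;
      cy = c1 | c2;
    }
  return cy + vprev;   /* add carry plus the bit shifted out at the top */
}
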
diff --git a/vendor/gmp-6.3.0/mpn/pa64/gmp-mparam.h b/vendor/gmp-6.3.0/mpn/pa64/gmp-mparam.h
new file mode 100644
index 0000000..c2719c3
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/gmp-mparam.h
@@ -0,0 +1,247 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004, 2008-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 440MHz PA8200 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD 21
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 31
+#define MUL_TOOM33_THRESHOLD 114
+#define MUL_TOOM44_THRESHOLD 179
+#define MUL_TOOM6H_THRESHOLD 222
+#define MUL_TOOM8H_THRESHOLD 296
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 130
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 229
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 129
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 54
+
+#define SQR_BASECASE_THRESHOLD 5
+#define SQR_TOOM2_THRESHOLD 58
+#define SQR_TOOM3_THRESHOLD 153
+#define SQR_TOOM4_THRESHOLD 278
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 0 /* always */
+
+#define MULMID_TOOM42_THRESHOLD 56
+
+#define MULMOD_BNM1_THRESHOLD 15
+#define SQRMOD_BNM1_THRESHOLD 19
+
+#define POWM_SEC_TABLE 2,23,228,1084
+
+#define MUL_FFT_MODF_THRESHOLD 336 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 336, 5}, { 11, 4}, { 23, 5}, { 21, 6}, \
+ { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
+ { 23, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
+ { 15, 7}, { 31, 8}, { 19, 7}, { 39, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 51,10}, \
+ { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \
+ { 47, 9}, { 95,10}, { 55,11}, { 31,10}, \
+ { 63, 9}, { 127,10}, { 71, 8}, { 287,10}, \
+ { 79,11}, { 47,10}, { 95, 9}, { 191, 8}, \
+ { 383, 7}, { 767,10}, { 103, 9}, { 207, 8}, \
+ { 415, 7}, { 831,12}, { 31,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 543, 7}, { 1087, 6}, \
+ { 2175,10}, { 143, 9}, { 287, 8}, { 575,11}, \
+ { 79, 9}, { 319, 8}, { 639, 7}, { 1279, 9}, \
+ { 335, 8}, { 671,10}, { 175, 9}, { 351, 8}, \
+ { 703,11}, { 95,10}, { 191, 9}, { 383, 8}, \
+ { 767,10}, { 207, 9}, { 415, 8}, { 831, 7}, \
+ { 1663,11}, { 111,10}, { 223, 9}, { 447, 8}, \
+ { 895,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 543, 8}, { 1087, 7}, { 2175,10}, { 287, 9}, \
+ { 575, 8}, { 1215, 7}, { 2431,10}, { 319, 9}, \
+ { 639, 8}, { 1279,10}, { 335, 9}, { 671, 8}, \
+ { 1343, 9}, { 703, 8}, { 1407,12}, { 95,11}, \
+ { 191,10}, { 383,11}, { 207, 9}, { 831, 8}, \
+ { 1663,11}, { 223,10}, { 447, 9}, { 959,13}, \
+ { 63,12}, { 127,11}, { 255, 8}, { 2047,11}, \
+ { 271,10}, { 543, 9}, { 1087, 8}, { 2175,11}, \
+ { 287,10}, { 575, 9}, { 1215, 8}, { 2431,11}, \
+ { 319,10}, { 671, 9}, { 1343, 8}, { 2687,11}, \
+ { 351,10}, { 703, 9}, { 1471, 8}, { 2943,12}, \
+ { 191,11}, { 383, 8}, { 3071,11}, { 415,10}, \
+ { 831, 9}, { 1663,11}, { 479,10}, { 959, 9}, \
+ { 1919, 8}, { 3839,13}, { 127,12}, { 255,11}, \
+ { 543,10}, { 1087, 9}, { 2175,12}, { 287,11}, \
+ { 607,10}, { 1215, 9}, { 2431, 8}, { 4863,12}, \
+ { 319,11}, { 671,10}, { 1343,13}, { 191, 9}, \
+ { 3071,12}, { 415,11}, { 831,10}, { 1663, 8}, \
+ { 6655, 9}, { 3455,12}, { 447, 9}, { 3583,13}, \
+ { 255,12}, { 511,11}, { 1023,10}, { 2175,13}, \
+ { 319,11}, { 1279,12}, { 671,10}, { 2815,12}, \
+ { 735,10}, { 2943, 9}, { 5887,13}, { 383,12}, \
+ { 767,11}, { 1535,10}, { 3071,13}, { 447,10}, \
+ { 3583,12}, { 959,13}, { 511,12}, { 1087,13}, \
+ { 639,12}, { 1343,13}, { 767,11}, { 3071,13}, \
+ { 831,12}, { 1663,11}, { 3455,10}, { 6911,13}, \
+ { 895,14}, { 511,13}, { 1023,12}, { 2047,13}, \
+ { 1087,12}, { 2303,13}, { 1215,12}, { 2431,14}, \
+ { 639,13}, { 1279,12}, { 2559,13}, { 1343,12}, \
+ { 2687,11}, { 5375,13}, { 1407,12}, { 2815,11}, \
+ { 5631,12}, { 2943,13}, { 1535,12}, { 3199,13}, \
+ { 1663,12}, { 3327,13}, { 1727,14}, { 895,13}, \
+ { 1791,12}, { 3583,13}, { 1919,15}, { 511,14}, \
+ { 1023,13}, { 2047,12}, { 4095,14}, { 1151,13}, \
+ { 2431,14}, { 1279,13}, { 2687,14}, { 1407,13}, \
+ { 2815,12}, { 5631,15}, { 767,14}, { 1535,13}, \
+ { 3071,14}, { 1663,13}, { 3327,14}, { 1791,13}, \
+ { 3583,14}, { 1919,15}, { 1023,14}, { 2303,13}, \
+ { 4607,14}, { 2431,13}, { 4863,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 252
+#define MUL_FFT_THRESHOLD 2368
+
+#define SQR_FFT_MODF_THRESHOLD 284 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 284, 5}, { 9, 4}, { 21, 5}, { 21, 6}, \
+ { 11, 5}, { 23, 6}, { 25, 7}, { 25, 8}, \
+ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
+ { 31,10}, { 63, 8}, { 255, 7}, { 511,10}, \
+ { 71, 8}, { 287, 7}, { 575,10}, { 79,11}, \
+ { 47,10}, { 95, 9}, { 191, 8}, { 383, 7}, \
+ { 767,10}, { 103, 9}, { 207, 8}, { 415,12}, \
+ { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \
+ { 543, 7}, { 1087, 8}, { 575, 7}, { 1151,11}, \
+ { 79, 8}, { 639, 7}, { 1279, 9}, { 335, 8}, \
+ { 671, 7}, { 1343,10}, { 175, 8}, { 703, 7}, \
+ { 1407,11}, { 95,10}, { 191, 9}, { 383, 8}, \
+ { 767,10}, { 207, 9}, { 415, 8}, { 831, 7}, \
+ { 1663, 9}, { 447, 8}, { 895,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 543, 8}, { 1087, 7}, \
+ { 2175, 9}, { 575, 8}, { 1151,10}, { 303, 9}, \
+ { 607, 8}, { 1215, 7}, { 2431,10}, { 319, 9}, \
+ { 639, 8}, { 1279, 9}, { 671, 8}, { 1343, 7}, \
+ { 2687,10}, { 351, 9}, { 703, 8}, { 1407,12}, \
+ { 95,11}, { 191,10}, { 383, 9}, { 767,11}, \
+ { 207,10}, { 415, 9}, { 831, 8}, { 1663,11}, \
+ { 223,10}, { 447, 9}, { 895,13}, { 63,11}, \
+ { 255,10}, { 543, 8}, { 2175,11}, { 287,10}, \
+ { 575, 9}, { 1151,10}, { 607, 9}, { 1215, 8}, \
+ { 2431,11}, { 319, 9}, { 1279,10}, { 671, 9}, \
+ { 1343, 8}, { 2687,11}, { 351,10}, { 703, 9}, \
+ { 1407,10}, { 735,12}, { 191,11}, { 383,10}, \
+ { 831, 9}, { 1663,12}, { 223,11}, { 447,10}, \
+ { 895,11}, { 479, 9}, { 1919, 8}, { 3839,12}, \
+ { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \
+ { 1087, 9}, { 2175,12}, { 287,11}, { 575,10}, \
+ { 1151,11}, { 607,10}, { 1215, 9}, { 2431, 8}, \
+ { 4863,10}, { 1279,11}, { 671,10}, { 1343, 9}, \
+ { 2687,12}, { 351,11}, { 703,10}, { 1407,11}, \
+ { 735,13}, { 191, 9}, { 3071, 7}, { 12287,11}, \
+ { 799,12}, { 415,11}, { 831,10}, { 1663,12}, \
+ { 447, 8}, { 7167,12}, { 479, 9}, { 3839,14}, \
+ { 127,13}, { 255,12}, { 511,11}, { 1023,12}, \
+ { 543,10}, { 2175, 9}, { 4607,11}, { 1215,10}, \
+ { 2431,11}, { 1279,10}, { 2559,13}, { 383,12}, \
+ { 767,11}, { 1535,12}, { 799,10}, { 3199, 9}, \
+ { 6399,12}, { 895,13}, { 511,12}, { 1023,11}, \
+ { 2047,12}, { 1087,13}, { 575,12}, { 1151,10}, \
+ { 4607,13}, { 639,12}, { 1279,11}, { 2687,14}, \
+ { 383,13}, { 767,11}, { 3071,12}, { 1599,13}, \
+ { 895,12}, { 1791,11}, { 3583,13}, { 959,15}, \
+ { 255,12}, { 2175,13}, { 1215,14}, { 639,13}, \
+ { 1279,12}, { 2559,13}, { 1343,12}, { 2687,13}, \
+ { 1471,11}, { 5887,14}, { 767,13}, { 1535,12}, \
+ { 3071,13}, { 1599,12}, { 3199,13}, { 1663,12}, \
+ { 3327,13}, { 1727,14}, { 895,13}, { 1791,12}, \
+ { 3583,15}, { 511,14}, { 1023,13}, { 2175,14}, \
+ { 1151,12}, { 4607,13}, { 2431,14}, { 1279,13}, \
+ { 2687,14}, { 1407,13}, { 2815,15}, { 767,13}, \
+ { 3199,14}, { 1663,13}, { 3327,14}, { 1791,13}, \
+ { 3583,14}, { 1919,15}, { 1023,14}, { 2047,13}, \
+ { 4095,14}, { 2303,13}, { 4607,14}, { 2431,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 257
+#define SQR_FFT_THRESHOLD 1856
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 113
+#define MULLO_MUL_N_THRESHOLD 4658
+
+#define DC_DIV_QR_THRESHOLD 123
+#define DC_DIVAPPR_Q_THRESHOLD 372
+#define DC_BDIV_QR_THRESHOLD 142
+#define DC_BDIV_Q_THRESHOLD 312
+
+#define INV_MULMOD_BNM1_THRESHOLD 58
+#define INV_NEWTON_THRESHOLD 315
+#define INV_APPR_THRESHOLD 315
+
+#define BINV_NEWTON_THRESHOLD 360
+#define REDC_1_TO_REDC_N_THRESHOLD 101
+
+#define MU_DIV_QR_THRESHOLD 979
+#define MU_DIVAPPR_Q_THRESHOLD 1142
+#define MUPI_DIV_QR_THRESHOLD 93
+#define MU_BDIV_QR_THRESHOLD 889
+#define MU_BDIV_Q_THRESHOLD 1187
+
+#define MATRIX22_STRASSEN_THRESHOLD 9
+#define HGCD_THRESHOLD 234
+#define HGCD_APPR_THRESHOLD 300
+#define HGCD_REDUCE_THRESHOLD 1553
+#define GCD_DC_THRESHOLD 684
+#define GCDEXT_DC_THRESHOLD 525
+#define JACOBI_BASE_METHOD 2
+
+#define GET_STR_DC_THRESHOLD 21
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 1951
+#define SET_STR_PRECOMPUTE_THRESHOLD 4034
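
The header above is a table of tuned algorithm crossover points for this CPU
(a 440 MHz PA8200).  As a hedged illustration of how such a value is consulted:
roughly speaking, a balanced n-limb multiplication switches from the schoolbook
basecase to the Toom-2 (Karatsuba) split once n reaches MUL_TOOM22_THRESHOLD.
The sketch below shows only that idea, with hypothetical helper names; it is
not GMP's actual dispatch code, which also handles unbalanced operands and
further algorithms.

typedef unsigned long long u64;

#ifndef MUL_TOOM22_THRESHOLD
#define MUL_TOOM22_THRESHOLD 31    /* the value tuned above */
#endif

/* Hypothetical helpers, named for illustration only. */
void mul_basecase_sketch (u64 *rp, const u64 *up, const u64 *vp, long n);
void mul_toom22_sketch   (u64 *rp, const u64 *up, const u64 *vp, long n);

void
mul_n_sketch (u64 *rp, const u64 *up, const u64 *vp, long n)
{
  if (n < MUL_TOOM22_THRESHOLD)
    mul_basecase_sketch (rp, up, vp, n);   /* O(n^2) schoolbook wins here   */
  else
    mul_toom22_sketch (rp, up, vp, n);     /* Karatsuba split wins above it */
}
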
diff --git a/vendor/gmp-6.3.0/mpn/pa64/lshift.asm b/vendor/gmp-6.3.0/mpn/pa64/lshift.asm
new file mode 100644
index 0000000..c0fc292
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/lshift.asm
@@ -0,0 +1,114 @@
+dnl HP-PA 2.0 mpn_lshift -- Left shift.
+
+dnl Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl This runs at 1.5 cycles/limb on PA8000 and 1.0 cycles/limb on PA8500.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+define(`cnt',`%r23')
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(mpn_lshift)
+ shladd n, 3, up, up
+ shladd n, 3, rp, rp
+ subi 64, cnt, cnt
+ mtsar cnt
+ ldd -8(up), %r21
+ addib,= -1, n, L(end)
+ shrpd %r0, %r21, %sar, %r29 C compute carry out limb
+ depw,z n, 31, 3, %r28 C r28 = (size & 7)
+ sub %r0, n, %r22
+ depw,z %r22, 28, 3, %r22 C r22 = 8 * (-size & 7)
+ add up, %r22, up C offset up
+ blr %r28, %r0 C branch into jump table
+ add rp, %r22, rp C offset rp
+ b L(0)
+ nop
+ b L(1)
+ copy %r21, %r20
+ b L(2)
+ nop
+ b L(3)
+ copy %r21, %r20
+ b L(4)
+ nop
+ b L(5)
+ copy %r21, %r20
+ b L(6)
+ nop
+ b L(7)
+ copy %r21, %r20
+
+LDEF(loop)
+LDEF(0) ldd -16(up), %r20
+ shrpd %r21, %r20, %sar, %r21
+ std %r21, -8(rp)
+LDEF(7) ldd -24(up), %r21
+ shrpd %r20, %r21, %sar, %r20
+ std %r20, -16(rp)
+LDEF(6) ldd -32(up), %r20
+ shrpd %r21, %r20, %sar, %r21
+ std %r21, -24(rp)
+LDEF(5) ldd -40(up), %r21
+ shrpd %r20, %r21, %sar, %r20
+ std %r20, -32(rp)
+LDEF(4) ldd -48(up), %r20
+ shrpd %r21, %r20, %sar, %r21
+ std %r21, -40(rp)
+LDEF(3) ldd -56(up), %r21
+ shrpd %r20, %r21, %sar, %r20
+ std %r20, -48(rp)
+LDEF(2) ldd -64(up), %r20
+ shrpd %r21, %r20, %sar, %r21
+ std %r21, -56(rp)
+LDEF(1) ldd -72(up), %r21
+ ldo -64(up), up
+ shrpd %r20, %r21, %sar, %r20
+ std %r20, -64(rp)
+ addib,> -8, n, L(loop)
+ ldo -64(rp), rp
+
+LDEF(end)
+ shrpd %r21, %r0, %sar, %r21
+ std %r21, -8(rp)
+ bve (%r2)
+ifdef(`HAVE_ABI_2_0w',
+` copy %r29,%r28
+',` extrd,u %r29, 31, 32, %r28
+')
+EPILOGUE(mpn_lshift)
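
mpn_lshift above shifts an n-limb operand left by cnt bits (1 <= cnt <= 63)
and returns the bits shifted out at the top; it walks from the most
significant limb downwards, which is what lets rp overlap up when rp >= up.
A minimal C sketch of the same operation, assuming 64-bit limbs; illustration
only.

typedef unsigned long long u64;

u64
lshift_sketch (u64 *rp, const u64 *up, long n, unsigned cnt)
{
  unsigned tnt = 64 - cnt;        /* complementary shift, cf. "subi 64, cnt" */
  u64 high = up[n - 1];
  u64 retval = high >> tnt;       /* bits shifted out at the top */
  for (long i = n - 1; i > 0; i--)
    {
      u64 low = up[i - 1];
      rp[i] = (high << cnt) | (low >> tnt);   /* what shrpd computes */
      high = low;
    }
  rp[0] = high << cnt;
  return retval;
}
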
diff --git a/vendor/gmp-6.3.0/mpn/pa64/mul_1.asm b/vendor/gmp-6.3.0/mpn/pa64/mul_1.asm
new file mode 100644
index 0000000..6935c23
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/mul_1.asm
@@ -0,0 +1,646 @@
+dnl HP-PA 2.0 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 8000,8200: 6.5
+C 8500,8600,8700: 5.625
+
+C The feed-in and wind-down code has not yet been scheduled. Many cycles
+C could be saved there per call.
+
+C DESCRIPTION:
+C The main loop "BIG" is 4-way unrolled, mainly to allow
+C effective use of ADD,DC.  Delays in moving data via the cache from the FP
+C registers to the IU registers have demanded a deep software pipeline and
+C a lot of stack slots for partial products in flight.
+C
+C CODE STRUCTURE:
+C save-some-registers
+C do 0, 1, 2, or 3 limbs
+C if done, restore-some-regs and return
+C save-many-regs
+C do 4, 8, ... limbs
+C restore-all-regs
+
+C STACK LAYOUT:
+C HP-PA stack grows upwards. We could allocate 8 fewer slots by using the
+C slots marked FREE, as well as some slots in the caller's "frame marker".
+C
+C -00 <- r30
+C -08 FREE
+C -10 tmp
+C -18 tmp
+C -20 tmp
+C -28 tmp
+C -30 tmp
+C -38 tmp
+C -40 tmp
+C -48 tmp
+C -50 tmp
+C -58 tmp
+C -60 tmp
+C -68 tmp
+C -70 tmp
+C -78 tmp
+C -80 tmp
+C -88 tmp
+C -90 FREE
+C -98 FREE
+C -a0 FREE
+C -a8 FREE
+C -b0 r13
+C -b8 r12
+C -c0 r11
+C -c8 r10
+C -d0 r9
+C -d8 r8
+C -e0 r7
+C -e8 r6
+C -f0 r5
+C -f8 r4
+C -100 r3
+C Previous frame:
+C [unused area]
+C -38/-138 vlimb home slot. For 2.0N, the vlimb arg will arrive here.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS:
+define(`rp',`%r26') C
+define(`up',`%r25') C
+define(`n',`%r24') C
+define(`vlimb',`%r23') C
+
+define(`climb',`%r23') C
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(mpn_mul_1)
+
+ifdef(`HAVE_ABI_2_0w',
+` std vlimb, -0x38(%r30) C store vlimb into "home" slot
+')
+ std,ma %r3, 0x100(%r30)
+ std %r4, -0xf8(%r30)
+ std %r5, -0xf0(%r30)
+ ldo 0(%r0), climb C clear climb
+ fldd -0x138(%r30), %fr8 C put vlimb in fp register
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19') C
+
+define(`m032',`%r20') C
+define(`m096',`%r21') C
+
+define(`p000a',`%r22') C
+define(`p064a',`%r29') C
+
+define(`s000',`%r31') C
+
+define(`ma000',`%r4') C
+define(`ma064',`%r20') C
+
+C define(`r000',`%r3') C FIXME don't save r3 for n < 4.
+
+ extrd,u n, 63, 2, %r5
+ cmpb,= %r5, %r0, L(BIG)
+ nop
+
+ fldd 0(up), %fr4
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ addib,<> -1, %r5, L(two_or_more)
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+LDEF(one)
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldd -0x80(%r30), p000a
+ b L(0_one_out)
+ ldd -0x68(%r30), p064a
+
+LDEF(two_or_more)
+ fldd 0(up), %fr4
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ ldd -0x78(%r30), p032a1
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ ldd -0x70(%r30), p032a2
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ ldd -0x80(%r30), p000a
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ ldd -0x68(%r30), p064a
+ addib,<> -1, %r5, L(three_or_more)
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+LDEF(two)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ b L(0_two_out)
+ depd m096, 31, 32, ma064
+
+LDEF(three_or_more)
+ fldd 0(up), %fr4
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+C addib,= -1, %r5, L(0_out)
+ depd m096, 31, 32, ma064
+LDEF(loop0)
+C xmpyu %fr8R, %fr4L, %fr22
+C xmpyu %fr8L, %fr4R, %fr23
+C ldd -0x78(%r30), p032a1
+C fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+C
+C xmpyu %fr8R, %fr4R, %fr24
+C xmpyu %fr8L, %fr4L, %fr25
+C ldd -0x70(%r30), p032a2
+C fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+C
+C ldo 8(rp), rp
+C add climb, p000a, s000
+C ldd -0x80(%r30), p000a
+C fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+C
+C add,dc p064a, %r0, climb
+C ldo 8(up), up
+C ldd -0x68(%r30), p064a
+C fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+C
+C add ma000, s000, s000
+C add,dc ma064, climb, climb
+C fldd 0(up), %fr4
+C
+C std s000, -8(rp)
+C
+C add p032a1, p032a2, m032
+C add,dc %r0, %r0, m096
+C
+C depd,z m032, 31, 32, ma000
+C extrd,u m032, 31, 32, ma064
+C addib,<> -1, %r5, L(loop0)
+C depd m096, 31, 32, ma064
+LDEF(0_out)
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ ldd -0x78(%r30), p032a1
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ ldd -0x70(%r30), p032a2
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ ldo 8(rp), rp
+ add climb, p000a, s000
+ ldd -0x80(%r30), p000a
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ add,dc p064a, %r0, climb
+ ldd -0x68(%r30), p064a
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ std s000, -8(rp)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ depd m096, 31, 32, ma064
+LDEF(0_two_out)
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldo 8(rp), rp
+ add climb, p000a, s000
+ ldd -0x80(%r30), p000a
+ add,dc p064a, %r0, climb
+ ldd -0x68(%r30), p064a
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ std s000, -8(rp)
+LDEF(0_one_out)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ depd m096, 31, 32, ma064
+
+ add climb, p000a, s000
+ add,dc p064a, %r0, climb
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ std s000, 0(rp)
+
+ cmpib,>= 4, n, L(done)
+ ldo 8(rp), rp
+
+C 4-way unrolled code.
+
+LDEF(BIG)
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19') C
+define(`p096b1',`%r20') C
+define(`p096b2',`%r21') C
+define(`p160c1',`%r22') C
+define(`p160c2',`%r29') C
+define(`p224d1',`%r31') C
+define(`p224d2',`%r3') C
+ C
+define(`m032',`%r4') C
+define(`m096',`%r5') C
+define(`m160',`%r6') C
+define(`m224',`%r7') C
+define(`m288',`%r8') C
+ C
+define(`p000a',`%r1') C
+define(`p064a',`%r19') C
+define(`p064b',`%r20') C
+define(`p128b',`%r21') C
+define(`p128c',`%r22') C
+define(`p192c',`%r29') C
+define(`p192d',`%r31') C
+define(`p256d',`%r3') C
+ C
+define(`s000',`%r10') C
+define(`s064',`%r11') C
+define(`s128',`%r12') C
+define(`s192',`%r13') C
+ C
+define(`ma000',`%r9') C
+define(`ma064',`%r4') C
+define(`ma128',`%r5') C
+define(`ma192',`%r6') C
+define(`ma256',`%r7') C
+
+ std %r6, -0xe8(%r30)
+ std %r7, -0xe0(%r30)
+ std %r8, -0xd8(%r30)
+ std %r9, -0xd0(%r30)
+ std %r10, -0xc8(%r30)
+ std %r11, -0xc0(%r30)
+ std %r12, -0xb8(%r30)
+ std %r13, -0xb0(%r30)
+
+ifdef(`HAVE_ABI_2_0w',
+` extrd,u n, 61, 62, n C right shift 2
+',` extrd,u n, 61, 30, n C right shift 2, zero extend
+')
+
+LDEF(4_or_more)
+ fldd 0(up), %fr4
+ fldd 8(up), %fr5
+ fldd 16(up), %fr6
+ fldd 24(up), %fr7
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ xmpyu %fr8R, %fr5L, %fr24
+ xmpyu %fr8L, %fr5R, %fr25
+ xmpyu %fr8R, %fr6L, %fr26
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr7L, %fr28
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ xmpyu %fr8R, %fr4R, %fr30
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+ xmpyu %fr8R, %fr5R, %fr22
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+ xmpyu %fr8R, %fr6R, %fr24
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+ xmpyu %fr8R, %fr7R, %fr26
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ addib,<> -1, n, L(8_or_more)
+ xmpyu %fr8L, %fr7L, %fr27
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldd -0x38(%r30), p096b1
+ ldd -0x30(%r30), p096b2
+ ldd -0x58(%r30), p160c1
+ ldd -0x50(%r30), p160c2
+ ldd -0x18(%r30), p224d1
+ ldd -0x10(%r30), p224d2
+ b L(end1)
+ nop
+
+LDEF(8_or_more)
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ ldo 32(up), up
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ fldd 0(up), %fr4
+ fldd 8(up), %fr5
+ fldd 16(up), %fr6
+ fldd 24(up), %fr7
+ xmpyu %fr8R, %fr4L, %fr22
+ ldd -0x78(%r30), p032a1
+ xmpyu %fr8L, %fr4R, %fr23
+ xmpyu %fr8R, %fr5L, %fr24
+ ldd -0x70(%r30), p032a2
+ xmpyu %fr8L, %fr5R, %fr25
+ xmpyu %fr8R, %fr6L, %fr26
+ ldd -0x38(%r30), p096b1
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr7L, %fr28
+ ldd -0x30(%r30), p096b2
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ xmpyu %fr8R, %fr4R, %fr30
+ ldd -0x58(%r30), p160c1
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+ xmpyu %fr8R, %fr5R, %fr22
+ ldd -0x50(%r30), p160c2
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+ xmpyu %fr8R, %fr6R, %fr24
+ ldd -0x18(%r30), p224d1
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+ xmpyu %fr8R, %fr7R, %fr26
+ ldd -0x10(%r30), p224d2
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ addib,= -1, n, L(end2)
+ xmpyu %fr8L, %fr7L, %fr27
+LDEF(loop)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ ldo 32(up), up
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+
+ add,dc p064a, p064b, s064
+ add,dc p128b, p128c, s128
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+
+ add,dc p192c, p192d, s192
+ add,dc p256d, %r0, climb
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+
+ add ma000, s000, s000 C accum mid 0
+ fldd 0(up), %fr4
+ add,dc ma064, s064, s064 C accum mid 1
+ std s000, 0(rp)
+
+ add,dc ma128, s128, s128 C accum mid 2
+ fldd 8(up), %fr5
+ add,dc ma192, s192, s192 C accum mid 3
+ std s064, 8(rp)
+
+ add,dc ma256, climb, climb
+ fldd 16(up), %fr6
+ std s128, 16(rp)
+
+ xmpyu %fr8R, %fr4L, %fr22
+ ldd -0x78(%r30), p032a1
+ xmpyu %fr8L, %fr4R, %fr23
+ fldd 24(up), %fr7
+
+ xmpyu %fr8R, %fr5L, %fr24
+ ldd -0x70(%r30), p032a2
+ xmpyu %fr8L, %fr5R, %fr25
+ std s192, 24(rp)
+
+ xmpyu %fr8R, %fr6L, %fr26
+ ldd -0x38(%r30), p096b1
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+
+ xmpyu %fr8R, %fr7L, %fr28
+ ldd -0x30(%r30), p096b2
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+
+ xmpyu %fr8R, %fr4R, %fr30
+ ldd -0x58(%r30), p160c1
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+
+ xmpyu %fr8R, %fr5R, %fr22
+ ldd -0x50(%r30), p160c2
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+
+ xmpyu %fr8R, %fr6R, %fr24
+ ldd -0x18(%r30), p224d1
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+
+ xmpyu %fr8R, %fr7R, %fr26
+ ldd -0x10(%r30), p224d2
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ xmpyu %fr8L, %fr7L, %fr27
+
+ addib,<> -1, n, L(loop)
+ ldo 32(rp), rp
+
+LDEF(end2)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ add,dc p064a, p064b, s064
+ add,dc p128b, p128c, s128
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ add,dc p192c, p192d, s192
+ add,dc p256d, %r0, climb
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ add ma000, s000, s000 C accum mid 0
+ add,dc ma064, s064, s064 C accum mid 1
+ add,dc ma128, s128, s128 C accum mid 2
+ add,dc ma192, s192, s192 C accum mid 3
+ add,dc ma256, climb, climb
+ std s000, 0(rp)
+ std s064, 8(rp)
+ ldd -0x78(%r30), p032a1
+ std s128, 16(rp)
+ ldd -0x70(%r30), p032a2
+ std s192, 24(rp)
+ ldd -0x38(%r30), p096b1
+ ldd -0x30(%r30), p096b2
+ ldd -0x58(%r30), p160c1
+ ldd -0x50(%r30), p160c2
+ ldd -0x18(%r30), p224d1
+ ldd -0x10(%r30), p224d2
+ ldo 32(rp), rp
+
+LDEF(end1)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ add,dc p064a, p064b, s064
+ add,dc p128b, p128c, s128
+ add,dc p192c, p192d, s192
+ add,dc p256d, %r0, climb
+ add ma000, s000, s000 C accum mid 0
+ add,dc ma064, s064, s064 C accum mid 1
+ add,dc ma128, s128, s128 C accum mid 2
+ add,dc ma192, s192, s192 C accum mid 3
+ add,dc ma256, climb, climb
+ std s000, 0(rp)
+ std s064, 8(rp)
+ std s128, 16(rp)
+ std s192, 24(rp)
+
+ ldd -0xb0(%r30), %r13
+ ldd -0xb8(%r30), %r12
+ ldd -0xc0(%r30), %r11
+ ldd -0xc8(%r30), %r10
+ ldd -0xd0(%r30), %r9
+ ldd -0xd8(%r30), %r8
+ ldd -0xe0(%r30), %r7
+ ldd -0xe8(%r30), %r6
+LDEF(done)
+ifdef(`HAVE_ABI_2_0w',
+` copy climb, %r28
+',` extrd,u climb, 63, 32, %r29
+ extrd,u climb, 31, 32, %r28
+')
+ ldd -0xf0(%r30), %r5
+ ldd -0xf8(%r30), %r4
+ bve (%r2)
+ ldd,mb -0x100(%r30), %r3
+EPILOGUE(mpn_mul_1)
diff --git a/vendor/gmp-6.3.0/mpn/pa64/rshift.asm b/vendor/gmp-6.3.0/mpn/pa64/rshift.asm
new file mode 100644
index 0000000..cfc242e
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/rshift.asm
@@ -0,0 +1,111 @@
+dnl HP-PA 2.0 mpn_rshift -- Right shift.
+
+dnl Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl This runs at 1.5 cycles/limb on PA8000 and 1.0 cycles/limb on PA8500.
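For orientation, a rough C model of the routine's contract (an editor's sketch assuming 64-bit limbs and 0 < cnt < 64; the function name is illustrative, not a GMP entry point).  The return value is the bits shifted out of up[0], left-aligned, which is exactly the "carry out limb" the code computes up front; the unrolled assembly below produces the same results via shrpd and a jump table.

#include <stdint.h>

/* Reference semantics only; ignores the overlap rules and the scheduling
   tricks of the real code. */
uint64_t rshift_sketch (uint64_t *rp, const uint64_t *up, long n, unsigned cnt)
{
  uint64_t retval = up[0] << (64 - cnt);      /* bits shifted out, high-aligned */
  for (long i = 0; i < n - 1; i++)
    rp[i] = (up[i] >> cnt) | (up[i + 1] << (64 - cnt));
  rp[n - 1] = up[n - 1] >> cnt;
  return retval;
}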
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+define(`cnt',`%r23')
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(mpn_rshift)
+ mtsar cnt
+ ldd 0(up), %r21
+ addib,= -1, n, L(end)
+ shrpd %r21, %r0, %sar, %r29 C compute carry out limb
+ depw,z n, 31, 3, %r28 C r28 = (size & 7)
+ sub %r0, n, %r22
+ depw,z %r22, 28, 3, %r22 C r22 = 8 * (-size & 7)
+ sub up, %r22, up C offset up
+ blr %r28, %r0 C branch into jump table
+ sub rp, %r22, rp C offset rp
+ b L(0)
+ nop
+ b L(1)
+ copy %r21, %r20
+ b L(2)
+ nop
+ b L(3)
+ copy %r21, %r20
+ b L(4)
+ nop
+ b L(5)
+ copy %r21, %r20
+ b L(6)
+ nop
+ b L(7)
+ copy %r21, %r20
+
+LDEF(loop)
+LDEF(0) ldd 8(up), %r20
+ shrpd %r20, %r21, %sar, %r21
+ std %r21, 0(rp)
+LDEF(7) ldd 16(up), %r21
+ shrpd %r21, %r20, %sar, %r20
+ std %r20, 8(rp)
+LDEF(6) ldd 24(up), %r20
+ shrpd %r20, %r21, %sar, %r21
+ std %r21, 16(rp)
+LDEF(5) ldd 32(up), %r21
+ shrpd %r21, %r20, %sar, %r20
+ std %r20, 24(rp)
+LDEF(4) ldd 40(up), %r20
+ shrpd %r20, %r21, %sar, %r21
+ std %r21, 32(rp)
+LDEF(3) ldd 48(up), %r21
+ shrpd %r21, %r20, %sar, %r20
+ std %r20, 40(rp)
+LDEF(2) ldd 56(up), %r20
+ shrpd %r20, %r21, %sar, %r21
+ std %r21, 48(rp)
+LDEF(1) ldd 64(up), %r21
+ ldo 64(up), up
+ shrpd %r21, %r20, %sar, %r20
+ std %r20, 56(rp)
+ addib,> -8, n, L(loop)
+ ldo 64(rp), rp
+
+LDEF(end)
+ shrpd %r0, %r21, %sar, %r21
+ std %r21, 0(rp)
+ bve (%r2)
+ifdef(`HAVE_ABI_2_0w',
+` copy %r29,%r28
+',` extrd,u %r29, 31, 32, %r28
+')
+EPILOGUE(mpn_rshift)
diff --git a/vendor/gmp-6.3.0/mpn/pa64/sqr_diagonal.asm b/vendor/gmp-6.3.0/mpn/pa64/sqr_diagonal.asm
new file mode 100644
index 0000000..f6fadc9
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/sqr_diagonal.asm
@@ -0,0 +1,191 @@
+dnl HP-PA 2.0 64-bit mpn_sqr_diagonal.
+
+dnl Copyright 2001-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl This code runs at 7.25 cycles/limb on PA8000 and 7.75 cycles/limb on
+dnl PA8500. The cache would saturate at 5 cycles/limb, so there is some room
+dnl for optimization.
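What the routine computes, as an editor's C sketch (assumes 64-bit limbs and GCC's __int128; the function name is illustrative): each source limb is squared and the 128-bit square is written to two consecutive result limbs.  The assembly builds each square from 32x32->64 xmpyu partial products (low*low, high*high, and a doubled cross term folded in by the integer unit), since the FPU multiplier is 32x32.

#include <stdint.h>

void sqr_diagonal_sketch (uint64_t *rp, const uint64_t *up, long n)
{
  for (long i = 0; i < n; i++)
    {
      unsigned __int128 sq = (unsigned __int128) up[i] * up[i];
      rp[2 * i]     = (uint64_t) sq;          /* low limb of up[i]^2  */
      rp[2 * i + 1] = (uint64_t) (sq >> 64);  /* high limb of up[i]^2 */
    }
}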
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+
+define(`p00',`%r28')
+define(`p32',`%r29')
+define(`p64',`%r31')
+define(`t0',`%r19')
+define(`t1',`%r20')
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(mpn_sqr_diagonal)
+ ldo 128(%r30),%r30
+
+ fldds,ma 8(up),%fr8
+ addib,= -1,n,L(end1)
+ nop
+ fldds,ma 8(up),%fr4
+ xmpyu %fr8l,%fr8r,%fr10
+ fstd %fr10,-120(%r30)
+ xmpyu %fr8r,%fr8r,%fr9
+ fstd %fr9,0(rp)
+ xmpyu %fr8l,%fr8l,%fr11
+ fstd %fr11,8(rp)
+ addib,= -1,n,L(end2)
+ ldo 16(rp),rp
+
+LDEF(loop)
+ fldds,ma 8(up),%fr8 C load next up limb
+ xmpyu %fr4l,%fr4r,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr4r,%fr4r,%fr5 C multiply in fp regs
+ fstd %fr5,0(rp)
+ xmpyu %fr4l,%fr4l,%fr7
+ fstd %fr7,8(rp)
+ ldd -120(%r30),p32
+ ldd -16(rp),p00 C accumulate in int regs
+ ldd -8(rp),p64
+ depd,z p32,30,31,t0
+ add t0,p00,p00
+ std p00,-16(rp)
+ extrd,u p32,32,33,t1
+ add,dc t1,p64,p64
+ std p64,-8(rp)
+ addib,= -1,n,L(exit)
+ ldo 16(rp),rp
+
+ fldds,ma 8(up),%fr4
+ xmpyu %fr8l,%fr8r,%fr10
+ fstd %fr10,-120(%r30)
+ xmpyu %fr8r,%fr8r,%fr9
+ fstd %fr9,0(rp)
+ xmpyu %fr8l,%fr8l,%fr11
+ fstd %fr11,8(rp)
+ ldd -128(%r30),p32
+ ldd -16(rp),p00
+ ldd -8(rp),p64
+ depd,z p32,30,31,t0
+ add t0,p00,p00
+ std p00,-16(rp)
+ extrd,u p32,32,33,t1
+ add,dc t1,p64,p64
+ std p64,-8(rp)
+ addib,<> -1,n,L(loop)
+ ldo 16(rp),rp
+
+LDEF(end2)
+ xmpyu %fr4l,%fr4r,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr4r,%fr4r,%fr5
+ fstd %fr5,0(rp)
+ xmpyu %fr4l,%fr4l,%fr7
+ fstd %fr7,8(rp)
+ ldd -120(%r30),p32
+ ldd -16(rp),p00
+ ldd -8(rp),p64
+ depd,z p32,30,31,t0
+ add t0,p00,p00
+ std p00,-16(rp)
+ extrd,u p32,32,33,t1
+ add,dc t1,p64,p64
+ std p64,-8(rp)
+ ldo 16(rp),rp
+ ldd -128(%r30),p32
+ ldd -16(rp),p00
+ ldd -8(rp),p64
+ depd,z p32,30,31,t0
+ add t0,p00,p00
+ std p00,-16(rp)
+ extrd,u p32,32,33,t1
+ add,dc t1,p64,p64
+ std p64,-8(rp)
+ bve (%r2)
+ ldo -128(%r30),%r30
+
+LDEF(exit)
+ xmpyu %fr8l,%fr8r,%fr10
+ fstd %fr10,-120(%r30)
+ xmpyu %fr8r,%fr8r,%fr9
+ fstd %fr9,0(rp)
+ xmpyu %fr8l,%fr8l,%fr11
+ fstd %fr11,8(rp)
+ ldd -128(%r30),p32
+ ldd -16(rp),p00
+ ldd -8(rp),p64
+ depd,z p32,31,32,t0
+ add t0,p00,p00
+ extrd,u p32,31,32,t1
+ add,dc t1,p64,p64
+ add t0,p00,p00
+ add,dc t1,p64,p64
+ std p00,-16(rp)
+ std p64,-8(rp)
+ ldo 16(rp),rp
+ ldd -120(%r30),p32
+ ldd -16(rp),p00
+ ldd -8(rp),p64
+ depd,z p32,31,32,t0
+ add t0,p00,p00
+ extrd,u p32,31,32,t1
+ add,dc t1,p64,p64
+ add t0,p00,p00
+ add,dc t1,p64,p64
+ std p00,-16(rp)
+ std p64,-8(rp)
+ bve (%r2)
+ ldo -128(%r30),%r30
+
+LDEF(end1)
+ xmpyu %fr8l,%fr8r,%fr10
+ fstd %fr10,-128(%r30)
+ xmpyu %fr8r,%fr8r,%fr9
+ fstd %fr9,0(rp)
+ xmpyu %fr8l,%fr8l,%fr11
+ fstd %fr11,8(rp)
+ ldo 16(rp),rp
+ ldd -128(%r30),p32
+ ldd -16(rp),p00
+ ldd -8(rp),p64
+ depd,z p32,31,32,t0
+ add t0,p00,p00
+ extrd,u p32,31,32,t1
+ add,dc t1,p64,p64
+ add t0,p00,p00
+ add,dc t1,p64,p64
+ std p00,-16(rp)
+ std p64,-8(rp)
+ bve (%r2)
+ ldo -128(%r30),%r30
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/vendor/gmp-6.3.0/mpn/pa64/submul_1.asm b/vendor/gmp-6.3.0/mpn/pa64/submul_1.asm
new file mode 100644
index 0000000..f8a1968
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/submul_1.asm
@@ -0,0 +1,700 @@
+dnl HP-PA 2.0 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 8000,8200: 7
+C 8500,8600,8700: 6.5
+
+C The feed-in and wind-down code has not yet been scheduled. Many cycles
+C could be saved there per call.
+
+C DESCRIPTION:
+C The main loop "BIG" is 4-way unrolled, mainly to allow
+C effective use of ADD,DC. Delays in moving data via the cache from the FP
+C registers to the IU registers have demanded a deep software pipeline, and
+C a lot of stack slots for partial products in flight.
+C
+C CODE STRUCTURE:
+C save-some-registers
+C do 0, 1, 2, or 3 limbs
+C if done, restore-some-regs and return
+C save-many-regs
+C	do 4, 8, ... limbs
+C restore-all-regs
+
+C STACK LAYOUT:
+C HP-PA stack grows upwards. We could allocate 8 fewer slots by using the
+C slots marked FREE, as well as some slots in the caller's "frame marker".
+C
+C -00 <- r30
+C -08 FREE
+C -10 tmp
+C -18 tmp
+C -20 tmp
+C -28 tmp
+C -30 tmp
+C -38 tmp
+C -40 tmp
+C -48 tmp
+C -50 tmp
+C -58 tmp
+C -60 tmp
+C -68 tmp
+C -70 tmp
+C -78 tmp
+C -80 tmp
+C -88 tmp
+C -90 FREE
+C -98 FREE
+C -a0 FREE
+C -a8 FREE
+C -b0 r13
+C -b8 r12
+C -c0 r11
+C -c8 r10
+C  -d0 r9
+C -d8 r8
+C -e0 r7
+C -e8 r6
+C -f0 r5
+C -f8 r4
+C -100 r3
+C Previous frame:
+C [unused area]
+C -38/-138 vlimb home slot. For 2.0N, the vlimb arg will arrive here.
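As a complement to the DESCRIPTION above, here is a plain-C model of what mpn_submul_1 computes (an editor's sketch assuming 64-bit limbs and GCC's __int128; the helper name is illustrative).  It shows the rp[] -= up[]*vlimb semantics and the returned borrow ("climb"); the assembly instead forms each 128-bit product from 32x32 xmpyu partial products and pipelines them through the stack slots listed above.

#include <stdint.h>

uint64_t submul_1_sketch (uint64_t *rp, const uint64_t *up, long n, uint64_t vlimb)
{
  uint64_t borrow = 0;
  for (long i = 0; i < n; i++)
    {
      unsigned __int128 p = (unsigned __int128) up[i] * vlimb;
      uint64_t plo = (uint64_t) p, phi = (uint64_t) (p >> 64);
      uint64_t r  = rp[i];
      uint64_t s  = r - plo;
      uint64_t b1 = s > r;                 /* borrow from subtracting the low product */
      uint64_t s2 = s - borrow;
      uint64_t b2 = s2 > s;                /* borrow from the carried-in borrow */
      rp[i]  = s2;
      borrow = phi + b1 + b2;
    }
  return borrow;
}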
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS:
+define(`rp',`%r26') C
+define(`up',`%r25') C
+define(`n',`%r24') C
+define(`vlimb',`%r23') C
+
+define(`climb',`%r23') C
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(mpn_submul_1)
+
+ifdef(`HAVE_ABI_2_0w',
+` std vlimb, -0x38(%r30) C store vlimb into "home" slot
+')
+ std,ma %r3, 0x100(%r30)
+ std %r4, -0xf8(%r30)
+ std %r5, -0xf0(%r30)
+ ldo 0(%r0), climb C clear climb
+ fldd -0x138(%r30), %fr8 C put vlimb in fp register
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19') C
+
+define(`m032',`%r20') C
+define(`m096',`%r21') C
+
+define(`p000a',`%r22') C
+define(`p064a',`%r29') C
+
+define(`s000',`%r31') C
+
+define(`ma000',`%r4') C
+define(`ma064',`%r20') C
+
+define(`r000',`%r3') C
+
+ extrd,u n, 63, 2, %r5
+ cmpb,= %r5, %r0, L(BIG)
+ nop
+
+ fldd 0(up), %fr4
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ addib,<> -1, %r5, L(two_or_more)
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+LDEF(one)
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldd -0x80(%r30), p000a
+ b L(0_one_out)
+ ldd -0x68(%r30), p064a
+
+LDEF(two_or_more)
+ fldd 0(up), %fr4
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ ldd -0x78(%r30), p032a1
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ ldd -0x70(%r30), p032a2
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ ldd -0x80(%r30), p000a
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ ldd -0x68(%r30), p064a
+ addib,<> -1, %r5, L(three_or_more)
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+LDEF(two)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ ldd 0(rp), r000
+ b L(0_two_out)
+ depd m096, 31, 32, ma064
+
+LDEF(three_or_more)
+ fldd 0(up), %fr4
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ ldd 0(rp), r000
+C addib,= -1, %r5, L(0_out)
+ depd m096, 31, 32, ma064
+LDEF(loop0)
+C xmpyu %fr8R, %fr4L, %fr22
+C xmpyu %fr8L, %fr4R, %fr23
+C ldd -0x78(%r30), p032a1
+C fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+C
+C xmpyu %fr8R, %fr4R, %fr24
+C xmpyu %fr8L, %fr4L, %fr25
+C ldd -0x70(%r30), p032a2
+C fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+C
+C ldo 8(rp), rp
+C add climb, p000a, s000
+C ldd -0x80(%r30), p000a
+C fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+C
+C add,dc p064a, %r0, climb
+C ldo 8(up), up
+C ldd -0x68(%r30), p064a
+C fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+C
+C add ma000, s000, s000
+C add,dc ma064, climb, climb
+C fldd 0(up), %fr4
+C
+C sub r000, s000, s000
+C sub,db %r0, climb, climb
+C sub %r0, climb, climb
+C std s000, -8(rp)
+C
+C add p032a1, p032a2, m032
+C add,dc %r0, %r0, m096
+C
+C depd,z m032, 31, 32, ma000
+C extrd,u m032, 31, 32, ma064
+C ldd 0(rp), r000
+C addib,<> -1, %r5, L(loop0)
+C depd m096, 31, 32, ma064
+LDEF(0_out)
+ ldo 8(up), up
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ ldd -0x78(%r30), p032a1
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr4R, %fr24
+ xmpyu %fr8L, %fr4L, %fr25
+ ldd -0x70(%r30), p032a2
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ ldo 8(rp), rp
+ add climb, p000a, s000
+ ldd -0x80(%r30), p000a
+ fstd %fr24, -0x80(%r30) C low product to -0x80..-0x79
+ add,dc p064a, %r0, climb
+ ldd -0x68(%r30), p064a
+ fstd %fr25, -0x68(%r30) C high product to -0x68..-0x61
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ sub r000, s000, s000
+ sub,db %r0, climb, climb
+ sub %r0, climb, climb
+ std s000, -8(rp)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ ldd 0(rp), r000
+ depd m096, 31, 32, ma064
+LDEF(0_two_out)
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldo 8(rp), rp
+ add climb, p000a, s000
+ ldd -0x80(%r30), p000a
+ add,dc p064a, %r0, climb
+ ldd -0x68(%r30), p064a
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ sub r000, s000, s000
+ sub,db %r0, climb, climb
+ sub %r0, climb, climb
+ std s000, -8(rp)
+LDEF(0_one_out)
+ add p032a1, p032a2, m032
+ add,dc %r0, %r0, m096
+ depd,z m032, 31, 32, ma000
+ extrd,u m032, 31, 32, ma064
+ ldd 0(rp), r000
+ depd m096, 31, 32, ma064
+
+ add climb, p000a, s000
+ add,dc p064a, %r0, climb
+ add ma000, s000, s000
+ add,dc ma064, climb, climb
+ sub r000, s000, s000
+ sub,db %r0, climb, climb
+ sub %r0, climb, climb
+ std s000, 0(rp)
+
+ cmpib,>= 4, n, L(done)
+ ldo 8(rp), rp
+
+C 4-way unrolled code.
+
+LDEF(BIG)
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19') C
+define(`p096b1',`%r20') C
+define(`p096b2',`%r21') C
+define(`p160c1',`%r22') C
+define(`p160c2',`%r29') C
+define(`p224d1',`%r31') C
+define(`p224d2',`%r3') C
+ C
+define(`m032',`%r4') C
+define(`m096',`%r5') C
+define(`m160',`%r6') C
+define(`m224',`%r7') C
+define(`m288',`%r8') C
+ C
+define(`p000a',`%r1') C
+define(`p064a',`%r19') C
+define(`p064b',`%r20') C
+define(`p128b',`%r21') C
+define(`p128c',`%r22') C
+define(`p192c',`%r29') C
+define(`p192d',`%r31') C
+define(`p256d',`%r3') C
+ C
+define(`s000',`%r10') C
+define(`s064',`%r11') C
+define(`s128',`%r12') C
+define(`s192',`%r13') C
+ C
+define(`ma000',`%r9') C
+define(`ma064',`%r4') C
+define(`ma128',`%r5') C
+define(`ma192',`%r6') C
+define(`ma256',`%r7') C
+ C
+define(`r000',`%r1') C
+define(`r064',`%r19') C
+define(`r128',`%r20') C
+define(`r192',`%r21') C
+
+ std %r6, -0xe8(%r30)
+ std %r7, -0xe0(%r30)
+ std %r8, -0xd8(%r30)
+ std %r9, -0xd0(%r30)
+ std %r10, -0xc8(%r30)
+ std %r11, -0xc0(%r30)
+ std %r12, -0xb8(%r30)
+ std %r13, -0xb0(%r30)
+
+ifdef(`HAVE_ABI_2_0w',
+` extrd,u n, 61, 62, n C right shift 2
+',` extrd,u n, 61, 30, n C right shift 2, zero extend
+')
+
+LDEF(4_or_more)
+ fldd 0(up), %fr4
+ fldd 8(up), %fr5
+ fldd 16(up), %fr6
+ fldd 24(up), %fr7
+ xmpyu %fr8R, %fr4L, %fr22
+ xmpyu %fr8L, %fr4R, %fr23
+ xmpyu %fr8R, %fr5L, %fr24
+ xmpyu %fr8L, %fr5R, %fr25
+ xmpyu %fr8R, %fr6L, %fr26
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr7L, %fr28
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ xmpyu %fr8R, %fr4R, %fr30
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+ xmpyu %fr8R, %fr5R, %fr22
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+ xmpyu %fr8R, %fr6R, %fr24
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+ xmpyu %fr8R, %fr7R, %fr26
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ addib,<> -1, n, L(8_or_more)
+ xmpyu %fr8L, %fr7L, %fr27
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ ldd -0x78(%r30), p032a1
+ ldd -0x70(%r30), p032a2
+ ldd -0x38(%r30), p096b1
+ ldd -0x30(%r30), p096b2
+ ldd -0x58(%r30), p160c1
+ ldd -0x50(%r30), p160c2
+ ldd -0x18(%r30), p224d1
+ ldd -0x10(%r30), p224d2
+ b L(end1)
+ nop
+
+LDEF(8_or_more)
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ ldo 32(up), up
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ fldd 0(up), %fr4
+ fldd 8(up), %fr5
+ fldd 16(up), %fr6
+ fldd 24(up), %fr7
+ xmpyu %fr8R, %fr4L, %fr22
+ ldd -0x78(%r30), p032a1
+ xmpyu %fr8L, %fr4R, %fr23
+ xmpyu %fr8R, %fr5L, %fr24
+ ldd -0x70(%r30), p032a2
+ xmpyu %fr8L, %fr5R, %fr25
+ xmpyu %fr8R, %fr6L, %fr26
+ ldd -0x38(%r30), p096b1
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+ xmpyu %fr8R, %fr7L, %fr28
+ ldd -0x30(%r30), p096b2
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+ xmpyu %fr8R, %fr4R, %fr30
+ ldd -0x58(%r30), p160c1
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+ xmpyu %fr8R, %fr5R, %fr22
+ ldd -0x50(%r30), p160c2
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+ xmpyu %fr8R, %fr6R, %fr24
+ ldd -0x18(%r30), p224d1
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+ xmpyu %fr8R, %fr7R, %fr26
+ ldd -0x10(%r30), p224d2
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ addib,= -1, n, L(end2)
+ xmpyu %fr8L, %fr7L, %fr27
+LDEF(loop)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ ldo 32(up), up
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+
+ add,dc p064a, p064b, s064
+ ldd 0(rp), r000
+ add,dc p128b, p128c, s128
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+
+ add,dc p192c, p192d, s192
+ ldd 8(rp), r064
+ add,dc p256d, %r0, climb
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+
+ ldd 16(rp), r128
+ add ma000, s000, s000 C accum mid 0
+ ldd 24(rp), r192
+ add,dc ma064, s064, s064 C accum mid 1
+
+ add,dc ma128, s128, s128 C accum mid 2
+ fldd 0(up), %fr4
+ add,dc ma192, s192, s192 C accum mid 3
+ fldd 8(up), %fr5
+
+ add,dc ma256, climb, climb
+ fldd 16(up), %fr6
+ sub r000, s000, s000 C accum rlimb 0
+ fldd 24(up), %fr7
+
+ sub,db r064, s064, s064 C accum rlimb 1
+ sub,db r128, s128, s128 C accum rlimb 2
+ std s000, 0(rp)
+
+ sub,db r192, s192, s192 C accum rlimb 3
+ sub,db %r0, climb, climb
+ sub %r0, climb, climb
+ std s064, 8(rp)
+
+ xmpyu %fr8R, %fr4L, %fr22
+ ldd -0x78(%r30), p032a1
+ xmpyu %fr8L, %fr4R, %fr23
+ std s128, 16(rp)
+
+ xmpyu %fr8R, %fr5L, %fr24
+ ldd -0x70(%r30), p032a2
+ xmpyu %fr8L, %fr5R, %fr25
+ std s192, 24(rp)
+
+ xmpyu %fr8R, %fr6L, %fr26
+ ldd -0x38(%r30), p096b1
+ xmpyu %fr8L, %fr6R, %fr27
+ fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
+
+ xmpyu %fr8R, %fr7L, %fr28
+ ldd -0x30(%r30), p096b2
+ xmpyu %fr8L, %fr7R, %fr29
+ fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
+
+ xmpyu %fr8R, %fr4R, %fr30
+ ldd -0x58(%r30), p160c1
+ xmpyu %fr8L, %fr4L, %fr31
+ fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
+
+ xmpyu %fr8R, %fr5R, %fr22
+ ldd -0x50(%r30), p160c2
+ xmpyu %fr8L, %fr5L, %fr23
+ fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
+
+ xmpyu %fr8R, %fr6R, %fr24
+ ldd -0x18(%r30), p224d1
+ xmpyu %fr8L, %fr6L, %fr25
+ fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
+
+ xmpyu %fr8R, %fr7R, %fr26
+ ldd -0x10(%r30), p224d2
+ fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
+ xmpyu %fr8L, %fr7L, %fr27
+
+ addib,<> -1, n, L(loop)
+ ldo 32(rp), rp
+
+LDEF(end2)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
+ add,dc p064a, p064b, s064
+ ldd 0(rp), r000
+ add,dc p128b, p128c, s128
+ fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
+ add,dc p192c, p192d, s192
+ ldd 8(rp), r064
+ add,dc p256d, %r0, climb
+ fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
+ ldd 16(rp), r128
+ add ma000, s000, s000 C accum mid 0
+ ldd 24(rp), r192
+ add,dc ma064, s064, s064 C accum mid 1
+ add,dc ma128, s128, s128 C accum mid 2
+ add,dc ma192, s192, s192 C accum mid 3
+ add,dc ma256, climb, climb
+ sub r000, s000, s000 C accum rlimb 0
+ sub,db r064, s064, s064 C accum rlimb 1
+ sub,db r128, s128, s128 C accum rlimb 2
+ std s000, 0(rp)
+ sub,db r192, s192, s192 C accum rlimb 3
+ sub,db %r0, climb, climb
+ sub %r0, climb, climb
+ std s064, 8(rp)
+ ldd -0x78(%r30), p032a1
+ std s128, 16(rp)
+ ldd -0x70(%r30), p032a2
+ std s192, 24(rp)
+ ldd -0x38(%r30), p096b1
+ ldd -0x30(%r30), p096b2
+ ldd -0x58(%r30), p160c1
+ ldd -0x50(%r30), p160c2
+ ldd -0x18(%r30), p224d1
+ ldd -0x10(%r30), p224d2
+ ldo 32(rp), rp
+
+LDEF(end1)
+ add p032a1, p032a2, m032
+ ldd -0x80(%r30), p000a
+ add,dc p096b1, p096b2, m096
+ add,dc p160c1, p160c2, m160
+ ldd -0x68(%r30), p064a
+ add,dc p224d1, p224d2, m224
+ add,dc %r0, %r0, m288
+ ldd -0x40(%r30), p064b
+ depd,z m032, 31, 32, ma000
+ ldd -0x28(%r30), p128b
+ extrd,u m032, 31, 32, ma064
+ depd m096, 31, 32, ma064
+ ldd -0x60(%r30), p128c
+ extrd,u m096, 31, 32, ma128
+ depd m160, 31, 32, ma128
+ ldd -0x48(%r30), p192c
+ extrd,u m160, 31, 32, ma192
+ depd m224, 31, 32, ma192
+ ldd -0x20(%r30), p192d
+ extrd,u m224, 31, 32, ma256
+ depd m288, 31, 32, ma256
+ ldd -0x88(%r30), p256d
+ add climb, p000a, s000
+ add,dc p064a, p064b, s064
+ ldd 0(rp), r000
+ add,dc p128b, p128c, s128
+ add,dc p192c, p192d, s192
+ ldd 8(rp), r064
+ add,dc p256d, %r0, climb
+ ldd 16(rp), r128
+ add ma000, s000, s000 C accum mid 0
+ ldd 24(rp), r192
+ add,dc ma064, s064, s064 C accum mid 1
+ add,dc ma128, s128, s128 C accum mid 2
+ add,dc ma192, s192, s192 C accum mid 3
+ add,dc ma256, climb, climb
+ sub r000, s000, s000 C accum rlimb 0
+ sub,db r064, s064, s064 C accum rlimb 1
+ sub,db r128, s128, s128 C accum rlimb 2
+ std s000, 0(rp)
+ sub,db r192, s192, s192 C accum rlimb 3
+ sub,db %r0, climb, climb
+ sub %r0, climb, climb
+ std s064, 8(rp)
+ std s128, 16(rp)
+ std s192, 24(rp)
+
+ ldd -0xb0(%r30), %r13
+ ldd -0xb8(%r30), %r12
+ ldd -0xc0(%r30), %r11
+ ldd -0xc8(%r30), %r10
+ ldd -0xd0(%r30), %r9
+ ldd -0xd8(%r30), %r8
+ ldd -0xe0(%r30), %r7
+ ldd -0xe8(%r30), %r6
+LDEF(done)
+ifdef(`HAVE_ABI_2_0w',
+` copy climb, %r28
+',` extrd,u climb, 63, 32, %r29
+ extrd,u climb, 31, 32, %r28
+')
+ ldd -0xf0(%r30), %r5
+ ldd -0xf8(%r30), %r4
+ bve (%r2)
+ ldd,mb -0x100(%r30), %r3
+EPILOGUE(mpn_submul_1)
diff --git a/vendor/gmp-6.3.0/mpn/pa64/udiv.asm b/vendor/gmp-6.3.0/mpn/pa64/udiv.asm
new file mode 100644
index 0000000..1380a85
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/udiv.asm
@@ -0,0 +1,125 @@
+dnl HP-PA 2.0 64-bit mpn_udiv_qrnnd_r.
+
+dnl Copyright 2001-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This runs at about 280 cycles on both PA8000 and PA8500, corresponding to a
+C bit more than 4 cycles/bit.
+
+C INPUT PARAMETERS
+define(`n1',`%r26')
+define(`n0',`%r25')
+define(`d',`%r24')
+define(`remptr',`%r23')
+
+define(`q',`%r28')
+define(`dn',`%r29')
+
+define(`old_divstep',
+ `add,dc n0,n0,n0
+ add,dc n1,n1,n1
+ sub,*<< n1,d,%r22
+ copy %r22,n1')
+
+define(`divstep',
+ `add n0,n0,n0
+ add,dc n1,n1,n1
+ sub n1,d,%r1
+ add,dc q,q,q
+ cmpclr,*<< n1,d,%r0
+ copy %r1,n1
+')
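In C terms, each divstep above performs one shift-and-subtract step of a 128/64-bit division: the dividend pair is shifted left one bit, and if the high word reaches the divisor it is reduced and a quotient bit is set (the assembly sets the bit via the carry out of the sub, and commits the difference with cmpclr/copy).  A rough sketch of the small-divisor loop, 8 unrolled divsteps times 8 loop turns (editor's sketch; names illustrative, assumes n1 < d and d < 2^63; the large-divisor path below halves d first and fixes up afterwards):

#include <stdint.h>

uint64_t udiv_qrnnd_sketch (uint64_t *rem, uint64_t n1, uint64_t n0, uint64_t d)
{
  uint64_t q = 0;
  for (int i = 0; i < 64; i++)
    {
      n1 = (n1 << 1) | (n0 >> 63);   /* shift the 128-bit dividend left one bit */
      n0 <<= 1;
      q <<= 1;
      if (n1 >= d)                   /* carry from "sub n1,d" in the assembly */
        {
          n1 -= d;
          q |= 1;
        }
    }
  *rem = n1;
  return q;
}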
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(mpn_udiv_qrnnd_r)
+ifdef(`HAVE_ABI_2_0n',
+` depd %r25,31,32,%r26
+ depd %r23,31,32,%r24
+ copy %r24,%r25
+ ldd -56(%r30),%r24
+ ldw -60(%r30),%r23
+')
+ ldi 0,q
+ cmpib,*>= 0,d,L(large_divisor)
+ ldi 8,%r31 C setup loop counter
+
+ sub %r0,d,dn
+LDEF(Loop)
+ divstep divstep divstep divstep divstep divstep divstep divstep
+ addib,<> -1,%r31,L(Loop)
+ nop
+
+ifdef(`HAVE_ABI_2_0n',
+` copy %r28,%r29
+ extrd,u %r28,31,32,%r28
+')
+ bve (%r2)
+ std n1,0(remptr) C store remainder
+
+LDEF(large_divisor)
+ extrd,u n0,63,1,%r19 C save lsb of dividend
+ shrpd n1,n0,1,n0 C n0 = lo(n1n0 >> 1)
+ shrpd %r0,n1,1,n1 C n1 = hi(n1n0 >> 1)
+ extrd,u d,63,1,%r20 C save lsb of divisor
+ shrpd %r0,d,1,d C d = floor(orig_d / 2)
+ add,l %r20,d,d C d = ceil(orig_d / 2)
+
+ sub %r0,d,dn
+LDEF(Loop2)
+ divstep divstep divstep divstep divstep divstep divstep divstep
+ addib,<> -1,%r31,L(Loop2)
+ nop
+
+ cmpib,*= 0,%r20,L(even_divisor)
+ shladd n1,1,%r19,n1 C shift in omitted dividend lsb
+
+ add d,d,d C restore orig...
+ sub d,%r20,d C ...d value
+	sub	%r0,d,dn		C dn = -d
+
+ add,*nuv n1,q,n1 C fix remainder for omitted divisor lsb
+ add,l n1,dn,n1 C adjust remainder if rem. fix carried
+ add,dc %r0,q,q C adjust quotient accordingly
+
+ sub,*<< n1,d,%r0 C remainder >= divisor?
+ add,l n1,dn,n1 C adjust remainder
+ add,dc %r0,q,q C adjust quotient
+
+LDEF(even_divisor)
+ifdef(`HAVE_ABI_2_0n',
+` copy %r28,%r29
+ extrd,u %r28,31,32,%r28
+')
+ bve (%r2)
+ std n1,0(remptr) C store remainder
+EPILOGUE(mpn_udiv_qrnnd_r)
diff --git a/vendor/gmp-6.3.0/mpn/pa64/umul.asm b/vendor/gmp-6.3.0/mpn/pa64/umul.asm
new file mode 100644
index 0000000..bd5a71f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/pa64/umul.asm
@@ -0,0 +1,97 @@
+dnl Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl Optimizations:
+dnl * Avoid skip instructions
+dnl * Put carry-generating and carry-consuming insns consecutively
+dnl * Don't allocate any stack, "home" positions for parameters could be used.
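The routine computes a full 64x64->128 bit product from four 32x32->64 xmpyu partial products and recombines them in the integer unit.  An editor's C sketch of that recombination (assumes 64-bit limbs; the function name and out-parameters are illustrative, while the real entry point, mpn_umul_ppmm_r, returns the high limb and stores the low limb through its third argument):

#include <stdint.h>

void umul_ppmm_sketch (uint64_t *hi, uint64_t *lo, uint64_t u, uint64_t v)
{
  uint64_t ul = (uint32_t) u, uh = u >> 32;
  uint64_t vl = (uint32_t) v, vh = v >> 32;

  uint64_t p0 = ul * vl;                 /* low product            */
  uint64_t m0 = uh * vl;                 /* two middle products    */
  uint64_t m1 = ul * vh;
  uint64_t p1 = uh * vh;                 /* high product           */

  uint64_t x = m0 + m1;                  /* may carry out of 64 bits */
  if (x < m0)
    p1 += (uint64_t) 1 << 32;            /* the "add,l t32,p1" fixup */

  uint64_t carry;
  *lo = p0 + (x << 32);                  /* lo32 of the middle sum, in place */
  carry = *lo < p0;
  *hi = p1 + (x >> 32) + carry;          /* hi32 of the middle sum + carry   */
}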
+
+include(`../config.m4')
+
+define(`p0',`%r28')
+define(`p1',`%r29')
+define(`t32',`%r19')
+define(`t0',`%r20')
+define(`t1',`%r21')
+define(`x',`%r22')
+define(`m0',`%r23')
+define(`m1',`%r24')
+
+ifdef(`HAVE_ABI_2_0w',
+` .level 2.0w
+',` .level 2.0
+')
+PROLOGUE(mpn_umul_ppmm_r)
+ ldo 128(%r30),%r30
+ifdef(`HAVE_ABI_2_0w',
+` std %r26,-64(%r30)
+ std %r25,-56(%r30)
+ copy %r24,%r31
+',`
+ depd %r25,31,32,%r26
+ std %r26,-64(%r30)
+ depd %r23,31,32,%r24
+ std %r24,-56(%r30)
+ ldw -180(%r30),%r31
+')
+
+ fldd -64(%r30),%fr4
+ fldd -56(%r30),%fr5
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+
+ depdi,z 1,31,1,t32 C t32 = 2^32
+
+ ldd -128(%r30),p0 C lo = low 64 bit of product
+ ldd -120(%r30),m0 C m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 C m1 = mid1 64 bit of product
+ ldd -104(%r30),p1 C hi = high 64 bit of product
+
+ add,l,*nuv m0,m1,x C x = m1+m0
+ add,l t32,p1,p1 C propagate carry to mid of p1
+ depd,z x,31,32,t0 C lo32(m1+m0)
+ add t0,p0,p0
+ extrd,u x,31,32,t1 C hi32(m1+m0)
+ add,dc t1,p1,p1
+
+ std p0,0(%r31) C store low half of product
+ifdef(`HAVE_ABI_2_0w',
+` copy p1,%r28 C return val in %r28
+',` extrd,u p1,31,32,%r28 C return val in %r28,%r29
+')
+ bve (%r2)
+ ldo -128(%r30),%r30
+EPILOGUE(mpn_umul_ppmm_r)