aboutsummaryrefslogtreecommitdiff
path: root/vendor/gmp-6.3.0/mpn/arm/v5
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-06-21 23:36:36 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-06-21 23:42:26 +0200
commit a89a14ef5da44684a16b204e7a70460cc8c4922a (patch)
tree b23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/arm/v5
parent 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff)
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/arm/v5')
-rw-r--r-- vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm 70
-rw-r--r-- vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm 117
-rw-r--r-- vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm 129
-rw-r--r-- vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm 156
4 files changed, 472 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm b/vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm
new file mode 100644
index 0000000..3c2b48f
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v5/gcd_11.asm
@@ -0,0 +1,70 @@
+dnl ARM v5 mpn_gcd_11.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjörn
+dnl Granlund.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C StrongARM -
+C XScale ?
+C Cortex-A5 6.45 obsolete
+C Cortex-A7 6.41 obsolete
+C Cortex-A8 5.0 obsolete
+C Cortex-A9 5.9 obsolete
+C Cortex-A15 4.40 obsolete
+C Cortex-A17 5.68 obsolete
+C Cortex-A53 4.37 obsolete
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+define(`u0', `r0')
+define(`v0', `r1')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_gcd_11) C in: u0 = r0, v0 = r1; out: r0 = value at which u and v meet in the subtract-and-shift loop (the gcd); inputs presumably both odd -- confirm with callers
+ subs r3, u0, v0 C r3 = u - v, carry set iff u >= v 0
+ beq L(end) C u == v: r0 already holds the result
+
+ ALIGN(16) C trailing numbers in the comments below are the original per-instruction annotations (presumably cycle slots), kept verbatim
+L(top): sub r2, v0, u0 C r2 = v - u = -r3; no S suffix, so the flags from subs stay live 0,5
+ and r12, r2, r3 C d & -d: only the lowest set bit of d = u - v remains 1
+ clz r12, r12 C leading zeros above that single bit 2
+ rsb r12, r12, #31 C r12 = 31 - clz = number of trailing zero bits of d 3
+ rsbcc r3, r3, #0 C if u < v negate: r3 = abs(u-v), even 1
+ movcs u0, v0 C if u >= v take v: u = min(u,v) 1
+ lsr v0, r3, r12 C v = abs(u-v) with its trailing zeros shifted out 4
+ subs r3, u0, v0 C difference and flags for the next pass 5
+ bne L(top) C loop until u == v
+
+L(end): bx lr C return with the result in r0 (u0)
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm b/vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm
new file mode 100644
index 0000000..0643b7c
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v5/gcd_22.asm
@@ -0,0 +1,117 @@
+dnl ARM v5 mpn_gcd_22.
+
+dnl Copyright 2019, 2022 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit (approx)
+C StrongARM -
+C XScale -
+C ARM11 13
+C Cortex-A5 ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 ?
+C Cortex-A12 ?
+C Cortex-A15 ?
+C Cortex-A17 ?
+C Cortex-A53 ?
+
+
+define(`gp', `r0')
+
+define(`u1', `r1')
+define(`u0', `r2')
+define(`v1', `r3')
+define(`v0', `r4')
+
+define(`t0', `r5')
+define(`t1', `r6')
+define(`cnt', `r7')
+
+ASM_START()
+PROLOGUE(mpn_gcd_22) C in: gp = r0 (result pointer), u = {u1,u0} = {r1,r2}, v1 = r3, v0 = first stack word; stores two result limbs at gp[0] (low) and gp[1] (high)
+ push { r4-r7 } C save r4-r7 (used as v0, t0, t1, cnt); moves sp down 16 bytes
+
+ ldr v0, [sp,#16] C v0 = word that was at sp on entry, now just above the four saved registers
+
+L(top): subs t0, u0, v0 C t0 = low limb of u - v, borrow goes to carry 0 7
+ beq L(lowz) C low limbs equal: handled separately below
+ sbcs t1, u1, v1 C t1 = high limb of u - v; carry clear iff u < v 1 8
+
+ sub cnt, v0, u0 C cnt = -t0
+ and cnt, cnt, t0 C t0 & -t0: lowest set bit of the difference (t0 != 0 here)
+
+ negcc t0, t0 C u < v: negate the two-limb difference;
+ mvncc t1, t1 C with t0 != 0 no carry reaches the high limb, so -{t1,t0} = {~t1,-t0}
+L(bck): movcc v0, u0 C u < v: v = u, i.e. v = min(u,v)
+ movcc v1, u1
+
+ clz r12, cnt C 2
+ rsb cnt, r12, #31 C cnt = 31 - clz = trailing zero count of the difference 3
+ add r12, r12, #1 C r12 = 32 - cnt, the complementary shift
+
+ lsr u0, t0, cnt C 3
+ lsl r12, t1, r12 C low bits of t1 that cross into the low limb 4
+ lsr u1, t1, cnt C 3
+ orr u0, u0, r12 C {u1,u0} = abs(u-v) >> cnt 5
+
+ orrs r12, u1, v1 C Z set once both high limbs are zero
+ bne L(top) C keep going while either high limb is non-zero
+
+
+ str r12, [gp,#4] C high result limb = 0 (r12 is zero here because the bne fell through)
+
+ mov r6, gp C keep gp across the call; r0 is needed for the argument
+ mov r0, u0 C pass 1st argument
+ mov r1, v0 C pass 2nd argument
+ mov r7, r14 C preserve link register
+ bl mpn_gcd_11 C single-limb step on the low limbs; relies on the callee leaving r6 and r7 alone
+ str r0, [r6,#0] C low result limb = value returned in r0
+ mov r14, r7 C restore link register
+ pop { r4-r7 }
+ bx r14
+
+L(lowz):C We come here when v0 - u0 = 0
+ C 1. If v1 - u1 = 0, then gcd is u = v.
+ C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+ subs t0, u1, v1 C t0 = u1 - v1, carry clear iff u1 < v1
+ beq L(end) C u == v
+ mov t1, #0 C the difference is carried in t0 alone, its high limb is 0
+ sub cnt, v1, u1
+ and cnt, cnt, t0 C lowest set bit of u1 - v1
+ negcc t0, t0 C t0 = abs(u1-v1)
+ b L(bck) C flags from the subs above are still live for the movcc pair
+
+L(end): str v0, [gp,#0] C u == v: store v as the two-limb result
+ str v1, [gp,#4]
+ pop { r4-r7 }
+ bx r14
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm b/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm
new file mode 100644
index 0000000..3cf0cd7
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_1.asm
@@ -0,0 +1,129 @@
+dnl ARM mpn_mod_1_1p
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 7
+C Cortex-A15 6
+
+define(`ap', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1_1p) C in: ap = r0, n = r1, d = r2, cps = r3 (four-word table as written by mpn_mod_1_1p_cps); returns the remainder in r0
+ push {r4-r10}
+ add r0, r0, r1, asl #2 C r0 = ap + 4*n, just past the top limb
+ ldr r5, [r0, #-4]! C r5 = ap[n-1]
+ ldr r12, [r0, #-4]! C r12 = ap[n-2]; read unconditionally, so n >= 2 is presumably required -- confirm
+ subs r1, r1, #2 C limbs still unread
+ ble L(4) C none left: {r5,r12} is already the two-limb residue
+ ldr r8, [r3, #12] C r8 = cps[3], the loop multiplier
+ mov r4, r12
+ mov r10, r5
+ umull r7, r5, r10, r8 C {r5,r7} = top limb * cps[3]
+ sub r1, r1, #1
+ b L(mid)
+
+L(top): adds r12, r6, r7 C low sum: new limb + low product
+ adcs r10, r4, r5 C high sum: r4 + high product + carry
+ sub r1, r1, #1 C no S suffix: the carry from adcs survives
+ mov r6, #0
+ movcs r6, r8 C r6 = cps[3] if the high sum carried out, else 0
+ umull r7, r5, r10, r8 C {r5,r7} = high sum * cps[3] for the next step
+ adds r4, r12, r6 C r4 = low sum + r6
+ subcs r4, r4, r2 C on carry out, subtract d
+L(mid): ldr r6, [r0, #-4]! C r6 = next lower limb
+ teq r1, #0
+ bne L(top) C loop until the count reaches zero
+
+ adds r12, r6, r7 C final fold: {r5,r12} = two-limb residue
+ adcs r5, r4, r5
+ subcs r5, r5, r2 C on carry out, subtract d
+L(4): ldr r1, [r3, #4] C r1 = cps[1], the shift count
+ cmp r1, #0
+ beq L(7) C count 0: no shifting needed
+ ldr r4, [r3, #8] C r4 = cps[2]
+ umull r0, r6, r5, r4 C {r6,r0} = r5 * cps[2]
+ adds r12, r0, r12
+ addcs r6, r6, #1 C {r6,r12} = r5 * cps[2] + r12
+ rsb r0, r1, #32
+ mov r0, r12, lsr r0
+ orr r5, r0, r6, asl r1 C r5 = high limb of {r6,r12} << count
+ mov r12, r12, asl r1 C r12 = low limb of {r6,r12} << count
+ b L(8)
+L(7): cmp r5, r2
+ subcs r5, r5, r2 C if r5 >= d then r5 -= d
+L(8): ldr r0, [r3, #0] C r0 = cps[0], the mpn_invert_limb result stored by the cps routine
+ umull r4, r3, r5, r0 C {r3,r4} = r5 * cps[0]
+ add r5, r5, #1
+ adds r0, r4, r12 C r0 = low word of the quotient estimate
+ adc r5, r3, r5 C r5 = quotient estimate
+ mul r5, r2, r5
+ sub r12, r12, r5 C r12 = low limb - estimate * d (mod 2^32)
+ cmp r12, r0
+ addhi r12, r12, r2 C first correction: add d back if r12 > r0
+ cmp r2, r12
+ subls r12, r12, r2 C second correction: subtract d if r12 >= d
+ mov r0, r12, lsr r1 C return the remainder shifted down by the count
+ pop {r4-r10}
+ bx r14
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps) C in: r0 = cps (table to fill), r1 = b (presumably the divisor -- confirm); writes cps[0], cps[1], cps[3], and cps[2] only when the shift count is non-zero
+ stmfd sp!, {r4, r5, r6, r14}
+ mov r5, r0 C keep the table pointer; r0 carries the call argument
+ clz r4, r1 C r4 = cnt = leading zero bits of b
+ mov r0, r1, asl r4 C r0 = b << cnt
+ rsb r6, r0, #0 C r6 = -(b << cnt)
+ bl mpn_invert_limb C r0 = result for the shifted b; r4-r6 must survive the call
+ str r0, [r5, #0] C cps[0] = value returned by mpn_invert_limb
+ str r4, [r5, #4] C cps[1] = cnt
+ cmp r4, #0
+ beq L(2) C cnt == 0: cps[2] is left unwritten
+ rsb r1, r4, #32
+ mov r3, #1
+ mov r3, r3, asl r4
+ orr r3, r3, r0, lsr r1 C r3 = (1 << cnt) | (cps[0] >> (32 - cnt))
+ mul r3, r6, r3 C times -(b << cnt), low 32 bits
+ mov r4, r3, lsr r4 C shifted down by cnt (r4 can be reused, cnt is already stored)
+ str r4, [r5, #8] C cps[2]
+L(2): mul r0, r6, r0 C low 32 bits of -(b << cnt) * cps[0]
+ str r0, [r5, #12] C cps[3]
+ ldmfd sp!, {r4, r5, r6, pc} C restore and return by loading the saved r14 into pc
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm b/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm
new file mode 100644
index 0000000..aa26ecb
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v5/mod_1_2.asm
@@ -0,0 +1,156 @@
+dnl ARM mpn_mod_1s_2p
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 4.25
+C Cortex-A15 3
+
+define(`ap', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1s_2p) C in: ap = r0, n = r1, d = r2, cps = r3; folds two limbs per step with cps[2..4] (B1, B2, B3), then reduces; returns the remainder in r0
+ push {r4-r10}
+ tst n, #1 C Z set iff n is even; nothing below touches the flags before the beq
+ add r7, r3, #8 C r7 = address of cps[2]
+ ldmia r7, {r7, r8, r12} C load B1, B2, B3
+ add ap, ap, n, lsl #2 C put ap at operand end
+ beq L(evn)
+
+L(odd): subs n, n, #1
+ beq L(1) C n == 1: single limb, go straight to the final reduction
+ ldmdb ap!, {r4,r6,r9} C top three limbs: r4 lowest, r6 middle, r9 highest
+ mov r10, #0
+ umlal r4, r10, r6, r7 C {r10,r4} = r4 + r6 * B1
+ umlal r4, r10, r9, r8 C {r10,r4} += r9 * B2
+ b L(com)
+
+L(evn): ldmdb ap!, {r4,r10} C top two limbs: r4 low, r10 high
+L(com): subs n, n, #2 C n = limbs not yet loaded
+ ble L(end) C nothing left to fold
+ ldmdb ap!, {r5,r6} C next pair: r5 low, r6 high
+ b L(mid)
+
+L(top): mov r9, #0 C first half: fold {r10,r4} into the pair {r6,r5}, result {r9,r5}
+ umlal r5, r9, r6, r7 C B1
+ umlal r5, r9, r4, r8 C B2
+ ldmdb ap!, {r4,r6} C next pair; r4 and r6 are free again after B1 and B2
+ umlal r5, r9, r10, r12 C B3
+ ble L(xit) C flags still from the subs at L(mid): Z means that pair was the last
+ mov r10, #0 C second half: fold {r9,r5} into {r6,r4}, result {r10,r4}
+ umlal r4, r10, r6, r7 C B1
+ umlal r4, r10, r5, r8 C B2
+ ldmdb ap!, {r5,r6}
+ umlal r4, r10, r9, r12 C B3
+L(mid): subs n, n, #4 C four limbs are loaded per trip through L(top)
+ bge L(top)
+
+ mov r9, #0 C tail: fold {r10,r4} into the last pair {r6,r5}
+ umlal r5, r9, r6, r7 C B1
+ umlal r5, r9, r4, r8 C B2
+ umlal r5, r9, r10, r12 C B3
+ mov r4, r5 C residue is now {r9,r4}
+
+L(end): movge r9, r10 C executed when arriving from L(com) or L(xit) (flags GE, high limb in r10); skipped after the tail above (flags LT)
+ ldr r6, [r3, #4] C cps[1] = cnt
+ mov r5, #0
+ umlal r4, r5, r9, r7 C {r5,r4} = r4 + r9 * B1
+ mov r7, r5, lsl r6 C high limb << cnt
+L(x): rsb r1, r6, #32
+ orr r8, r7, r4, lsr r1 C r8 = high limb of the value shifted left by cnt
+ mov r9, r4, lsl r6 C r9 = low limb of the value shifted left by cnt
+ ldr r5, [r3, #0] C r5 = cps[0]
+ add r0, r8, #1
+ umull r12, r1, r8, r5 C {r1,r12} = r8 * cps[0]
+ adds r4, r12, r9 C r4 = low word of the quotient estimate
+ adc r1, r1, r0 C r1 = quotient estimate
+ mul r5, r2, r1
+ sub r9, r9, r5 C r9 = low limb - estimate * d (mod 2^32)
+ cmp r9, r4
+ addhi r9, r9, r2 C first correction: add d back if r9 > r4
+ cmp r2, r9
+ subls r9, r9, r2 C second correction: subtract d if r9 >= d
+ mov r0, r9, lsr r6 C return the remainder shifted down by cnt
+ pop {r4-r10}
+ bx r14
+
+L(xit): mov r10, #0 C fold {r9,r5} into the last pair {r6,r4}, result {r10,r4}
+ umlal r4, r10, r6, r7 C B1
+ umlal r4, r10, r5, r8 C B2
+ umlal r4, r10, r9, r12 C B3
+ b L(end)
+
+L(1): ldr r6, [r3, #4] C cps[1] = cnt
+ ldr r4, [ap, #-4] C ap[0]
+ mov r7, #0 C no high part
+ b L(x)
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_2p_cps) C in: r0 = cps (table to fill), r1 = b (presumably the divisor -- confirm); writes the five words cps[0..4]
+ push {r4-r8, r14}
+ clz r4, r1 C r4 = cnt = leading zero bits of b
+ mov r5, r1, lsl r4 C b <<= cnt
+ mov r6, r0 C r6 = cps
+ mov r0, r5 C argument: the shifted b
+ bl mpn_invert_limb C r0 = bi; r4-r6 must survive the call
+ rsb r3, r4, #32
+ mov r3, r0, lsr r3
+ mov r2, #1
+ orr r3, r3, r2, lsl r4 C r3 = (bi >> (32 - cnt)) | (1 << cnt)
+ rsb r1, r5, #0 C r1 = -b
+ mul r2, r1, r3 C r2 = B1 before the final shift
+ umull r3, r12, r2, r0 C {r12,r3} = r2 * bi
+ add r12, r2, r12
+ mvn r12, r12 C r12 = ~(high word + r2)
+ mul r1, r5, r12 C r1 = b * r12 (mod 2^32)
+ cmp r1, r3
+ addhi r1, r1, r5 C add b if r1 > low word; r1 = B2 before the final shift
+ umull r12, r7, r1, r0 C same step again, starting from r1
+ add r7, r1, r7
+ mvn r7, r7
+ mul r3, r5, r7
+ cmp r3, r12
+ addhi r3, r3, r5 C r3 = B3 before the final shift
+ mov r5, r2, lsr r4 C B1 >> cnt
+ mov r7, r1, lsr r4 C B2 >> cnt
+ mov r8, r3, lsr r4 C B3 >> cnt
+ stmia r6, {r0,r4,r5,r7,r8} C fill cps: [0] = bi, [1] = cnt, [2..4] = shifted B1, B2, B3
+ pop {r4-r8, pc} C restore and return by loading the saved r14 into pc
+EPILOGUE()