aboutsummaryrefslogtreecommitdiff
path: root/vendor/gmp-6.3.0/mpn/arm/v6t2
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-06-21 23:36:36 +0200
committerThomas Voss <mail@thomasvoss.com> 2024-06-21 23:42:26 +0200
commita89a14ef5da44684a16b204e7a70460cc8c4922a (patch)
treeb23b4c6b155977909ef508fdae2f48d33d802813 /vendor/gmp-6.3.0/mpn/arm/v6t2
parent1db63fcedab0b288820d66e100b1877b1a5a8851 (diff)
Basic constant folding implementation
Diffstat (limited to 'vendor/gmp-6.3.0/mpn/arm/v6t2')
-rw-r--r--vendor/gmp-6.3.0/mpn/arm/v6t2/divrem_1.asm212
-rw-r--r--vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_11.asm65
-rw-r--r--vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_22.asm113
3 files changed, 390 insertions, 0 deletions
diff --git a/vendor/gmp-6.3.0/mpn/arm/v6t2/divrem_1.asm b/vendor/gmp-6.3.0/mpn/arm/v6t2/divrem_1.asm
new file mode 100644
index 0000000..be24615
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v6t2/divrem_1.asm
@@ -0,0 +1,212 @@
+dnl ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C norm unorm frac
+C StrongARM - - -
+C XScale - - -
+C Cortex-A7 ? ? ?
+C Cortex-A8 ? ? ?
+C Cortex-A9 13 14 13
+C Cortex-A15 11.4 11.8 11.1
+
+C TODO
+C * Optimise inner-loops better, they could likely run a cycle or two faster.
+C * Decrease register usage, streamline non-loop code.
+
+define(`qp_arg', `r0')
+define(`fn', `r1')
+define(`up_arg', `r2')
+define(`n_arg', `r3')
+define(`d_arg', `0')
+define(`dinv_arg',`4')
+define(`cnt_arg', `8')
+
+define(`n', `r9')
+define(`qp', `r5')
+define(`up', `r6')
+define(`cnt', `r7')
+define(`tnc', `r10')
+define(`dinv', `r0')
+define(`d', `r4')
+
+ASM_START()
+PROLOGUE(mpn_preinv_divrem_1)
+ stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+ ldr d, [sp, #9*4+d_arg]
+ ldr cnt, [sp, #9*4+cnt_arg]
+ str r1, [sp, #9*4+d_arg] C reuse d stack slot for fn
+ sub n, r3, #1
+ add r3, r1, n
+ cmp d, #0
+ add qp, qp_arg, r3, lsl #2 C put qp at Q[] end
+ add up, up_arg, n, lsl #2 C put up at U[] end
+ ldr dinv, [sp, #9*4+dinv_arg]
+ blt L(nent)
+ b L(uent)
+EPILOGUE()
+
+PROLOGUE(mpn_divrem_1)
+ stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+ sub n, r3, #1
+ ldr d, [sp, #9*4+d_arg] C d
+ str r1, [sp, #9*4+d_arg] C reuse d stack slot for fn
+ add r3, r1, n
+ cmp d, #0
+ add qp, qp_arg, r3, lsl #2 C put qp at Q[] end
+ add up, up_arg, n, lsl #2 C put up at U[] end
+ blt L(normalised)
+
+L(unnorm):
+ clz cnt, d
+ mov r0, d, lsl cnt C pass d << cnt
+ bl mpn_invert_limb
+L(uent):
+ mov d, d, lsl cnt C d <<= cnt
+ cmp n, #0
+ mov r1, #0 C r
+ blt L(frac)
+
+ ldr r11, [up, #0]
+
+ rsb tnc, cnt, #32
+ mov r1, r11, lsr tnc
+ mov r11, r11, lsl cnt
+ beq L(uend)
+
+ ldr r3, [up, #-4]!
+ orr r2, r11, r3, lsr tnc
+ b L(mid)
+
+L(utop):
+ mls r1, d, r8, r11
+ mov r11, r3, lsl cnt
+ ldr r3, [up, #-4]!
+ cmp r1, r2
+ addhi r1, r1, d
+ subhi r8, r8, #1
+ orr r2, r11, r3, lsr tnc
+ cmp r1, d
+ bcs L(ufx)
+L(uok): str r8, [qp], #-4
+L(mid): add r8, r1, #1
+ mov r11, r2
+ umlal r2, r8, r1, dinv
+ subs n, n, #1
+ bne L(utop)
+
+ mls r1, d, r8, r11
+ mov r11, r3, lsl cnt
+ cmp r1, r2
+ addhi r1, r1, d
+ subhi r8, r8, #1
+ cmp r1, d
+ rsbcs r1, d, r1
+ addcs r8, r8, #1
+ str r8, [qp], #-4
+
+L(uend):add r8, r1, #1
+ mov r2, r11
+ umlal r2, r8, r1, dinv
+ mls r1, d, r8, r11
+ cmp r1, r2
+ addhi r1, r1, d
+ subhi r8, r8, #1
+ cmp r1, d
+ rsbcs r1, d, r1
+ addcs r8, r8, #1
+ str r8, [qp], #-4
+L(frac):
+ ldr r2, [sp, #9*4+d_arg] C fn
+ cmp r2, #0
+ beq L(fend)
+
+L(ftop):mov r6, #0
+ add r3, r1, #1
+ umlal r6, r3, r1, dinv
+ mov r8, #0
+ mls r1, d, r3, r8
+ cmp r1, r6
+ addhi r1, r1, d
+ subhi r3, r3, #1
+ subs r2, r2, #1
+ str r3, [qp], #-4
+ bne L(ftop)
+
+L(fend):mov r11, r1, lsr cnt
+L(rtn): mov r0, r11
+ ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+
+L(normalised):
+ mov r0, d
+ bl mpn_invert_limb
+L(nent):
+ cmp n, #0
+ mov r11, #0 C r
+ blt L(nend)
+
+ ldr r11, [up, #0]
+ cmp r11, d
+ movlo r2, #0 C hi q limb
+ movhs r2, #1 C hi q limb
+ subhs r11, r11, d
+
+ str r2, [qp], #-4
+ cmp n, #0
+ beq L(nend)
+
+L(ntop):ldr r1, [up, #-4]!
+ add r12, r11, #1
+ umlal r1, r12, r11, dinv
+ ldr r3, [up, #0]
+ mls r11, d, r12, r3
+ cmp r11, r1
+ addhi r11, r11, d
+ subhi r12, r12, #1
+ cmp d, r11
+ bls L(nfx)
+L(nok): str r12, [qp], #-4
+ subs n, n, #1
+ bne L(ntop)
+
+L(nend):mov r1, r11 C r
+ mov cnt, #0 C shift cnt
+ b L(frac)
+
+L(nfx): add r12, r12, #1
+ rsb r11, d, r11
+ b L(nok)
+L(ufx): rsb r1, d, r1
+ add r8, r8, #1
+ b L(uok)
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_11.asm b/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_11.asm
new file mode 100644
index 0000000..8a38351
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_11.asm
@@ -0,0 +1,65 @@
+dnl ARM v6t2 mpn_gcd_11.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011, 2012, 2019 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C StrongARM -
+C XScale -
+C Cortex-A5 5.2
+C Cortex-A7 5.04
+C Cortex-A8 3.59
+C Cortex-A9 9.5
+C Cortex-A15 3.2
+C Cortex-A17 5.25
+C Cortex-A53 3.57
+
+define(`u0', `r0')
+define(`v0', `r1')
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_gcd_11)
+ subs r3, u0, v0 C 0
+ beq L(end) C
+
+ ALIGN(16)
+L(top): rbit r12, r3 C 1,5
+ clz r12, r12 C 2
+ rsbcc r3, r3, #0 C v = abs(u-v), even 1
+ movcs u0, v0 C u = min(u,v) 1
+ lsr v0, r3, r12 C 3
+ subs r3, u0, v0 C 4
+ bne L(top) C
+
+L(end): bx lr
+EPILOGUE()
diff --git a/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_22.asm b/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_22.asm
new file mode 100644
index 0000000..3b23808
--- /dev/null
+++ b/vendor/gmp-6.3.0/mpn/arm/v6t2/gcd_22.asm
@@ -0,0 +1,113 @@
+dnl ARM v6t2 mpn_gcd_22.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit (approx)
+C StrongARM -
+C XScale -
+C Cortex-A5 10.1
+C Cortex-A7 9.1
+C Cortex-A8 6.3
+C Cortex-A9 ?
+C Cortex-A12 7.7
+C Cortex-A15 5.7
+C Cortex-A17 ?
+C Cortex-A53 7.0
+
+
+define(`gp', `r0')
+
+define(`u1', `r1')
+define(`u0', `r2')
+define(`v1', `r3')
+define(`v0', `r4')
+
+define(`t0', `r5')
+define(`t1', `r6')
+define(`cnt', `r7')
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+ push { r4-r7 }
+
+ ldr v0, [sp,#16] C
+
+L(top): subs t0, u0, v0 C 0 7
+ beq L(lowz)
+ sbcs t1, u1, v1 C 1 8
+
+ rbit cnt, t0 C 1
+
+ negcc t0, t0
+ mvncc t1, t1
+L(bck): movcc v0, u0
+ movcc v1, u1
+
+ clz cnt, cnt C 2
+ rsb r12, cnt, #32 C 3
+
+ lsr u0, t0, cnt C 3
+ lsl r12, t1, r12 C 4
+ lsr u1, t1, cnt C 3
+ orr u0, u0, r12 C 5
+
+ orrs r12, u1, v1
+ bne L(top)
+
+
+ str r12, [gp,#4] C high result limb <= 0
+
+ mov r6, gp
+ mov r0, u0 C pass 1st argument
+ mov r1, v0 C pass 2nd argument
+ mov r7, r14 C preserve link register
+ bl mpn_gcd_11
+ str r0, [r6,#0]
+ mov r14, r7
+ pop { r4-r7 }
+ bx r14
+
+L(lowz):C We come here when v0 - u0 = 0
+ C 1. If v1 - u1 = 0, then gcd is u = v.
+ C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+ subs t0, u1, v1
+ beq L(end)
+ mov t1, #0
+ rbit cnt, t0 C 1
+ negcc t0, t0
+ b L(bck)
+
+L(end): str v0, [gp,#0]
+ str v1, [gp,#4]
+ pop { r4-r7 }
+ bx r14
+EPILOGUE()