dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm/v6/popham.asm

dnl  ARM mpn_popcount and mpn_hamdist.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C               popcount        hamdist
C              cycles/limb     cycles/limb
C StrongARM         -
C XScale            -
C Cortex-A7         ?
C Cortex-A8         ?
C Cortex-A9        8.94            9.47
C Cortex-A15       5.67            6.44

C Architecture requirements:
C v5    -
C v5t   -
C v5te  ldrd strd
C v6    usada8
C v6t2  -
C v7a   -

C Overview
C
C One source builds two functions, selected by OPERATION_popcount or
C OPERATION_hamdist:
C
C   mpn_popcount: r0 = ap, r1 = n.           Bits set in {ap,n}.
C   mpn_hamdist:  r0 = ap, r1 = bp, r2 = n.  Bits set in {ap,n} xor {bp,n}.
C
C The count is returned in r0.
C
C POPC(x) emits x only in the popcount build and HAMD(x) only in the hamdist
C build; in the other build the macro is dnl, which drops the whole line,
C including any trailing C comment.
C
C Register roles (from the defines below):
C
C                   popcount    hamdist
C   ap              r0          r0
C   bp              -           r1
C   n               r1          r2
C   a0, a1          r2, r3      r6, r7      (ldrd pair)
C   b0, b1          -           r4, r5      (ldrd pair, dead after the eor)
C   s               r5          r11         4 byte-wide partial counts
C   b_01010101      r6          r8
C   b_00110011      r7          r9
C   b_00001111      r8          r10
C   zero            r9          r3
C   scratch         r4, r5      r4, r5
C   running total   r12         r12
C
C In the popcount build s is the same register as scratch r5.  That works
C because s is read by usada8 at the top of the loop before r5 is reused, and
C is written again only by the last instruction of the loop body.
C
C Each ldrd pair is an even register followed by the next odd one.
C
C n must be at least 1.  The loop body runs before n is tested, so with n = 0
C the even path would load two limbs and step n to -2 instead of stopping.

ifdef(`OPERATION_popcount',`
  define(`func',`mpn_popcount')
  define(`ap',          `r0')
  define(`n',           `r1')
  define(`a0',          `r2')
  define(`a1',          `r3')
  define(`s',           `r5')
  define(`b_01010101',  `r6')
  define(`b_00110011',  `r7')
  define(`b_00001111',  `r8')
  define(`zero',        `r9')
  define(`POPC',        `$1')
  define(`HAMD',        `dnl')
')
ifdef(`OPERATION_hamdist',`
  define(`func',`mpn_hamdist')
  define(`ap',          `r0')
  define(`bp',          `r1')
  define(`n',           `r2')
  define(`a0',          `r6')
  define(`a1',          `r7')
  define(`b0',          `r4')
  define(`b1',          `r5')
  define(`s',           `r11')
  define(`b_01010101',  `r8')
  define(`b_00110011',  `r9')
  define(`b_00001111',  `r10')
  define(`zero',        `r3')
  define(`POPC',        `dnl')
  define(`HAMD',        `$1')
')

MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)

ASM_START()
PROLOGUE(func)
C Save the registers above r3 that this build uses (r12 is not saved).
POPC(`  push    { r4-r9 }       ')
HAMD(`  push    { r4-r11 }      ')

C Load the three bit masks, clear the running total (r12) and the zero
C operand used by usada8.
        ldr     b_01010101, =0x55555555
        mov     r12, #0
        ldr     b_00110011, =0x33333333
        mov     zero, #0
        ldr     b_00001111, =0x0f0f0f0f

C The main loop takes two limbs per pass.  For odd n, reduce one limb here
C first and enter the loop at L(mid).
        tst     n, #1
        beq     L(evn)

L(odd): ldr     a1, [ap], #4            C 1 x 32 1-bit accumulators, 0-1
HAMD(`  ldr     b1, [bp], #4    ')      C 1 x 32 1-bit accumulators, 0-1
HAMD(`  eor     a1, a1, b1      ')
        and     r4, b_01010101, a1, lsr #1
        sub     a1, a1, r4              C 16 2-bit accumulators, 0-2
        and     r4, a1, b_00110011
        bic     r5, a1, b_00110011
        add     r5, r4, r5, lsr #2      C 8 4-bit accumulators, 0-4
        subs    n, n, #1                C flags are consumed by bne after L(mid)
        b       L(mid)

C Even n: start with s = 0 so the first usada8 in the loop adds nothing.
L(evn): mov     s, #0

L(top): ldrd    a0, a1, [ap], #8        C 2 x 32 1-bit accumulators, 0-1
HAMD(`  ldrd    b0, b1, [bp], #8')
HAMD(`  eor     a0, a0, b0      ')
HAMD(`  eor     a1, a1, b1      ')
C Nothing between this subs and the bne at the end of the loop sets flags.
        subs    n, n, #2
C Fold the byte counts left in s by the previous pass into r12.  With
C zero = 0, usada8 adds the sum of the four bytes of s.
        usada8  r12, s, zero, r12
        and     r4, b_01010101, a0, lsr #1
        sub     a0, a0, r4              C 16 2-bit accumulators, 0-2
        and     r4, b_01010101, a1, lsr #1
        sub     a1, a1, r4              C 16 2-bit accumulators, 0-2
        and     r4, a0, b_00110011
        bic     r5, a0, b_00110011
        add     a0, r4, r5, lsr #2      C 8 4-bit accumulators, 0-4
        and     r4, a1, b_00110011
        bic     r5, a1, b_00110011
        add     a1, r4, r5, lsr #2      C 8 4-bit accumulators, 0-4
        add     r5, a0, a1              C 8 4-bit accumulators, 0-8
C Common tail: r5 holds 8 4-bit accumulators, from one limb (odd entry) or
C from two limbs (loop).
L(mid): and     r4, r5, b_00001111
        bic     r5, r5, b_00001111
        add     s, r4, r5, lsr #4       C 4 8-bit accumulators
        bne     L(top)

C Add the byte counts of the final pass to the total; the result goes in r0.
        usada8  r0, s, zero, r12
POPC(`  pop     { r4-r9 }       ')
HAMD(`  pop     { r4-r11 }      ')
        bx      r14
EPILOGUE()