dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/p7/aors_n.asm (about)

dnl  PowerPC-64 mpn_add_n, mpn_sub_n optimised for POWER7.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                  cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 ?
C POWER6                 ?
C POWER7                2.18

C This is a tad bit slower than the cnd_aors_n.asm code, which is of course an
C anomaly.
42 43 ifdef(`OPERATION_add_n',` 44 define(ADDSUBC, adde) 45 define(ADDSUB, addc) 46 define(func, mpn_add_n) 47 define(func_nc, mpn_add_nc) 48 define(GENRVAL, `addi r3, r3, 1') 49 define(SETCBR, `addic r0, $1, -1') 50 define(CLRCB, `addic r0, r0, 0') 51 ') 52 ifdef(`OPERATION_sub_n',` 53 define(ADDSUBC, subfe) 54 define(ADDSUB, subfc) 55 define(func, mpn_sub_n) 56 define(func_nc, mpn_sub_nc) 57 define(GENRVAL, `neg r3, r3') 58 define(SETCBR, `subfic r0, $1, 0') 59 define(CLRCB, `addic r0, r1, -1') 60 ') 61 62 MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) 63 64 C INPUT PARAMETERS 65 define(`rp', `r3') 66 define(`up', `r4') 67 define(`vp', `r5') 68 define(`n', `r6') 69 70 ASM_START() 71 PROLOGUE(func_nc) 72 SETCBR(r7) 73 b L(ent) 74 EPILOGUE() 75 76 PROLOGUE(func) 77 CLRCB 78 L(ent): 79 andi. r7, n, 1 80 beq L(bx0) 81 82 L(bx1): ld r7, 0(up) 83 ld r9, 0(vp) 84 ADDSUBC r11, r9, r7 85 std r11, 0(rp) 86 cmpldi cr6, n, 1 87 beq cr6, L(end) 88 addi up, up, 8 89 addi vp, vp, 8 90 addi rp, rp, 8 91 92 L(bx0): addi r0, n, 2 C compute branch... 93 srdi r0, r0, 2 C ...count 94 mtctr r0 95 96 andi. r7, n, 2 97 bne L(mid) 98 99 addi up, up, 16 100 addi vp, vp, 16 101 addi rp, rp, 16 102 103 ALIGN(32) 104 L(top): ld r6, -16(up) 105 ld r7, -8(up) 106 ld r8, -16(vp) 107 ld r9, -8(vp) 108 ADDSUBC r10, r8, r6 109 ADDSUBC r11, r9, r7 110 std r10, -16(rp) 111 std r11, -8(rp) 112 L(mid): ld r6, 0(up) 113 ld r7, 8(up) 114 ld r8, 0(vp) 115 ld r9, 8(vp) 116 ADDSUBC r10, r8, r6 117 ADDSUBC r11, r9, r7 118 std r10, 0(rp) 119 std r11, 8(rp) 120 addi up, up, 32 121 addi vp, vp, 32 122 addi rp, rp, 32 123 bdnz L(top) 124 125 L(end): subfe r3, r0, r0 C -cy 126 GENRVAL 127 blr 128 EPILOGUE()