; Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/m88k/add_n.s

; mc88100 mpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.

; Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.

;  This file is part of the GNU MP Library.
;
;  The GNU MP Library is free software; you can redistribute it and/or modify
;  it under the terms of either:
;
;    * the GNU Lesser General Public License as published by the Free
;      Software Foundation; either version 3 of the License, or (at your
;      option) any later version.
;
;  or
;
;    * the GNU General Public License as published by the Free Software
;      Foundation; either version 2 of the License, or (at your option) any
;      later version.
;
;  or both in parallel, as here.
;
;  The GNU MP Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;  for more details.
;
;  You should have received copies of the GNU General Public License and the
;  GNU Lesser General Public License along with the GNU MP Library.  If not,
;  see https://www.gnu.org/licenses/.


; INPUT PARAMETERS
; res_ptr	r2
; s1_ptr	r3
; s2_ptr	r4
; size		r5

; This code has been optimized to run one instruction per clock, avoiding
; load stalls and writeback contention.  As a result, the instruction
; order is not always natural.

; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
; ___gmpn_add_n -- add the size-limb vectors at s1_ptr and s2_ptr and store
; the size-limb sum at res_ptr.
;
; In:	r2 = res_ptr, r3 = s1_ptr, r4 = s2_ptr, r5 = size (limbs, > 0)
; Out:	r2 = carry-out from the most significant limb (0 or 1)
; Uses:	r5-r10 and r12 as scratch, plus the carry bit; r2-r4 are advanced
;	as the vectors are walked.  Returns through r1.
;
; The loop body is unrolled eight times, four instructions (16 bytes) per
; limb, alternating between the register pairs r6/r7 and r8/r9.  When size
; is a multiple of 8 the body is entered at Lzero.  Otherwise it is entered
; part-way through, at Lbase + 16*k with k = (-size) mod 8, and the three
; pointers are first moved back by k limbs so that the fixed offsets in the
; body address the right words.  Because the entry point may consume either
; register pair first, the first limbs are placed in both pairs.
;
; Do not reorder or resize the instructions between Lbase and the end of the
; loop: the computed entry address depends on their exact layout.

	text
	align	 16
	global	 ___gmpn_add_n
___gmpn_add_n:
	ld	r6,r3,0			; read first limb from s1_ptr
	extu	r10,r5,3		; r10 = size / 8 (loop count)
	ld	r7,r4,0			; read first limb from s2_ptr

	subu.co	 r5,r0,r5		; r5 = -size (clear carry as side effect)
	mak	r5,r5,3<4>		; r5 = ((-size) mod 8) * 16, byte offset into loop
	bcnd	eq0,r5,Lzero		; size is a multiple of 8: enter at the top

	or	r12,r0,lo16(Lbase)
	or.u	r12,r12,hi16(Lbase)
	addu	r12,r12,r5		; r12 is address for entering in loop

	extu	r5,r5,2			; divide by 4
	subu	r2,r2,r5		; adjust res_ptr
	subu	r3,r3,r5		; adjust s1_ptr
	subu	r4,r4,r5		; adjust s2_ptr

	or	r8,r6,r0		; first s1 limb also in r8 (entry may use r8/r9)

	jmp.n	 r12
	 or	r9,r7,r0		; (delay slot) first s2 limb also in r9

Loop:	addu	r3,r3,32
	st	r8,r2,28		; store last limb of the previous pass
	addu	r4,r4,32
	ld	r6,r3,0
	addu	r2,r2,32
	ld	r7,r4,0
Lzero:	subu	r10,r10,1		; add 0 + 8r limbs (adj loop cnt)
Lbase:	ld	r8,r3,4
	addu.cio r6,r6,r7
	ld	r9,r4,4
	st	r6,r2,0
	ld	r6,r3,8			; add 7 + 8r limbs
	addu.cio r8,r8,r9
	ld	r7,r4,8
	st	r8,r2,4
	ld	r8,r3,12		; add 6 + 8r limbs
	addu.cio r6,r6,r7
	ld	r9,r4,12
	st	r6,r2,8
	ld	r6,r3,16		; add 5 + 8r limbs
	addu.cio r8,r8,r9
	ld	r7,r4,16
	st	r8,r2,12
	ld	r8,r3,20		; add 4 + 8r limbs
	addu.cio r6,r6,r7
	ld	r9,r4,20
	st	r6,r2,16
	ld	r6,r3,24		; add 3 + 8r limbs
	addu.cio r8,r8,r9
	ld	r7,r4,24
	st	r8,r2,20
	ld	r8,r3,28		; add 2 + 8r limbs
	addu.cio r6,r6,r7
	ld	r9,r4,28
	st	r6,r2,24
	bcnd.n	 ne0,r10,Loop		; add 1 + 8r limbs
	 addu.cio r8,r8,r9		; (delay slot) last add of this pass

	st	r8,r2,28		; store most significant limb

	jmp.n	 r1
	 addu.ci r2,r0,r0		; return carry-out from most sign. limb