; Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/m88k/sub_n.s

; mc88100 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.

; Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of either:
;
;   * the GNU Lesser General Public License as published by the Free
;     Software Foundation; either version 3 of the License, or (at your
;     option) any later version.
;
; or
;
;   * the GNU General Public License as published by the Free Software
;     Foundation; either version 2 of the License, or (at your option) any
;     later version.
;
; or both in parallel, as here.
;
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
; for more details.
;
; You should have received copies of the GNU General Public License and the
; GNU Lesser General Public License along with the GNU MP Library.  If not,
; see https://www.gnu.org/licenses/.


; INPUT PARAMETERS
; res_ptr	r2
; s1_ptr	r3
; s2_ptr	r4
; size		r5

; This code has been optimized to run one instruction per clock, avoiding
; load stalls and writeback contention.  As a result, the instruction
; order is not always natural.

; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
;-----------------------------------------------------------------------
; ___gmpn_sub_n -- {res_ptr,size} = {s1_ptr,size} - {s2_ptr,size}
;
; In:   r2 = res_ptr   destination limb vector
;       r3 = s1_ptr    minuend limb vector
;       r4 = s2_ptr    subtrahend limb vector
;       r5 = size      number of limbs; must be > 0 (size 0 is not handled)
; Out:  r2 = complement of the final carry bit, i.e. 1 when the subtraction
;            borrowed out of the most significant limb, otherwise 0
; Uses: r5-r10 and r12 as scratch; r2-r4 are advanced; returns through r1.
;
; Limbs are 4 bytes.  The loop body is unrolled 8 times and every limb step
; is exactly 4 instructions (16 bytes), so the first pass handles the
; size mod 8 leftover limbs by jumping into the body at
; Lbase + ((-size) & 7) * 16.  The pointers are moved back by the matching
; number of limb bytes so that the fixed offsets 0..28 hit limb 0 first.
;
; Steps alternate between the register pairs r6/r7 and r8/r9, and each
; step's loads are issued one step ahead of the subtract that consumes
; them.  The instruction order is deliberate scheduling; do not reorder.
; Instructions indented by one extra space sit in the delay slot of the
; preceding ".n" branch and execute before control transfers.
;-----------------------------------------------------------------------

	text
	align	 16
	global	 ___gmpn_sub_n
___gmpn_sub_n:
	ld	r6,r3,0			; read first limb from s1_ptr
	extu	r10,r5,3		; r10 = size / 8, the loop counter
	ld	r7,r4,0			; read first limb from s2_ptr

	subu	r5,r0,r5		; r5 = -size
	mak	r5,r5,3<4>		; r5 = ((-size) & 7) * 16, byte offset into loop body
	bcnd.n	eq0,r5,Lzero		; size is a multiple of 8: start with a full pass
	 subu.co r0,r0,r0		; initialize carry (0 - 0 sets carry = no borrow)

	or	r12,r0,lo16(Lbase)
	or.u	r12,r12,hi16(Lbase)
	addu	r12,r12,r5		; r12 is address for entering in loop

	extu	r5,r5,2			; divide by 4: bytes of limbs skipped in first pass
	subu	r2,r2,r5		; adjust res_ptr
	subu	r3,r3,r5		; adjust s1_ptr
	subu	r4,r4,r5		; adjust s2_ptr

	or	r8,r6,r0		; copy first s1 limb: entry step may use r8/r9 pair

	jmp.n	 r12			; enter the unrolled body at the computed step
	 or	r9,r7,r0		; copy first s2 limb (delay slot)

Loop:	addu	r3,r3,32		; advance s1_ptr by 8 limbs
	st	r8,r2,28		; store last limb of previous pass (r2 not yet advanced)
	addu	r4,r4,32		; advance s2_ptr by 8 limbs
	ld	r6,r3,0
	addu	r2,r2,32		; advance res_ptr by 8 limbs
	ld	r7,r4,0
Lzero:	subu	r10,r10,1		; subtract 0 + 8r limbs (adj loop cnt)
Lbase:	ld	r8,r3,4
	subu.cio r6,r6,r7
	ld	r9,r4,4
	st	r6,r2,0
	ld	r6,r3,8			; subtract 7 + 8r limbs
	subu.cio r8,r8,r9
	ld	r7,r4,8
	st	r8,r2,4
	ld	r8,r3,12		; subtract 6 + 8r limbs
	subu.cio r6,r6,r7
	ld	r9,r4,12
	st	r6,r2,8
	ld	r6,r3,16		; subtract 5 + 8r limbs
	subu.cio r8,r8,r9
	ld	r7,r4,16
	st	r8,r2,12
	ld	r8,r3,20		; subtract 4 + 8r limbs
	subu.cio r6,r6,r7
	ld	r9,r4,20
	st	r6,r2,16
	ld	r6,r3,24		; subtract 3 + 8r limbs
	subu.cio r8,r8,r9
	ld	r7,r4,24
	st	r8,r2,20
	ld	r8,r3,28		; subtract 2 + 8r limbs
	subu.cio r6,r6,r7
	ld	r9,r4,28
	st	r6,r2,24
	bcnd.n	ne0,r10,Loop		; subtract 1 + 8r limbs
	 subu.cio r8,r8,r9		; last subtract of the pass (delay slot)

	st	r8,r2,28		; store most significant limb

	addu.ci r2,r0,r0		; return carry-out from most sign. limb
	jmp.n	 r1			; return to caller
	 xor	r2,r2,1			; invert carry into a borrow flag (delay slot)