// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "textflag.h"

// See memclrNoHeapPointers Go doc for important implementation constraints.

// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// memclrNoHeapPointers zeroes the n bytes starting at ptr.
//
// Strategy, as selected by the code below:
//   - n < 8:        bytes are cleared one at a time (nozerolarge/zerotailloop).
//   - 8 <= n < 512: 32-byte chunks (two 16-byte vector stores per iteration)
//                   when at least 4 doublewords are present, then 8-byte
//                   doublewords, then tail bytes.
//   - n >= 512:     ptr is first advanced to a 128-byte boundary, then memory
//                   is cleared with DCBZ, 512 bytes per iteration and then
//                   128 bytes per iteration; the remainder (< 128 bytes) is
//                   handed to the 32-byte / 8-byte / byte paths above.
//
// Every DCBZ is accounted for as 128 bytes (ptr += 128, n -= 128 per DCBZ),
// i.e. the code assumes a 128-byte data cache block.
//
// Register usage:
//   R0        source of zero for scalar stores and zero index in (R3+R0);
//             the code relies on R0 containing 0
//   R3        current destination pointer
//   R4        bytes still to clear
//   R5        tail byte count (R4 & 7)
//   R6        doubleword count
//   R7        32-byte chunk count, or leftover count in the >= 512 path
//   R8        scratch: alignment test, 512-byte loop count, or offset 16
//   R9-R11    DCBZ offsets 128, 256, 384 (R11 is also alignment scratch)
//   R14, R15  alignment scratch
//   VS32      16 bytes of zero for vector stores
//   CTR       loop counter for every loop below
//   CR1       result of the "any doublewords?" test
TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT|NOFRAME, $0-16
	MOVD ptr+0(FP), R3 // R3 = ptr
	MOVD n+8(FP), R4 // R4 = n

	// Determine if there are doublewords to clear
check:
	ANDCC $7, R4, R5 // R5: leftover bytes to clear
	SRD $3, R4, R6 // R6: double words to clear
	CMP R6, $0, CR1 // CR1[EQ] set if no double words

	BC 12, 6, nozerolarge // branch if CR1[EQ]: n < 8, only single bytes
	CMP R4, $512
	BLT under512 // special case for < 512
	ANDCC $127, R3, R8 // check for 128 alignment of address
	BEQ zero512setup // already 128-byte aligned, go straight to DCBZ setup

	ANDCC $7, R3, R15
	BEQ zero512xsetup // at least 8 byte aligned

	// Zero bytes up to 8 byte alignment: test address bits 0, 1 and 2 in
	// turn and store 1, 2 and 4 bytes respectively when the bit is set.

	ANDCC $1, R3, R15 // check for byte alignment
	BEQ byte2
	MOVB R0, 0(R3) // zero 1 byte
	ADD $1, R3 // bump ptr by 1
	ADD $-1, R4 // one byte fewer to clear

byte2:
	ANDCC $2, R3, R15 // check for 2 byte alignment
	BEQ byte4
	MOVH R0, 0(R3) // zero 2 bytes
	ADD $2, R3 // bump ptr by 2
	ADD $-2, R4 // two bytes fewer to clear

byte4:
	ANDCC $4, R3, R15 // check for 4 byte alignment
	BEQ zero512xsetup
	MOVW R0, 0(R3) // zero 4 bytes
	ADD $4, R3 // bump ptr by 4
	ADD $-4, R4 // four bytes fewer to clear
	BR zero512xsetup // ptr should now be 8 byte aligned

	// 8 <= n < 512: no alignment work, clear by 32 bytes and/or doublewords.
under512:
	MOVD R6, CTR // R6 = number of double words
	SRDCC $2, R6, R7 // R7 = number of 32 byte chunks; CR0[EQ] if none
	BNE zero32setup // at least one 32 byte chunk

	// Clear double words. Entered with CTR = number of doublewords (> 0).

zero8:
	MOVD R0, 0(R3) // double word
	ADD $8, R3
	ADD $-8, R4 // keep R4 current so the tail count can be derived from it
	BC 16, 0, zero8 // dec ctr, br zero8 if ctr not 0
	BR nozerolarge // handle leftovers

	// Prepare to clear 32 bytes at a time.
	// Entered with R7 = number of 32 byte chunks (> 0).

zero32setup:
	DCBTST (R3) // prepare data cache (touch-for-store hint at ptr)
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
	MOVD R7, CTR // number of 32 byte chunks
	MOVD $16, R8 // R8 = offset of the second 16 byte store

zero32:
	STXVD2X VS32, (R3+R0) // store 16 bytes
	STXVD2X VS32, (R3+R8) // store the next 16 bytes
	ADD $32, R3
	ADD $-32, R4
	BC 16, 0, zero32 // dec ctr, br zero32 if ctr not 0
	RLDCLCC $61, R4, $3, R6 // R6 = R4 >> 3: remaining doublewords; sets CR0
	BEQ nozerolarge // no doublewords left, only tail bytes (if any)
	MOVD R6, CTR // set up the CTR for doublewords
	BR zero8

	// Fewer than 8 bytes remain in R4: clear them one byte at a time.
nozerolarge:
	ANDCC $7, R4, R5 // any remaining bytes
	BC 4, 1, LR // ble lr: return if nothing is left

zerotail:
	MOVD R5, CTR // set up to clear tail bytes

zerotailloop:
	MOVB R0, 0(R3) // clear single bytes
	ADD $1, R3
	BC 16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
	RET

	// n >= 512 and ptr is 8 byte aligned but not yet 128 byte aligned.
zero512xsetup: // 512 chunk with extra needed
	ANDCC $8, R3, R11 // address bit 3 set? (8 but not 16 byte aligned)
	BEQ zero512setup16
	MOVD R0, 0(R3) // clear 8 bytes
	ADD $8, R3 // update ptr to next 8
	ADD $-8, R4 // dec count by 8

	// ptr is now 16 byte aligned; compute the distance to the next
	// 128 byte boundary and cover it with 16 byte vector stores.
zero512setup16:
	ANDCC $127, R3, R14 // R14 = ptr mod 128
	BEQ zero512setup // handle 128 byte alignment
	MOVD $128, R15
	SUB R14, R15, R14 // find increment to 128 alignment
	SRD $4, R14, R15 // number of 16 byte chunks

zero512presetup:
	MOVD R15, CTR // loop counter of 16 bytes
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)

zero512preloop: // clear up to 128 alignment
	STXVD2X VS32, (R3+R0) // clear 16 bytes
	ADD $16, R3 // update ptr
	ADD $-16, R4 // dec count
	BC 16, 0, zero512preloop // dec ctr, br zero512preloop if ctr not 0

	// ptr is 128 byte aligned here. The alignment stores above may have
	// reduced R4 below 512, so the size is checked again.
zero512setup: // setup for dcbz loop
	CMP R4, $512 // check if at least 512
	BLT remain
	SRD $9, R4, R8 // loop count for 512 chunks
	MOVD R8, CTR // set up counter
	MOVD $128, R9 // index regs for 128 bytes
	MOVD $256, R10
	MOVD $384, R11

zero512:
	DCBZ (R3+R0) // clear first chunk
	DCBZ (R3+R9) // clear second chunk
	DCBZ (R3+R10) // clear third chunk
	DCBZ (R3+R11) // clear fourth chunk
	ADD $512, R3
	ADD $-512, R4
	BC 16, 0, zero512 // dec ctr, br zero512 if ctr not 0

	// Fewer than 512 bytes remain: clear 128 bytes per DCBZ while possible.
remain:
	CMP R4, $128 // check if 128 byte chunks left
	BLT smaller
	DCBZ (R3+R0) // clear 128
	ADD $128, R3
	ADD $-128, R4
	BR remain

	// Fewer than 128 bytes remain: dispatch to the small-size paths.
smaller:
	ANDCC $127, R4, R7 // find leftovers
	BEQ done // nothing left
	CMP R7, $64 // more than 64, do 32 at a time
	BLT zero8setup // less than 64, do 8 at a time
	SRD $5, R7, R7 // set up counter for 32
	BR zero32setup

zero8setup:
	SRDCC $3, R7, R7 // R7 = leftover doublewords; zero means < 8 bytes remain
	BEQ nozerolarge // only tail bytes left
	MOVD R7, CTR // doubleword count for zero8
	BR zero8

done:
	RET