// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "textflag.h"

// See memclrNoHeapPointers Go doc for important implementation constraints.

// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// Zeroes the n bytes starting at ptr. The routine picks a strategy by size:
//
//   - n < 8:    only the word/halfword/byte tail at nozerolarge runs.
//   - n < 512:  64-byte chunks are cleared with four 16-byte vector stores
//               per iteration, followed by 32-, 16- and 8-byte steps and
//               then the tail.
//   - n >= 512: ptr is first advanced to a 128-byte boundary (1-, 2-, 4-,
//               8-byte stores, then 16-byte vector stores), whole 128-byte
//               blocks are cleared with DCBZ (four per iteration, then one
//               at a time), and whatever is left is handed to the
//               under-512 code.
//
// R0 is relied on to hold zero: it is the value stored by MOVB/MOVH/MOVW/MOVD
// and the index register in the (R3+R0) addressing forms. VS32 (V0) is zeroed
// with XXLXOR before any vector store that uses it.
//
// When GOPPC64_power10 is defined, the sub-16-byte remainders are written with
// a single variable-length STXVL instead of the 8/4/2/1-byte store ladder.
TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-16
	// R3 = ptr
	// R4 = n

	// Determine if there are doublewords to clear
check:
	ANDCC	$7, R4, R5	// R5: leftover bytes to clear
	SRD	$3, R4, R6	// R6: double words to clear
	CMP	R6, $0, CR1	// CR1[EQ] set if no double words

	BC	12, 6, nozerolarge	// branch on CR1[EQ]: n < 8, only single bytes
	CMP	R4, $512
	BLT	under512	// special case for < 512
	ANDCC	$127, R3, R8	// check for 128 alignment of address
	BEQ	zero512setup	// already 128 byte aligned: go straight to the DCBZ setup

	ANDCC	$7, R3, R15
	BEQ	zero512xsetup	// at least 8 byte aligned

	// zero bytes up to 8 byte alignment
	// Each step below tests one low bit of ptr and, if it is set, clears
	// that many bytes so the bit becomes zero; n (R4) is reduced to match.

	ANDCC	$1, R3, R15	// check for byte alignment
	BEQ	byte2
	MOVB	R0, 0(R3)	// zero 1 byte
	ADD	$1, R3	// bump ptr by 1
	ADD	$-1, R4

byte2:
	ANDCC	$2, R3, R15	// check for 2 byte alignment
	BEQ	byte4
	MOVH	R0, 0(R3)	// zero 2 bytes
	ADD	$2, R3	// bump ptr by 2
	ADD	$-2, R4

byte4:
	ANDCC	$4, R3, R15	// check for 4 byte alignment
	BEQ	zero512xsetup
	MOVW	R0, 0(R3)	// zero 4 bytes
	ADD	$4, R3	// bump ptr by 4
	ADD	$-4, R4
	BR	zero512xsetup	// ptr should now be 8 byte aligned

under512:
	SRDCC	$3, R6, R7	// R7 = n/64: 64 byte chunks? (sets CR0 for the BEQ below)
	XXLXOR	VS32, VS32, VS32	// clear VS32 (V0)
	BEQ	lt64gt8	// no 64 byte chunk; tests the SRDCC result above

	// Prepare to clear 64 bytes at a time.

zero64setup:
	// Entered with R7 = number of 64 byte chunks and VS32 already zero
	// (from under512 above, or from smaller at the end of the routine).
	DCBTST	(R3)	// prepare data cache
	MOVD	R7, CTR	// number of 64 byte chunks
	MOVD	$16, R8	// index registers for the 2nd, 3rd and 4th 16 byte stores
	MOVD	$32, R16
	MOVD	$48, R17

zero64:
	STXVD2X	VS32, (R3+R0)	// store 16 bytes
	STXVD2X	VS32, (R3+R8)
	STXVD2X	VS32, (R3+R16)
	STXVD2X	VS32, (R3+R17)
	ADD	$64, R3
	ADD	$-64, R4
	BDNZ	zero64	// dec ctr, br zero64 if ctr not 0
	SRDCC	$3, R4, R6	// remaining doublewords
	BEQ	nozerolarge	// fewer than 8 bytes left: finish in the tail

lt64gt8:
	// Fewer than 64 bytes remain in R4; peel off 32, then 16, then 8.
	CMP	R4, $32
	BLT	lt32gt8
	MOVD	$16, R8
	STXVD2X	VS32, (R3+R0)	// clear 32 bytes with two vector stores
	STXVD2X	VS32, (R3+R8)
	ADD	$-32, R4
	ADD	$32, R3
lt32gt8:
	CMP	R4, $16
	BLT	lt16gt8
	STXVD2X	VS32, (R3+R0)	// clear 16 bytes
	ADD	$16, R3
	ADD	$-16, R4
lt16gt8:
#ifdef GOPPC64_power10
	// Store the remaining R4 bytes with one variable-length vector store.
	SLD	$56, R4, R7	// move the byte count into the top byte of R7, where STXVL reads its length
	STXVL	V0, R3, R7
	RET
#else
	CMP	R4, $8
	BLT	nozerolarge
	MOVD	R0, 0(R3)	// clear 8 bytes
	ADD	$8, R3
	ADD	$-8, R4
#endif
nozerolarge:
	// Tail: clear the last n mod 8 bytes.
	ANDCC	$7, R4, R5	// any remaining bytes
	BC	4, 1, LR	// ble lr: return when R5 is not greater than zero
#ifdef GOPPC64_power10
	XXLXOR	VS32, VS32, VS32	// clear VS32 (V0)
	SLD	$56, R5, R7	// byte count into the top byte of R7 for STXVL
	STXVL	V0, R3, R7
	RET
#else
	CMP	R5, $4
	BLT	next2
	MOVW	R0, 0(R3)	// clear 4 bytes
	ADD	$4, R3
	ADD	$-4, R5
next2:
	CMP	R5, $2
	BLT	next1
	MOVH	R0, 0(R3)	// clear 2 bytes
	ADD	$2, R3
	ADD	$-2, R5
next1:
	CMP	R5, $0
	BC	12, 2, LR	// beqlr: nothing left, return
	MOVB	R0, 0(R3)	// clear the final byte
	RET
#endif

zero512xsetup:	// 512 chunk with extra needed
	// ptr is 8 byte aligned on every path that reaches this label.
	ANDCC	$8, R3, R11	// 8 byte alignment?
	BEQ	zero512setup16
	MOVD	R0, 0(R3)	// clear 8 bytes
	ADD	$8, R3	// update ptr to next 8
	ADD	$-8, R4	// dec count by 8

zero512setup16:
	// ptr is now 16 byte aligned, so the distance to the next 128 byte
	// boundary is a whole number of 16 byte stores.
	ANDCC	$127, R3, R14	// < 128 byte alignment
	BEQ	zero512setup	// handle 128 byte alignment
	MOVD	$128, R15
	SUB	R14, R15, R14	// find increment to 128 alignment (R14 = 128 - R14)
	SRD	$4, R14, R15	// number of 16 byte chunks
	MOVD	R15, CTR	// loop counter of 16 bytes
	XXLXOR	VS32, VS32, VS32	// clear VS32 (V0)

zero512preloop:	// clear up to 128 alignment
	STXVD2X	VS32, (R3+R0)	// clear 16 bytes
	ADD	$16, R3	// update ptr
	ADD	$-16, R4	// dec count
	BDNZ	zero512preloop

zero512setup:	// setup for dcbz loop
	// The alignment prologue may have reduced R4 below 512, hence the re-check.
	CMP	R4, $512	// check if at least 512
	BLT	remain
	SRD	$9, R4, R8	// loop count for 512 chunks
	MOVD	R8, CTR	// set up counter
	MOVD	$128, R9	// index regs for 128 bytes
	MOVD	$256, R10
	MOVD	$384, R11
	PCALIGN	$16	// align the loop entry to a 16 byte boundary
zero512:
	DCBZ	(R3+R0)	// clear first chunk
	DCBZ	(R3+R9)	// clear second chunk
	DCBZ	(R3+R10)	// clear third chunk
	DCBZ	(R3+R11)	// clear fourth chunk
	ADD	$512, R3
	BDNZ	zero512
	ANDCC	$511, R4	// R4 is not updated inside the loop; keep only the bytes still to clear

remain:
	CMP	R4, $128	// check if 128 byte chunks left
	BLT	smaller
	DCBZ	(R3+R0)	// clear 128
	ADD	$128, R3
	ADD	$-128, R4
	BR	remain

smaller:
	// Fewer than 128 bytes remain; reuse the under-512 code for them.
	ANDCC	$127, R4, R7	// find leftovers
	BEQ	done
	CMP	R7, $64	// more than 64, do 64 at a time
	XXLXOR	VS32, VS32, VS32	// zero VS32 for the vector stores at the branch targets
	BLT	lt64gt8	// less than 64 (tests the CMP above)
	SRD	$6, R7, R7	// set up counter for 64
	BR	zero64setup

done:
	RET