// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "textflag.h"

// See memclrNoHeapPointers Go doc for important implementation constraints.

// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// Zeroes the n bytes starting at ptr.
//
// Register use throughout:
//	R3   = current store address (starts as ptr, advanced after each store)
//	R4   = bytes still to clear (starts as n)
//	R0   = source of zero for scalar stores and the zero index register
//	VS32 = zeroed vector register used for 16-byte stores
//	CTR  = loop counter for every counted loop below
//
// Strategy, chosen by n:
//	n < 8    byte-at-a-time tail loop (nozerolarge/zerotail).
//	n < 512  64-byte vector-store loop, then 32/16/8-byte steps, then the
//	         byte tail (under512 .. lt16gt8).
//	n >= 512 align ptr to 128 bytes, clear 512 bytes per iteration with four
//	         DCBZ, then 128 bytes per DCBZ, then fall back to the paths above
//	         for what is left (zero512xsetup .. smaller).
//
// The DCBZ paths step the address by 128 per instruction, i.e. the code
// treats one data cache block as 128 bytes.
TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-16
	// R3 = ptr
	// R4 = n

	// Determine if there are doublewords to clear
check:
	ANDCC	$7, R4, R5	// R5: leftover bytes to clear (n mod 8)
	SRD	$3, R4, R6	// R6: double words to clear (n / 8)
	CMP	R6, $0, CR1	// CR1[EQ] set if no double words

	BC	12, 6, nozerolarge	// CR1[EQ] set: n < 8, only single bytes
	CMP	R4, $512
	BLT	under512	// special case for < 512
	ANDCC	$127, R3, R8	// check for 128 alignment of address
	BEQ	zero512setup	// already 128 byte aligned: go straight to the DCBZ setup

	ANDCC	$7, R3, R15
	BEQ	zero512xsetup	// at least 8 byte aligned

	// zero bytes up to 8 byte alignment
	// Each step below tests one low address bit and, if it is set, clears
	// that many bytes so the bit becomes zero; R4 is reduced to match.

	ANDCC	$1, R3, R15	// check for byte alignment (address bit 0)
	BEQ	byte2
	MOVB	R0, 0(R3)	// zero 1 byte
	ADD	$1, R3	// bump ptr by 1
	ADD	$-1, R4	// one byte fewer to clear

byte2:
	ANDCC	$2, R3, R15	// check for 2 byte alignment (address bit 1)
	BEQ	byte4
	MOVH	R0, 0(R3)	// zero 2 bytes
	ADD	$2, R3	// bump ptr by 2
	ADD	$-2, R4	// two bytes fewer to clear

byte4:
	ANDCC	$4, R3, R15	// check for 4 byte alignment (address bit 2)
	BEQ	zero512xsetup
	MOVW	R0, 0(R3)	// zero 4 bytes
	ADD	$4, R3	// bump ptr by 4
	ADD	$-4, R4	// four bytes fewer to clear
	BR	zero512xsetup	// ptr should now be 8 byte aligned

	// n < 512: no alignment work is done on this path; clear with vector
	// stores directly from the address as given.
under512:
	SRDCC	$3, R6, R7	// 64 byte chunks? R7 = R6/8 = n/64, sets CR0
	XXLXOR	VS32, VS32, VS32	// clear VS32 (V0)
	BEQ	lt64gt8	// no 64 byte chunks; tests CR0 from the SRDCC above

	// Prepare to clear 64 bytes at a time.
	// Also entered from 'smaller' with R7 already holding the chunk count
	// and VS32 already zeroed.

zero64setup:
	DCBTST	(R3)	// prepare data cache
	MOVD	R7, CTR	// number of 64 byte chunks
	MOVD	$16, R8	// R8, R16, R17: offsets of the 2nd, 3rd and 4th 16 byte store
	MOVD	$32, R16
	MOVD	$48, R17

zero64:
	STXVD2X	VS32, (R3+R0)	// store 16 bytes at offset 0
	STXVD2X	VS32, (R3+R8)	// offset 16
	STXVD2X	VS32, (R3+R16)	// offset 32
	STXVD2X	VS32, (R3+R17)	// offset 48
	ADD	$64, R3
	ADD	$-64, R4
	BDNZ	zero64	// dec ctr, br zero64 if ctr not 0
	SRDCC	$3, R4, R6	// remaining doublewords
	BEQ	nozerolarge	// fewer than 8 bytes left: only the byte tail remains

	// Fewer than 64 bytes remain. Peel off 32, then 16, then 8 bytes,
	// each only if at least that many are left.
lt64gt8:
	CMP	R4, $32
	BLT	lt32gt8
	MOVD	$16, R8	// offset of the second 16 byte store
	STXVD2X	VS32, (R3+R0)
	STXVD2X	VS32, (R3+R8)
	ADD	$-32, R4
	ADD	$32, R3
lt32gt8:
	CMP	R4, $16
	BLT	lt16gt8
	STXVD2X	VS32, (R3+R0)
	ADD	$16, R3
	ADD	$-16, R4
lt16gt8:
	CMP	R4, $8
	BLT	nozerolarge
	MOVD	R0, 0(R3)	// clear one doubleword
	ADD	$8, R3
	ADD	$-8, R4

nozerolarge:
	ANDCC	$7, R4, R5	// any remaining bytes
	BC	4, 1, LR	// ble lr: return when R5 is zero (CR0[GT] clear)

zerotail:
	MOVD	R5, CTR	// set up to clear tail bytes

zerotailloop:
	MOVB	R0, 0(R3)	// clear single bytes
	ADD	$1, R3
	BDNZ	zerotailloop	// dec ctr, br zerotailloop if ctr not 0
	RET

	// n >= 512 and ptr is 8 byte aligned but not yet 128 byte aligned.
zero512xsetup:	// 512 chunk with extra needed
	ANDCC	$8, R3, R11	// 8 byte alignment? (address bit 3 set: not yet 16 byte aligned)
	BEQ	zero512setup16
	MOVD	R0, 0(R3)	// clear 8 bytes
	ADD	$8, R3	// update ptr to next 8
	ADD	$-8, R4	// dec count by 8

	// ptr is now 16 byte aligned, so the distance to the next 128 byte
	// boundary is a whole number of 16 byte chunks.
zero512setup16:
	ANDCC	$127, R3, R14	// < 128 byte alignment
	BEQ	zero512setup	// handle 128 byte alignment
	MOVD	$128, R15
	SUB	R14, R15, R14	// find increment to 128 alignment (R14 = 128 - R14)
	SRD	$4, R14, R15	// number of 16 byte chunks

zero512presetup:
	MOVD	R15, CTR	// loop counter of 16 bytes
	XXLXOR	VS32, VS32, VS32	// clear VS32 (V0)

zero512preloop:	// clear up to 128 alignment
	STXVD2X	VS32, (R3+R0)	// clear 16 bytes
	ADD	$16, R3	// update ptr
	ADD	$-16, R4	// dec count
	BDNZ	zero512preloop

	// ptr is 128 byte aligned here. The alignment stores above may have
	// taken the count below 512, hence the re-check.
zero512setup:	// setup for dcbz loop
	CMP	R4, $512	// check if at least 512
	BLT	remain
	SRD	$9, R4, R8	// loop count for 512 chunks
	MOVD	R8, CTR	// set up counter
	MOVD	$128, R9	// index regs for 128 bytes
	MOVD	$256, R10
	MOVD	$384, R11
	PCALIGN	$32	// align the start of the loop below to a 32 byte boundary

zero512:
	DCBZ	(R3+R0)	// clear first chunk
	DCBZ	(R3+R9)	// clear second chunk
	DCBZ	(R3+R10)	// clear third chunk
	DCBZ	(R3+R11)	// clear fourth chunk
	ADD	$512, R3
	BDNZ	zero512
	ANDCC	$511, R4	// R4 is not decremented inside the loop; keep only the part below 512

	// Fewer than 512 bytes remain: clear 128 bytes at a time while possible.
remain:
	CMP	R4, $128	// check if 128 byte chunks left
	BLT	smaller
	DCBZ	(R3+R0)	// clear 128
	ADD	$128, R3
	ADD	$-128, R4
	BR	remain

	// Fewer than 128 bytes remain: hand off to the vector-store paths.
smaller:
	ANDCC	$127, R4, R7	// find leftovers
	BEQ	done	// nothing left
	CMP	R7, $64	// more than 64, do 64 at a time
	XXLXOR	VS32, VS32, VS32	// zero VS32 for the vector stores in both targets below
	BLT	lt64gt8	// less than 64
	SRD	$6, R7, R7	// set up counter for 64
	BR	zero64setup

done:
	RET