// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Extracted from: github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/runtime/memclr_ppc64x.s

// +build ppc64 ppc64le

#include "textflag.h"

// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// memclrNoHeapPointers stores zero to the n bytes starting at ptr.
// It uses no stack frame ($0) and takes 16 bytes of arguments:
// ptr at 0(FP) and n at 8(FP).
//
// Paths, selected by n:
//   n < 8     byte stores only (nozerolarge / zerotailloop).
//   n < 512   32-byte chunks (two 16-byte vector stores each), then
//             doublewords, then tail bytes.
//   n >= 512  ptr is first advanced to a 128-byte boundary using 1-, 2-,
//             4-, 8- and 16-byte stores; the bulk is then cleared with DCBZ
//             in 512-byte and 128-byte steps; whatever is left (< 128
//             bytes) is handed back to the 32-byte / doubleword / byte paths.
//
// Registers:
//   R3    current store address (advanced after every store)
//   R4    bytes still to clear (decremented alongside R3, except in the
//         final byte loop, which is driven by CTR only)
//   R0    stored as the zero value and used as a zero index register; it is
//         never written here, so this routine relies on R0 already being 0
//   VS32  zeroed vector register used by the 16-byte stores
//   CTR   loop counter for every counted loop (BC 16, 0, label)
//   R5-R11, R14, R15  scratch
//
// NOTE(review): every DCBZ below is accounted for as clearing exactly 128
// bytes, i.e. the code assumes a 128-byte data cache block.
TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT|NOFRAME, $0-16
	MOVD ptr+0(FP), R3 // R3 = address to clear
	MOVD n+8(FP), R4   // R4 = byte count

	// Determine if there are doublewords to clear.
	// (No branch in this routine targets the label "check".)
check:
	ANDCC $7, R4, R5  // R5: leftover bytes (n % 8); recomputed at nozerolarge before use
	SRD   $3, R4, R6  // R6: number of whole doublewords (n / 8)
	CMP   R6, $0, CR1 // CR1[EQ] set if there are no doublewords

	BC    12, 6, nozerolarge // CR1[EQ] set: fewer than 8 bytes, byte stores only
	CMP   R4, $512
	BLT   under512           // sizes below 512 skip the alignment + DCBZ path
	ANDCC $127, R3, R8       // is the address already 128-byte aligned?
	BEQ   zero512setup       // yes: go straight to the DCBZ setup

	ANDCC $7, R3, R15
	BEQ   zero512xsetup // already 8-byte aligned: skip the sub-doubleword stores

	// Zero bytes up to 8-byte alignment: test address bits 0, 1 and 2 in
	// turn and store 1, 2 and 4 bytes as needed.

	ANDCC $1, R3, R15 // address bit 0 set (odd address)?
	BEQ   byte2
	MOVB  R0, 0(R3)   // zero 1 byte
	ADD   $1, R3      // bump ptr by 1
	ADD   $-1, R4     // one byte fewer to clear

byte2:
	ANDCC $2, R3, R15 // address bit 1 set (not 4-byte aligned)?
	BEQ   byte4
	MOVH  R0, 0(R3)   // zero 2 bytes
	ADD   $2, R3      // bump ptr by 2
	ADD   $-2, R4     // two bytes fewer to clear

byte4:
	ANDCC $4, R3, R15   // address bit 2 set (not 8-byte aligned)?
	BEQ   zero512xsetup
	MOVW  R0, 0(R3)     // zero 4 bytes
	ADD   $4, R3        // bump ptr by 4
	ADD   $-4, R4       // four bytes fewer to clear
	BR    zero512xsetup // ptr is now 8-byte aligned

	// 8 <= n < 512.
under512:
	MOVD  R6, CTR    // CTR = doubleword count, used by zero8 when falling through
	SRDCC $2, R6, R7 // R7 = number of 32-byte chunks (doublewords / 4)
	BNE   zero32setup // at least one 32-byte chunk: use vector stores first

	// Clear doublewords. On entry CTR holds the number of doublewords
	// to store (set by under512, zero32 or zero8setup).

zero8:
	MOVD R0, 0(R3)    // zero one doubleword
	ADD  $8, R3
	ADD  $-8, R4
	BC   16, 0, zero8 // dec ctr, br zero8 if ctr not 0
	BR   nozerolarge  // handle leftover bytes

	// Prepare to clear 32 bytes at a time.
	// On entry R7 holds the number of 32-byte chunks.

zero32setup:
	DCBTST (R3)             // cache hint: touch for store at the start address
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
	MOVD   R7, CTR          // number of 32 byte chunks
	MOVD   $16, R8          // index of the second 16-byte half

zero32:
	STXVD2X VS32, (R3+R0)   // store 16 bytes at R3 (R0 is the zero index)
	STXVD2X VS32, (R3+R8)   // store 16 bytes at R3+16
	ADD     $32, R3
	ADD     $-32, R4
	BC      16, 0, zero32   // dec ctr, br zero32 if ctr not 0
	RLDCLCC $61, R4, $3, R6 // R6 = remaining doublewords (R4 >> 3), sets CR0
	BEQ     nozerolarge     // none left: only tail bytes remain
	MOVD    R6, CTR         // set up the CTR for doublewords
	BR      zero8

	// Fewer than 8 bytes remain in R4.
nozerolarge:
	ANDCC $7, R4, R5 // R5 = remaining bytes
	BC    4, 1, LR   // ble lr: return if R5 is not greater than zero

	// (No branch in this routine targets the label "zerotail".)
zerotail:
	MOVD R5, CTR // set up to clear tail bytes

zerotailloop:
	MOVB R0, 0(R3)           // clear a single byte
	ADD  $1, R3
	BC   16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
	RET

	// n >= 512 and ptr is 8-byte aligned but not known to be 128-byte
	// aligned: advance to a 16-byte, then to a 128-byte boundary.
zero512xsetup:
	ANDCC $8, R3, R11    // address bit 3 set (not 16-byte aligned)?
	BEQ   zero512setup16
	MOVD  R0, 0(R3)      // clear 8 bytes
	ADD   $8, R3         // update ptr to next 8
	ADD   $-8, R4        // dec count by 8

zero512setup16:
	ANDCC $127, R3, R14 // R14 = offset of ptr within its 128-byte block
	BEQ   zero512setup  // already 128-byte aligned
	MOVD  $128, R15
	SUB   R14, R15, R14 // R14 = 128 - offset: bytes up to the next 128-byte boundary
	SRD   $4, R14, R15  // R15 = number of 16-byte chunks to that boundary

	// (No branch in this routine targets the label "zero512presetup".)
zero512presetup:
	MOVD   R15, CTR         // loop counter of 16-byte stores
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)

zero512preloop: // clear up to 128 alignment
	STXVD2X VS32, (R3+R0)      // clear 16 bytes
	ADD     $16, R3            // update ptr
	ADD     $-16, R4           // dec count
	BC      16, 0, zero512preloop // dec ctr, loop until the boundary is reached

	// ptr is 128-byte aligned here. The alignment stores above may have
	// reduced R4, so it is compared against 512 again.
zero512setup: // setup for dcbz loop
	CMP  R4, $512   // check if at least 512 bytes remain
	BLT  remain
	SRD  $9, R4, R8 // loop count for 512-byte chunks (R4 / 512)
	MOVD R8, CTR    // set up counter
	MOVD $128, R9   // index regs for the 2nd, 3rd and 4th 128-byte blocks
	MOVD $256, R10
	MOVD $384, R11

zero512:
	DCBZ (R3+R0)  // clear first chunk
	DCBZ (R3+R9)  // clear second chunk
	DCBZ (R3+R10) // clear third chunk
	DCBZ (R3+R11) // clear fourth chunk
	ADD  $512, R3
	ADD  $-512, R4
	BC   16, 0, zero512 // dec ctr, br zero512 if ctr not 0

	// Fewer than 512 bytes remain: clear 128 bytes per iteration while
	// at least 128 are left (loop is driven by R4, not CTR).
remain:
	CMP  R4, $128 // check if 128 byte chunks left
	BLT  smaller
	DCBZ (R3+R0)  // clear 128
	ADD  $128, R3
	ADD  $-128, R4
	BR   remain

	// Fewer than 128 bytes remain.
smaller:
	ANDCC $127, R4, R7 // R7 = leftover byte count
	BEQ   done         // nothing left
	CMP   R7, $64      // 64 or more: do 32 at a time
	BLT   zero8setup   // less than 64: do 8 at a time
	SRD   $5, R7, R7   // R7 = number of 32-byte chunks, as zero32setup expects
	BR    zero32setup

zero8setup:
	SRDCC $3, R7, R7  // R7 = doublewords left; zero means fewer than 8 bytes remain
	BEQ   nozerolarge // only tail bytes
	MOVD  R7, CTR     // doubleword count for zero8
	BR    zero8

done:
	RET