// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le
// +build ppc64 ppc64le

#include "textflag.h"

// See memclrNoHeapPointers Go doc for important implementation constraints.

// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// Zeroes n bytes starting at ptr.
//
// Register roles:
//	R3    current store address (starts as ptr, advanced after every store)
//	R4    bytes still to clear (starts as n, decremented alongside R3,
//	      except in the final byte loop, which counts with CTR only)
//	R0    zero source for the scalar stores and zero index in (R3+R0)
//	VS32  zeroed with XXLXOR and used for the 16 byte vector stores
//	CTR   trip count for each of the loops
//
// Strategy, chosen by n:
//	n < 8     byte loop only (zerotailloop).
//	n < 512   32 byte chunks (zero32), then doublewords (zero8), then bytes.
//	n >= 512  advance R3 to a 128 byte boundary (scalar stores up to 8 byte
//	          alignment, one doubleword up to 16, 16 byte vector stores up
//	          to 128), clear 512 bytes per iteration with four DCBZ
//	          (zero512), then 128 bytes per DCBZ (remain), and hand the
//	          final < 128 bytes to the zero32/zero8/byte paths.
TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT|NOFRAME, $0-16
	MOVD ptr+0(FP), R3
	MOVD n+8(FP), R4

	// Determine if there are doublewords to clear
check:
	ANDCC $7, R4, R5  // R5: leftover bytes to clear (n mod 8)
	SRD   $3, R4, R6  // R6: double words to clear (n / 8)
	CMP   R6, $0, CR1 // CR1[EQ] set if no double words

	BC    12, 6, nozerolarge // branch on CR1[EQ]: only single bytes
	CMP   R4, $512
	BLT   under512           // special case for < 512
	ANDCC $127, R3, R8       // check for 128 alignment of address
	BEQ   zero512setup       // already 128 byte aligned

	ANDCC $7, R3, R15
	BEQ   zero512xsetup // at least 8 byte aligned

	// zero bytes up to 8 byte alignment:
	// store 1, 2 and/or 4 bytes, each only if the matching low address bit is set.

	ANDCC $1, R3, R15 // odd address?
	BEQ   byte2
	MOVB  R0, 0(R3)   // zero 1 byte
	ADD   $1, R3      // bump ptr by 1
	ADD   $-1, R4     // one byte fewer to clear

byte2:
	ANDCC $2, R3, R15 // address bit 1 set (not yet 4 byte aligned)?
	BEQ   byte4
	MOVH  R0, 0(R3)   // zero 2 bytes
	ADD   $2, R3      // bump ptr by 2
	ADD   $-2, R4

byte4:
	ANDCC $4, R3, R15   // address bit 2 set (not yet 8 byte aligned)?
	BEQ   zero512xsetup
	MOVW  R0, 0(R3)     // zero 4 bytes
	ADD   $4, R3        // bump ptr by 4
	ADD   $-4, R4
	BR    zero512xsetup // ptr should now be 8 byte aligned

under512:
	MOVD  R6, CTR     // R6 = number of double words; used if we fall into zero8
	SRDCC $2, R6, R7  // R7 = number of 32 byte chunks; nonzero if at least one
	BNE   zero32setup

	// Clear double words

zero8:
	MOVD R0, 0(R3)    // double word
	ADD  $8, R3
	ADD  $-8, R4
	BC   16, 0, zero8 // dec ctr, br zero8 if ctr not 0
	BR   nozerolarge  // handle leftovers

	// Prepare to clear 32 bytes at a time.
	// Expects R7 = number of 32 byte chunks (set by under512 or smaller).

zero32setup:
	DCBTST (R3)             // prepare data cache
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
	MOVD   R7, CTR          // number of 32 byte chunks
	MOVD   $16, R8          // index of the second 16 byte store

zero32:
	STXVD2X VS32, (R3+R0)   // store 16 bytes
	STXVD2X VS32, (R3+R8)   // store the next 16 bytes
	ADD     $32, R3
	ADD     $-32, R4
	BC      16, 0, zero32   // dec ctr, br zero32 if ctr not 0
	RLDCLCC $61, R4, $3, R6 // R6 = remaining doublewords; CR0 feeds the BEQ below
	BEQ     nozerolarge     // no whole doublewords left
	MOVD    R6, CTR         // set up the CTR for doublewords
	BR      zero8

nozerolarge:
	ANDCC $7, R4, R5 // any remaining bytes
	BC    4, 1, LR   // ble lr: return if there are none

zerotail:
	MOVD R5, CTR // set up to clear tail bytes

zerotailloop:
	MOVB R0, 0(R3)           // clear single bytes
	ADD  $1, R3
	BC   16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
	RET

zero512xsetup: // 512 chunk with extra needed; R3 is 8 byte aligned here
	ANDCC $8, R3, R11    // address bit 3 set (8 but not 16 byte aligned)?
	BEQ   zero512setup16
	MOVD  R0, 0(R3)      // clear 8 bytes
	ADD   $8, R3         // update ptr to next 8
	ADD   $-8, R4        // dec count by 8

zero512setup16:
	ANDCC $127, R3, R14 // R14 = offset within the 128 byte block
	BEQ   zero512setup  // already 128 byte aligned
	MOVD  $128, R15
	SUB   R14, R15, R14 // find increment to 128 alignment
	SRD   $4, R14, R15  // number of 16 byte chunks

zero512presetup:
	MOVD   R15, CTR         // loop counter of 16 bytes
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)

zero512preloop: // clear up to 128 alignment
	STXVD2X VS32, (R3+R0)         // clear 16 bytes
	ADD     $16, R3               // update ptr
	ADD     $-16, R4              // dec count
	BC      16, 0, zero512preloop // dec ctr, br zero512preloop if ctr not 0

zero512setup: // setup for dcbz loop
	CMP  R4, $512   // check if at least 512
	BLT  remain     // alignment stores may have left fewer than 512
	SRD  $9, R4, R8 // loop count for 512 chunks
	MOVD R8, CTR    // set up counter
	MOVD $128, R9   // index regs for 128 bytes
	MOVD $256, R10
	MOVD $384, R11

zero512:
	DCBZ (R3+R0)  // clear first chunk
	DCBZ (R3+R9)  // clear second chunk
	DCBZ (R3+R10) // clear third chunk
	DCBZ (R3+R11) // clear fourth chunk
	ADD  $512, R3
	ADD  $-512, R4
	BC   16, 0, zero512 // dec ctr, br zero512 if ctr not 0

remain:
	CMP  R4, $128  // check if 128 byte chunks left
	BLT  smaller
	DCBZ (R3+R0)   // clear 128
	ADD  $128, R3
	ADD  $-128, R4
	BR   remain

smaller:
	ANDCC $127, R4, R7 // find leftovers
	BEQ   done         // nothing left
	CMP   R7, $64      // 64 or more, do 32 at a time
	BLT   zero8setup   // less than 64, do 8 at a time
	SRD   $5, R7, R7   // R7 = number of 32 byte chunks for zero32setup
	BR    zero32setup

zero8setup:
	SRDCC $3, R7, R7  // R7 = doublewords left; zero means less than 8 bytes
	BEQ   nozerolarge
	MOVD  R7, CTR
	BR    zero8

done:
	RET