// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le
// +build ppc64 ppc64le

#include "go_asm.h"
#include "textflag.h"

// ·Compare: three-way comparison of two byte sequences.
//
// Argument frame (56 bytes):
//   a_base+0, a_len+8, b_base+24, b_len+32, ret+48
//
// Stores -1, 0 or +1 into ret. When both base pointers are identical the
// contents are not read; the result is decided by the lengths alone.
// Otherwise control is handed to cmpbodyLE<> / cmpbodyBE<> with:
//   R3 = a len, R4 = b len, R5 = a addr, R6 = b addr, R7 = &ret
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	a_base+0(FP), R5
	MOVD	b_base+24(FP), R6
	MOVD	a_len+8(FP), R3
	CMP	R5,R6,CR7		// CR7: same backing address?
	MOVD	b_len+32(FP), R4
	MOVD	$ret+48(FP), R7		// R7 = address of the result slot
	CMP	R3,R4,CR6		// CR6: a_len vs b_len
	BEQ	CR7,equal		// same address: only the lengths matter

#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

equal:
	BEQ	CR6,done		// same address, same length: result 0
	MOVD	$1, R8
	BGT	CR6,greater		// a_len > b_len: result +1
	NEG	R8			// a_len < b_len: result -1

greater:
	MOVD	R8, (R7)
	RET

done:
	MOVD	$0, (R7)
	RET

// runtime·cmpstring: same contract as ·Compare, with a 40-byte argument frame:
//   a_base+0, a_len+8, b_base+16, b_len+24, ret+32
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	a_base+0(FP), R5
	MOVD	b_base+16(FP), R6
	MOVD	a_len+8(FP), R3
	CMP	R5,R6,CR7		// CR7: same backing address?
	MOVD	b_len+24(FP), R4
	MOVD	$ret+32(FP), R7		// R7 = address of the result slot
	CMP	R3,R4,CR6		// CR6: a_len vs b_len
	BEQ	CR7,equal		// same address: only the lengths matter

#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

equal:
	BEQ	CR6,done		// same address, same length: result 0
	MOVD	$1, R8
	BGT	CR6,greater		// a_len > b_len: result +1
	NEG	R8			// a_len < b_len: result -1

greater:
	MOVD	R8, (R7)
	RET

done:
	MOVD	$0, (R7)
	RET

// Do an efficient memcmp for ppc64le
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// R7 = addr of return value
//
// Internal register use:
//   R8  = min(a len, b len), the number of bytes to compare
//   CR2 = a len vs b len, consulted once all compared bytes match
//   CTR = loop counter (32-byte chunks, doublewords, or single bytes)
//   R9, R10, R14, R15 = data being compared; R16 = reload offset for cmpne
// R3, R5 and R6 are overwritten.
//
// "BC 8,2,label" decrements CTR and branches while CTR != 0 and CR0 is EQ.
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8		// set up length
	CMP	R3,R4,CR2	// unequal?
	BLT	CR2,setuplen	// a len < b len: keep R8 = a len
	MOVD	R4,R8		// use R4 for comparison len
setuplen:
	MOVD	R8,CTR		// set up loop counter
	CMP	R8,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)		// cache hint
	DCBT	(R6)
	CMP	R8,$32		// optimize >= 32
	MOVD	R8,R9
	BLT	setup8a		// 8 byte moves only
setup32a:
	SRADCC	$5,R8,R9	// number of 32 byte chunks
	MOVD	R9,CTR

	// Special processing for 32 bytes or longer.
	// Loading this way is faster and correct as long as the
	// doublewords being compared are equal. Once they
	// are found unequal, reload them in proper byte order
	// to determine greater or less than.
loop32a:
	MOVD	0(R5),R9	// doublewords to compare
	MOVD	0(R6),R10	// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10		// bytes equal?
	MOVD	$0,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	16(R5),R9	// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15		// bytes match?
	MOVD	$8,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	24(R5),R14	// get next pair of doublewords
	MOVD	24(R6),R15
	CMPU	R9,R10		// bytes match?
	MOVD	$16,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
	ADD	$32,R5		// bump up to next 32
	ADD	$32,R6
	CMPU	R14,R15		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BNE	cmpne
	ANDCC	$24,R8,R9	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R9,R9	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R9,CTR		// loop count for doublewords
loop8:
	MOVDBR	(R5+R0),R9	// doublewords to compare
	MOVDBR	(R6+R0),R10	// LE compare order
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9	// check for leftover bytes
	MOVD	R9,CTR		// save the ctr
	BNE	simple		// leftover bytes
	BEQ	CR2,equal	// test CR2 for length comparison
	BLT	CR2,less
	BR	greater
simplecheck:
	CMP	R8,$0		// remaining compare length 0
	BNE	simple		// do simple compare
	BEQ	CR2,equal	// test CR2 for length comparison
	BLT	CR2,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simple:
	MOVBZ	0(R5), R9	// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6), R10	// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9, R10
	BC	8,2,simple	// bc ctr <> 0 && cr
	BGT	greater		// 1st > 2nd
	BLT	less		// 1st < 2nd
	BEQ	CR2,equal	// test CR2 for length comparison
	BGT	CR2,greater	// 1st len > 2nd len
	BR	less		// must be less
cmpne:				// reached only when a doubleword pair differs
	MOVDBR	(R5+R16),R8	// reload in reverse order
	MOVDBR	(R6+R16),R9
	CMPU	R8,R9		// compare correct endianness
	BGT	greater		// here only if NE
less:
	MOVD	$-1,R3
	MOVD	R3,(R7)		// return value if A < B
	RET
equal:
	MOVD	$0,(R7)		// return value if A == B
	RET
greater:
	MOVD	$1,R3
	MOVD	R3,(R7)		// return value if A > B
	RET

// Do an efficient memcmp for ppc64 (BE)
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// R7 = addr of return value
//
// Internal register use:
//   R8  = min(a len, b len), the number of bytes to compare
//   CR2 = a len vs b len, consulted once all compared bytes match
//   CTR = loop counter (32-byte chunks, doublewords, or single bytes)
//   R9, R10, R14, R15 = data being compared
// R3, R5 and R6 are overwritten.
//
// "BC 8,2,label" decrements CTR and branches while CTR != 0 and CR0 is EQ.
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8		// set up length
	CMP	R3,R4,CR2	// unequal?
	BLT	CR2,setuplen	// a len < b len: keep R8 = a len
	MOVD	R4,R8		// use R4 for comparison len
setuplen:
	MOVD	R8,CTR		// set up loop counter
	CMP	R8,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)		// cache hint
	DCBT	(R6)
	CMP	R8,$32		// optimize >= 32
	MOVD	R8,R9
	BLT	setup8a		// 8 byte moves only

setup32a:
	SRADCC	$5,R8,R9	// number of 32 byte chunks
	MOVD	R9,CTR
loop32a:
	MOVD	0(R5),R9	// doublewords to compare
	MOVD	0(R6),R10	// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10		// bytes equal?
	BLT	less		// found to be less
	BGT	greater		// found to be greater
	MOVD	16(R5),R9	// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15		// bytes match?
	BLT	less		// found less
	BGT	greater		// found greater
	MOVD	24(R5),R14	// get next pair of doublewords
	MOVD	24(R6),R15
	CMPU	R9,R10		// bytes match?
	BLT	less		// found to be less
	BGT	greater		// found to be greater
	ADD	$32,R5		// bump up to next 32
	ADD	$32,R6
	CMPU	R14,R15		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BLT	less		// with BE, byte ordering is
	BGT	greater		// good for compare
	ANDCC	$24,R8,R9	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R9,R9	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R9,CTR		// loop count for doublewords
loop8:
	MOVD	(R5),R9
	MOVD	(R6),R10
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9	// check for leftover bytes
	MOVD	R9,CTR		// save the ctr
	BNE	simple		// leftover bytes
	BEQ	CR2,equal	// test CR2 for length comparison
	BLT	CR2,less
	BR	greater
simplecheck:
	CMP	R8,$0		// remaining compare length 0
	BNE	simple		// do simple compare
	BEQ	CR2,equal	// test CR2 for length comparison
	BLT	CR2,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simple:
	MOVBZ	0(R5),R9	// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6),R10	// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9,R10
	BC	8,2,simple	// bc ctr <> 0 && cr
	BGT	greater		// 1st > 2nd
	BLT	less		// 1st < 2nd
	BEQ	CR2,equal	// test CR2 for length comparison
	BGT	CR2,greater	// 1st len > 2nd len
				// otherwise 1st len < 2nd len: fall through to less
less:
	MOVD	$-1,R3
	MOVD	R3,(R7)		// return value if A < B
	RET
equal:
	MOVD	$0,(R7)		// return value if A == B
	RET
greater:
	MOVD	$1,R3
	MOVD	R3,(R7)		// return value if A > B
	RET