// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "go_asm.h"
#include "textflag.h"

// ·Compare performs a three-way comparison of two byte slices and stores
// -1, 0 or +1 into the result slot.
//
// Frame ($0-56): a_base+0, a_len+8, b_base+24, b_len+32, ret+48.
// (Presumably declared in Go as func Compare(a, b []byte) int; the Go
// declaration is not part of this file.)
//
// If both operands share the same base pointer only the lengths are
// compared here. Otherwise control is transferred with a plain branch
// (not a call) to the endian-specific body, which stores the result
// through R7 and executes the RET on this function's behalf.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	a_base+0(FP), R5
	MOVD	b_base+24(FP), R6
	MOVD	a_len+8(FP), R3
	CMP	R5,R6,CR7		// CR7: a_base vs b_base
	MOVD	b_len+32(FP), R4
	MOVD	$ret+48(FP), R7		// R7 = address of the result slot
	CMP	R3,R4,CR6		// CR6: a_len vs b_len
	BEQ	CR7,equal		// same base pointer: lengths decide

#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

equal:
	BEQ	CR6,done		// same pointer, same length: result 0
	MOVD	$1, R8
	BGT	CR6,greater		// a_len > b_len: store +1
	NEG	R8			// a_len < b_len: R8 = -1

greater:
	MOVD	R8, (R7)		// stores +1 or -1 (whatever R8 holds)
	RET

done:
	MOVD	$0, (R7)
	RET

// bytes·Compare has the same frame layout and the same code as ·Compare.
// The FUNCDATA line reuses ·Compare's argument stack map for this symbol.
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
	FUNCDATA $0, ·Compare·args_stackmap(SB)
	MOVD	a_base+0(FP), R5
	MOVD	b_base+24(FP), R6
	MOVD	a_len+8(FP), R3
	CMP	R5,R6,CR7		// CR7: a_base vs b_base
	MOVD	b_len+32(FP), R4
	MOVD	$ret+48(FP), R7		// R7 = address of the result slot
	CMP	R3,R4,CR6		// CR6: a_len vs b_len
	BEQ	CR7,equal		// same base pointer: lengths decide

#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

equal:
	BEQ	CR6,done		// same pointer, same length: result 0
	MOVD	$1, R8
	BGT	CR6,greater		// a_len > b_len: store +1
	NEG	R8			// a_len < b_len: R8 = -1

greater:
	MOVD	R8, (R7)		// stores +1 or -1 (whatever R8 holds)
	RET

done:
	MOVD	$0, (R7)
	RET

// runtime·cmpstring is the same three-way comparison for a two-word
// (base, len) operand layout.
//
// Frame ($0-40): a_base+0, a_len+8, b_base+16, b_len+24, ret+32.
// (Presumably the operands are Go strings; the Go declaration is not
// part of this file.)
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	a_base+0(FP), R5
	MOVD	b_base+16(FP), R6
	MOVD	a_len+8(FP), R3
	CMP	R5,R6,CR7		// CR7: a_base vs b_base
	MOVD	b_len+24(FP), R4
	MOVD	$ret+32(FP), R7		// R7 = address of the result slot
	CMP	R3,R4,CR6		// CR6: a_len vs b_len
	BEQ	CR7,equal		// same base pointer: lengths decide

#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

equal:
	BEQ	CR6,done		// same pointer, same length: result 0
	MOVD	$1, R8
	BGT	CR6,greater		// a_len > b_len: store +1
	NEG	R8			// a_len < b_len: R8 = -1

greater:
	MOVD	R8, (R7)		// stores +1 or -1 (whatever R8 holds)
	RET

done:
	MOVD	$0, (R7)
	RET

// Do an efficient memcmp for ppc64le
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// R7 = addr of return value
//
// Compares the first min(R3,R4) bytes as unsigned values in memory
// order. If they all match, the length comparison held in CR2 decides
// the result. Stores -1, 0 or +1 through R7 and returns.
//
// Registers written: R3, R5, R6, R8, R9, R10, R14, R15, R16, CTR,
// CR0 and CR2.
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8			// set up length
	CMP	R3,R4,CR2		// CR2: a len vs b len, kept live to the end
	BLT	CR2,setuplen		// a len is the shorter one
	MOVD	R4,R8			// use R4 for comparison len
setuplen:
	MOVD	R8,CTR			// set up loop counter (byte loop, len < 8)
	CMP	R8,$8			// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)			// cache hint
	DCBT	(R6)
	CMP	R8,$32			// optimize >= 32
	MOVD	R8,R9
	BLT	setup8a			// 8 byte moves only
setup32a:
	SRADCC	$5,R8,R9		// number of 32 byte chunks
	MOVD	R9,CTR

	// Special processing for 32 bytes or longer.
	// Loading this way is faster and correct as long as the
	// doublewords being compared are equal. Once they
	// are found unequal, reload them in proper byte order
	// to determine greater or less than.
	// R16 tracks the offset (relative to R5/R6) of the doubleword
	// pair most recently compared, for use by cmpne.
loop32a:
	MOVD	0(R5),R9		// doublewords to compare
	MOVD	0(R6),R10		// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10			// bytes equal?
	MOVD	$0,R16			// set up for cmpne
	BNE	cmpne			// further compare for LT or GT
	MOVD	16(R5),R9		// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15			// bytes match?
	MOVD	$8,R16			// set up for cmpne
	BNE	cmpne			// further compare for LT or GT
	MOVD	24(R5),R14		// get next pair of doublewords
	MOVD	24(R6),R15
	CMPU	R9,R10			// bytes match?
	MOVD	$16,R16			// set up for cmpne
	BNE	cmpne			// further compare for LT or GT
	MOVD	$-8,R16			// for cmpne, R5,R6 already inc by 32
	ADD	$32,R5			// bump up to next 32
	ADD	$32,R6
	CMPU	R14,R15			// bytes match?
	BC	8,2,loop32a		// decrement CTR; loop while CTR != 0 and CR0[EQ]
	BNE	cmpne
	ANDCC	$24,R8,R9		// Any 8 byte chunks?
	BEQ	leftover		// and result is 0
setup8a:
	SRADCC	$3,R9,R9		// get the 8 byte count
	BEQ	leftover		// shifted value is 0
	MOVD	R9,CTR			// loop count for doublewords
loop8:
	MOVDBR	(R5+R0),R9		// doublewords to compare
	MOVDBR	(R6+R0),R10		// LE compare order
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10			// match?
	BC	8,2,loop8		// decrement CTR; loop while CTR != 0 and CR0[EQ]
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9		// check for leftover bytes
	MOVD	R9,CTR			// save the ctr
	BNE	simple			// leftover bytes
	BEQ	CR2,equal		// all bytes matched: lengths decide
	BLT	CR2,less		// 1st len < 2nd len
	BR	greater			// 1st len > 2nd len
simplecheck:
	CMP	R8,$0			// remaining compare length 0
	BNE	simple			// do simple compare
	BEQ	CR2,equal		// nothing to compare: lengths decide
	BLT	CR2,less		// 1st len < 2nd len, result less
	BR	greater			// 1st len > 2nd len must be greater
simple:
	MOVBZ	0(R5), R9		// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6), R10		// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9, R10
	BC	8,2,simple		// decrement CTR; loop while CTR != 0 and CR0[EQ]
	BGT	greater			// 1st > 2nd
	BLT	less			// 1st < 2nd
	BEQ	CR2,equal		// all bytes matched: lengths decide
	BGT	CR2,greater		// 1st len > 2nd len
	BR	less			// must be less
cmpne:					// reached only when a doubleword pair differed
	MOVDBR	(R5+R16),R8		// reload in reverse order
	MOVDBR	(R6+R16),R9
	CMPU	R8,R9			// compare correct endianness
	BGT	greater			// here only if NE
less:
	MOVD	$-1,R3
	MOVD	R3,(R7)			// return value if A < B
	RET
equal:
	MOVD	$0,(R7)			// return value if A == B
	RET
greater:
	MOVD	$1,R3
	MOVD	R3,(R7)			// return value if A > B
	RET

// Do an efficient memcmp for ppc64 (BE)
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// R7 = addr of return value
//
// Same structure as cmpbodyLE, but the doublewords are loaded with plain
// MOVD and compared directly, so no byte-reversed reload step is used.
// Stores -1, 0 or +1 through R7 and returns.
//
// Registers written: R3, R5, R6, R8, R9, R10, R14, R15, CTR, CR0 and CR2.
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8			// set up length
	CMP	R3,R4,CR2		// CR2: a len vs b len, kept live to the end
	BLT	CR2,setuplen		// a len is the shorter one
	MOVD	R4,R8			// use R4 for comparison len
setuplen:
	MOVD	R8,CTR			// set up loop counter (byte loop, len < 8)
	CMP	R8,$8			// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)			// cache hint
	DCBT	(R6)
	CMP	R8,$32			// optimize >= 32
	MOVD	R8,R9
	BLT	setup8a			// 8 byte moves only

setup32a:
	SRADCC	$5,R8,R9		// number of 32 byte chunks
	MOVD	R9,CTR
loop32a:
	MOVD	0(R5),R9		// doublewords to compare
	MOVD	0(R6),R10		// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10			// bytes equal?
	BLT	less			// found to be less
	BGT	greater			// found to be greater
	MOVD	16(R5),R9		// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15			// bytes match?
	BLT	less			// found less
	BGT	greater			// found greater
	MOVD	24(R5),R14		// get next pair of doublewords
	MOVD	24(R6),R15
	CMPU	R9,R10			// bytes match?
	BLT	less			// found to be less
	BGT	greater			// found to be greater
	ADD	$32,R5			// bump up to next 32
	ADD	$32,R6
	CMPU	R14,R15			// bytes match?
	BC	8,2,loop32a		// decrement CTR; loop while CTR != 0 and CR0[EQ]
	BLT	less			// with BE, byte ordering is
	BGT	greater			// good for compare
	ANDCC	$24,R8,R9		// Any 8 byte chunks?
	BEQ	leftover		// and result is 0
setup8a:
	SRADCC	$3,R9,R9		// get the 8 byte count
	BEQ	leftover		// shifted value is 0
	MOVD	R9,CTR			// loop count for doublewords
loop8:
	MOVD	(R5),R9			// doublewords to compare
	MOVD	(R6),R10
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10			// match?
	BC	8,2,loop8		// decrement CTR; loop while CTR != 0 and CR0[EQ]
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9		// check for leftover bytes
	MOVD	R9,CTR			// save the ctr
	BNE	simple			// leftover bytes
	BEQ	CR2,equal		// all bytes matched: lengths decide
	BLT	CR2,less		// 1st len < 2nd len
	BR	greater			// 1st len > 2nd len
simplecheck:
	CMP	R8,$0			// remaining compare length 0
	BNE	simple			// do simple compare
	BEQ	CR2,equal		// nothing to compare: lengths decide
	BLT	CR2,less		// 1st len < 2nd len, result less
	BR	greater			// 1st len > 2nd len must be greater
simple:
	MOVBZ	0(R5),R9		// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6),R10		// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9,R10
	BC	8,2,simple		// decrement CTR; loop while CTR != 0 and CR0[EQ]
	BGT	greater			// 1st > 2nd
	BLT	less			// 1st < 2nd
	BEQ	CR2,equal		// all bytes matched: lengths decide
	BGT	CR2,greater		// 1st len > 2nd len
					// otherwise 1st len < 2nd len: fall through to less
less:
	MOVD	$-1,R3
	MOVD	R3,(R7)			// return value if A < B
	RET
equal:
	MOVD	$0,(R7)			// return value if A == B
	RET
greater:
	MOVD	$1,R3
	MOVD	R3,(R7)			// return value if A > B
	RET