// Origin: github.com/SandwichDev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/compare_ppc64x.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "go_asm.h"
#include "textflag.h"

// ·Compare orders two byte sequences.
//
// Arguments are read from a_base+0(FP), a_len+8(FP), b_base+24(FP) and
// b_len+32(FP). The result (-1, 0 or +1) is stored at ret+48(FP).
// When both base pointers are identical the data is not inspected and
// only the lengths decide the result; otherwise control transfers to
// the endian-specific cmpbody routine with:
//   R3 = a len, R4 = b len, R5 = a addr, R6 = b addr, R7 = &ret
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	a_base+0(FP), R5	// R5 = a data pointer
	MOVD	b_base+24(FP), R6	// R6 = b data pointer
	MOVD	a_len+8(FP), R3		// R3 = a length
	CMP	R5, R6, CR7		// CR7: do both operands start at the same address?
	MOVD	b_len+32(FP), R4	// R4 = b length
	MOVD	$ret+48(FP), R7		// R7 = address of the result slot
	CMP	R3, R4, CR6		// CR6: a length vs b length
	BEQ	CR7, ptrsEqual		// same storage: lengths alone decide

#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

ptrsEqual:
	BEQ	CR6, lensEqual		// same pointer, same length: result 0
	MOVD	$1, R8			// provisional result: a is longer
	BGT	CR6, storeResult
	NEG	R8			// a is shorter: result becomes -1

storeResult:
	MOVD	R8, (R7)
	RET

lensEqual:
	MOVD	$0, (R7)
	RET

// runtime·cmpstring is the same comparison with a different argument
// layout: a_base+0(FP), a_len+8(FP), b_base+16(FP), b_len+24(FP), and
// the result stored at ret+32(FP). Register setup for the cmpbody
// routines is identical to ·Compare.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	a_base+0(FP), R5	// R5 = a data pointer
	MOVD	b_base+16(FP), R6	// R6 = b data pointer
	MOVD	a_len+8(FP), R3		// R3 = a length
	CMP	R5, R6, CR7		// CR7: do both operands start at the same address?
	MOVD	b_len+24(FP), R4	// R4 = b length
	MOVD	$ret+32(FP), R7		// R7 = address of the result slot
	CMP	R3, R4, CR6		// CR6: a length vs b length
	BEQ	CR7, ptrsEqual		// same storage: lengths alone decide

#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

ptrsEqual:
	BEQ	CR6, lensEqual		// same pointer, same length: result 0
	MOVD	$1, R8			// provisional result: a is longer
	BGT	CR6, storeResult
	NEG	R8			// a is shorter: result becomes -1

storeResult:
	MOVD	R8, (R7)
	RET

lensEqual:
	MOVD	$0, (R7)
	RET

// cmpbodyLE is the memcmp core for ppc64le.
//
// On entry:
//   R3 = a len
//   R4 = b len
//   R5 = a addr
//   R6 = b addr
//   R7 = addr of return value
//
// It compares min(R3, R4) bytes and stores -1, 0 or +1 through R7.
// If the common prefix is identical, the length comparison held in CR2
// decides the result. Scratch registers written here: R3, R8, R9, R10,
// R14, R15, R16, CTR and the CR0/CR2 condition fields.
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3, R8			// start with the a length
	CMP	R3, R4, CR2		// CR2 = a len vs b len, kept for the final tie-break
	BLT	CR2, haveMinLen		// a is shorter: R8 already holds the minimum
	MOVD	R4, R8			// otherwise compare b len bytes
haveMinLen:
	MOVD	R8, CTR			// byte count, used when going straight to byteLoop
	CMP	R8, $8
	BLT	shortCheck		// under 8 bytes: byte-at-a-time only
	DCBT	(R5)			// cache hint for both operands
	DCBT	(R6)
	CMP	R8, $32
	MOVD	R8, R9
	BLT	chunks8			// 8..31 bytes: doubleword loop only
chunks32:
	SRADCC	$5, R8, R9		// number of 32-byte chunks
	MOVD	R9, CTR

	// 32 bytes or more: four doubleword pairs per iteration.
	// The plain loads are only good for an equality test here; as soon
	// as a pair differs, mismatch32 reloads that pair byte-reversed to
	// find out which side is greater. R16 tracks the offset of the pair
	// just compared so mismatch32 knows what to reload.
loop32:
	MOVD	0(R5), R9		// pair 1
	MOVD	0(R6), R10
	MOVD	8(R5), R14		// pair 2, loaded ahead of its compare
	MOVD	8(R6), R15
	CMPU	R9, R10			// pair 1 equal?
	MOVD	$0, R16			// pair 1 lives at offset 0
	BNE	mismatch32
	MOVD	16(R5), R9		// pair 3
	MOVD	16(R6), R10
	CMPU	R14, R15		// pair 2 equal?
	MOVD	$8, R16			// pair 2 lives at offset 8
	BNE	mismatch32
	MOVD	24(R5), R14		// pair 4
	MOVD	24(R6), R15
	CMPU	R9, R10			// pair 3 equal?
	MOVD	$16, R16		// pair 3 lives at offset 16
	BNE	mismatch32
	MOVD	$-8, R16		// pointers move by 32 next, so pair 4 ends up at -8
	ADD	$32, R5
	ADD	$32, R6
	CMPU	R14, R15		// pair 4 equal?
	BC	8, 2, loop32		// decrement CTR; repeat while CTR != 0 and pair matched
	BNE	mismatch32
	ANDCC	$24, R8, R9		// bytes remaining in whole doublewords
	BEQ	tail			// none
chunks8:
	SRADCC	$3, R9, R9		// doubleword count
	BEQ	tail			// zero doublewords
	MOVD	R9, CTR
dwordLoop:
	MOVDBR	(R5+R0), R9		// byte-reversed loads give the
	MOVDBR	(R6+R0), R10		// LE compare order
	ADD	$8, R5
	ADD	$8, R6
	CMPU	R9, R10
	BC	8, 2, dwordLoop		// decrement CTR; repeat while CTR != 0 and equal
	BGT	retGreater
	BLT	retLess
tail:
	ANDCC	$7, R8, R9		// trailing bytes, 0..7
	MOVD	R9, CTR
	BNE	byteLoop		// some bytes remain
	BEQ	CR2, retEqual		// prefix identical: lengths decide
	BLT	CR2, retLess
	BR	retGreater
shortCheck:
	CMP	R8, $0			// anything to compare at all?
	BNE	byteLoop
	BEQ	CR2, retEqual		// nothing to compare: lengths decide
	BLT	CR2, retLess		// 1st len < 2nd len
	BR	retGreater		// 1st len > 2nd len
byteLoop:
	MOVBZ	0(R5), R9		// byte from 1st operand
	ADD	$1, R5
	MOVBZ	0(R6), R10		// byte from 2nd operand
	ADD	$1, R6
	CMPU	R9, R10
	BC	8, 2, byteLoop		// decrement CTR; repeat while CTR != 0 and equal
	BGT	retGreater		// 1st byte > 2nd byte
	BLT	retLess			// 1st byte < 2nd byte
	BEQ	CR2, retEqual		// all bytes matched: lengths decide
	BGT	CR2, retGreater		// 1st len > 2nd len
	BR	retLess			// 1st len < 2nd len
mismatch32:				// reached only when a pair differed
	MOVDBR	(R5+R16), R8		// reload the differing pair byte-reversed
	MOVDBR	(R6+R16), R9
	CMPU	R8, R9
	BGT	retGreater		// values are known unequal, so otherwise less
retLess:
	MOVD	$-1, R3
	MOVD	R3, (R7)		// A < B
	RET
retEqual:
	MOVD	$0, (R7)		// A == B
	RET
retGreater:
	MOVD	$1, R3
	MOVD	R3, (R7)		// A > B
	RET

// cmpbodyBE is the memcmp core for ppc64 (big endian).
//
// On entry:
//   R3 = a len
//   R4 = b len
//   R5 = a addr
//   R6 = b addr
//   R7 = addr of return value
//
// Same structure as the little-endian routine, except that the plain
// doubleword loads already order correctly, so each compare can branch
// straight to the less/greater exits. Scratch registers written here:
// R3, R8, R9, R10, R14, R15, CTR and the CR0/CR2 condition fields.
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3, R8			// start with the a length
	CMP	R3, R4, CR2		// CR2 = a len vs b len, kept for the final tie-break
	BLT	CR2, haveMinLen		// a is shorter: R8 already holds the minimum
	MOVD	R4, R8			// otherwise compare b len bytes
haveMinLen:
	MOVD	R8, CTR			// byte count, used when going straight to byteLoop
	CMP	R8, $8
	BLT	shortCheck		// under 8 bytes: byte-at-a-time only
	DCBT	(R5)			// cache hint for both operands
	DCBT	(R6)
	CMP	R8, $32
	MOVD	R8, R9
	BLT	chunks8			// 8..31 bytes: doubleword loop only

chunks32:
	SRADCC	$5, R8, R9		// number of 32-byte chunks
	MOVD	R9, CTR
loop32:
	MOVD	0(R5), R9		// pair 1
	MOVD	0(R6), R10
	MOVD	8(R5), R14		// pair 2, loaded ahead of its compare
	MOVD	8(R6), R15
	CMPU	R9, R10			// pair 1
	BLT	retLess
	BGT	retGreater
	MOVD	16(R5), R9		// pair 3
	MOVD	16(R6), R10
	CMPU	R14, R15		// pair 2
	BLT	retLess
	BGT	retGreater
	MOVD	24(R5), R14		// pair 4
	MOVD	24(R6), R15
	CMPU	R9, R10			// pair 3
	BLT	retLess
	BGT	retGreater
	ADD	$32, R5			// advance to the next chunk
	ADD	$32, R6
	CMPU	R14, R15		// pair 4
	BC	8, 2, loop32		// decrement CTR; repeat while CTR != 0 and pair matched
	BLT	retLess			// with BE, the loaded byte order is
	BGT	retGreater		// already right for ordering
	ANDCC	$24, R8, R9		// bytes remaining in whole doublewords
	BEQ	tail			// none
chunks8:
	SRADCC	$3, R9, R9		// doubleword count
	BEQ	tail			// zero doublewords
	MOVD	R9, CTR
dwordLoop:
	MOVD	(R5), R9
	MOVD	(R6), R10
	ADD	$8, R5
	ADD	$8, R6
	CMPU	R9, R10
	BC	8, 2, dwordLoop		// decrement CTR; repeat while CTR != 0 and equal
	BGT	retGreater
	BLT	retLess
tail:
	ANDCC	$7, R8, R9		// trailing bytes, 0..7
	MOVD	R9, CTR
	BNE	byteLoop		// some bytes remain
	BEQ	CR2, retEqual		// prefix identical: lengths decide
	BLT	CR2, retLess
	BR	retGreater
shortCheck:
	CMP	R8, $0			// anything to compare at all?
	BNE	byteLoop
	BEQ	CR2, retEqual		// nothing to compare: lengths decide
	BLT	CR2, retLess		// 1st len < 2nd len
	BR	retGreater		// 1st len > 2nd len
byteLoop:
	MOVBZ	0(R5), R9		// byte from 1st operand
	ADD	$1, R5
	MOVBZ	0(R6), R10		// byte from 2nd operand
	ADD	$1, R6
	CMPU	R9, R10
	BC	8, 2, byteLoop		// decrement CTR; repeat while CTR != 0 and equal
	BGT	retGreater		// 1st byte > 2nd byte
	BLT	retLess			// 1st byte < 2nd byte
	BEQ	CR2, retEqual		// all bytes matched: lengths decide
	BGT	CR2, retGreater		// 1st len > 2nd len; otherwise fall into retLess
retLess:
	MOVD	$-1, R3
	MOVD	R3, (R7)		// A < B
	RET
retEqual:
	MOVD	$0, (R7)		// A == B
	RET
retGreater:
	MOVD	$1, R3
	MOVD	R3, (R7)		// A > B
	RET