// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "go_asm.h"
#include "textflag.h"

// Helper names for x-form loads in BE ordering.
// On LE the byte-reversed forms produce BE element order so the
// doubleword compares below are memory-order compares on both endians.
#ifdef GOARCH_ppc64le
#define _LDBEX	MOVDBR
#define _LWBEX	MOVWBR
#define _LHBEX	MOVHBR
#else
#define _LDBEX	MOVD
#define _LWBEX	MOVW
#define _LHBEX	MOVH
#endif

#ifdef GOPPC64_power9
#define SETB_CR0(rout) SETB CR0, rout
#define SETB_CR1(rout) SETB CR1, rout
#define SETB_INIT()
#define SETB_CR0_NE(rout) SETB_CR0(rout)
#else
// A helper macro to emulate SETB on P8. This assumes
// -1 is in R20, and 1 is in R21. crxlt and crxeq must
// also be the same CR field.
#define _SETB(crxlt, crxeq, rout) \
	ISEL	crxeq,R0,R21,rout \
	ISEL	crxlt,R20,rout,rout

// A special case when it is known the comparison
// will always be not equal. The result must be -1 or 1.
#define SETB_CR0_NE(rout) \
	ISEL	CR0LT,R20,R21,rout

#define SETB_CR0(rout) _SETB(CR0LT, CR0EQ, rout)
#define SETB_CR1(rout) _SETB(CR1LT, CR1EQ, rout)
#define SETB_INIT() \
	MOVD	$-1,R20 \
	MOVD	$1,R21
#endif

TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// incoming:
	// R3 a addr
	// R4 a len
	// R6 b addr
	// R7 b len
	//
	// on entry to cmpbody:
	// R3 return value if len(a) == len(b)
	// R5 a addr
	// R6 b addr
	// R9 min(len(a),len(b))
	SETB_INIT()
	MOVD	R3,R5
	CMP	R4,R7,CR0
	CMP	R3,R6,CR7
	ISEL	CR0LT,R4,R7,R9
	SETB_CR0(R3)
	BC	$12,30,LR	// beqlr cr7; identical backing arrays compare equal up to min length
	BR	cmpbody<>(SB)

TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// incoming:
	// R3 a addr -> R5
	// R4 a len -> R3
	// R5 b addr -> R6
	// R6 b len -> R4
	//
	// on entry to cmpbody:
	// R3 compare value if compared length is same.
	// R5 a addr
	// R6 b addr
	// R9 min(len(a),len(b))
	SETB_INIT()
	CMP	R4,R6,CR0
	CMP	R3,R5,CR7
	ISEL	CR0LT,R4,R6,R9
	MOVD	R5,R6
	MOVD	R3,R5
	SETB_CR0(R3)
	BC	$12,30,LR	// beqlr cr7
	BR	cmpbody<>(SB)

#ifdef GOARCH_ppc64le
// Permute control to byte-reverse a 16B vector into BE order (LE only).
DATA byteswap<>+0(SB)/8, $0x0706050403020100
DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908
GLOBL byteswap<>+0(SB), RODATA, $16
#define SWAP V21
#endif

TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
start:
	CMP	R9,$16,CR0
	CMP	R9,$32,CR1
	CMP	R9,$64,CR2
	MOVD	$16,R10
	BLT	cmp8
	BLT	CR1,cmp16
	BLT	CR2,cmp32

cmp64:	// >= 64B
	DCBT	(R5)	// optimize for size>=64
	DCBT	(R6)	// cache hint

	SRD	$6,R9,R14	// There is at least one iteration.
	MOVD	R14,CTR
	ANDCC	$63,R9,R9
	CMP	R9,$16,CR1	// Do setup for tail check early on.
	CMP	R9,$32,CR2
	CMP	R9,$48,CR3
	ADD	$-16,R9,R9

	MOVD	$32,R11	// set offsets to load into vector
	MOVD	$48,R12	// set offsets to load into vector

	PCALIGN	$16
cmp64_loop:
	LXVD2X	(R5)(R0),V3	// load bytes of A at offset 0 into vector
	LXVD2X	(R6)(R0),V4	// load bytes of B at offset 0 into vector
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different	// jump out if it's different

	LXVD2X	(R5)(R10),V3	// load bytes of A at offset 16 into vector
	LXVD2X	(R6)(R10),V4	// load bytes of B at offset 16 into vector
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R11),V3	// load bytes of A at offset 32 into vector
	LXVD2X	(R6)(R11),V4	// load bytes of B at offset 32 into vector
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R12),V3	// load bytes of A at offset 48 into vector
	LXVD2X	(R6)(R12),V4	// load bytes of B at offset 48 into vector
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	ADD	$64,R5,R5	// increment to next 64 bytes of A
	ADD	$64,R6,R6	// increment to next 64 bytes of B
	BDNZ	cmp64_loop
	BC	$12,2,LR	// beqlr

	// Finish out tail with minimal overlapped checking.
	// Note, 0 tail is handled by beqlr above.
	BLE	CR1,cmp64_tail_gt0
	BLE	CR2,cmp64_tail_gt16
	BLE	CR3,cmp64_tail_gt32

cmp64_tail_gt48: // 49 - 63 B
	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R10),V3
	LXVD2X	(R6)(R10),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R11),V3
	LXVD2X	(R6)(R11),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	BR	cmp64_tail_gt0

	PCALIGN	$16
cmp64_tail_gt32: // 33 - 48B
	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R10),V3
	LXVD2X	(R6)(R10),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	BR	cmp64_tail_gt0

	PCALIGN	$16
cmp64_tail_gt16: // 17 - 32B
	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	BR	cmp64_tail_gt0

	PCALIGN	$16
cmp64_tail_gt0: // 1 - 16B
	// R9 = tail - 16; loads the final (possibly overlapping) 16B.
	LXVD2X	(R5)(R9),V3
	LXVD2X	(R6)(R9),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	RET

	PCALIGN	$16
cmp32:	// 32 - 63B
	ANDCC	$31,R9,R9

	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R10)(R5),V3
	LXVD2X	(R10)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	BC	$12,2,LR	// beqlr
	ADD	R9,R10,R10

	LXVD2X	(R9)(R5),V3
	LXVD2X	(R9)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R10)(R5),V3
	LXVD2X	(R10)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different
	RET

	PCALIGN	$16
cmp16:	// 16 - 31B
	ANDCC	$15,R9,R9
	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different
	BC	$12,2,LR	// beqlr

	LXVD2X	(R9)(R5),V3
	LXVD2X	(R9)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different
	RET

	PCALIGN	$16
different:
#ifdef GOARCH_ppc64le
	MOVD	$byteswap<>+00(SB),R16
	LXVD2X	(R16)(R0),SWAP	// Set up swap string

	VPERM	V3,V3,SWAP,V3
	VPERM	V4,V4,SWAP,V4
#endif

	MFVSRD	VS35,R16	// move upper doublewords of A and B into GPR for comparison
	MFVSRD	VS36,R10

	CMPU	R16,R10
	BEQ	lower
	SETB_CR0_NE(R3)
	RET

	PCALIGN	$16
lower:
	VSLDOI	$8,V3,V3,V3	// move lower doublewords of A and B into GPR for comparison
	MFVSRD	VS35,R16
	VSLDOI	$8,V4,V4,V4
	MFVSRD	VS36,R10

	CMPU	R16,R10
	SETB_CR0_NE(R3)
	RET

	PCALIGN	$16
cmp8:	// 8 - 15B (0 - 15B if GOPPC64_power10)
#ifdef GOPPC64_power10
	SLD	$56,R9,R9
	LXVLL	R5,R9,V3	// Load bytes starting from MSB to LSB, unused are zero filled.
	LXVLL	R6,R9,V4
	VCMPUQ	V3,V4,CR0	// Compare as a 128b integer.
	SETB_CR0(R6)
	ISEL	CR0EQ,R3,R6,R3	// If equal, length determines the return value.
	RET
#else
	CMP	R9,$8
	BLT	cmp4
	ANDCC	$7,R9,R9
	_LDBEX	(R0)(R5),R10
	_LDBEX	(R0)(R6),R11
	_LDBEX	(R9)(R5),R12
	_LDBEX	(R9)(R6),R14
	CMPU	R10,R11,CR0
	SETB_CR0(R5)
	CMPU	R12,R14,CR1
	SETB_CR1(R6)
	CRAND	CR0EQ,CR1EQ,CR1EQ	// If both equal, length determines return value.
	ISEL	CR0EQ,R6,R5,R4
	ISEL	CR1EQ,R3,R4,R3
	RET

	PCALIGN	$16
cmp4:	// 4 - 7B
	CMP	R9,$4
	BLT	cmp2
	ANDCC	$3,R9,R9
	_LWBEX	(R0)(R5),R10
	_LWBEX	(R0)(R6),R11
	_LWBEX	(R9)(R5),R12
	_LWBEX	(R9)(R6),R14
	RLDIMI	$32,R10,$0,R12	// concatenate head and tail words for one compare
	RLDIMI	$32,R11,$0,R14
	CMPU	R12,R14
	BR	cmp0

	PCALIGN	$16
cmp2:	// 2 - 3B
	CMP	R9,$2
	BLT	cmp1
	ANDCC	$1,R9,R9
	_LHBEX	(R0)(R5),R10
	_LHBEX	(R0)(R6),R11
	_LHBEX	(R9)(R5),R12
	_LHBEX	(R9)(R6),R14
	RLDIMI	$32,R10,$0,R12
	RLDIMI	$32,R11,$0,R14
	CMPU	R12,R14
	BR	cmp0

	PCALIGN	$16
cmp1:
	CMP	R9,$0
	BEQ	cmp0
	MOVBZ	(R5),R10
	MOVBZ	(R6),R11
	CMPU	R10,R11
cmp0:
	SETB_CR0(R6)
	ISEL	CR0EQ,R3,R6,R3	// If equal, length determines the return value.
	RET
#endif