github.com/bir3/gocompiler@v0.9.2202/src/internal/bytealg/equal_ppc64x.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "go_asm.h"
#include "textflag.h"

// 4K (smallest case) page size offset mask for PPC64.
#define PAGE_OFFSET 4095

// The BC opcode is hard to read, and no extended
// mnemonics are offered for these forms.
#define BGELR_CR6 BC 4, CR6LT, (LR)
#define BEQLR BC 12, CR0EQ, (LR)

// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// R3 = a
	// R4 = b
	// R5 = size
	BR	memeqbody<>(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
	// R3 = a
	// R4 = b
	CMP	R3, R4
	BEQ	eq
	MOVD	8(R11), R5	// compiler stores size at offset 8 in the closure
	BR	memeqbody<>(SB)
eq:
	MOVD	$1, R3
	RET

// Do an efficient memequal for ppc64
// R3 = s1
// R4 = s2
// R5 = len
// On exit:
// R3 = return value
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3, R8		// Move s1 into R8
	ADD	R5, R3, R9	// &s1[len(s1)]
	ADD	R5, R4, R10	// &s2[len(s2)]
	MOVD	$1, R11		// R11 = 1, selected by ISEL on a match
	CMP	R5, $16		// Use GPR checks for len <= 16
	BLE	check0_16
	MOVD	$0, R3		// Assume no-match in case BGELR CR6 returns
	CMP	R5, $32		// Use overlapping VSX loads for len <= 32
	BLE	check17_32	// Do a pair of overlapping VSR compares
	CMP	R5, $64
	BLE	check33_64	// Hybrid check + overlap compare.

setup64:
	SRD	$6, R5, R6	// number of 64 byte chunks to compare
	MOVD	R6, CTR
	MOVD	$16, R14	// index for VSX loads and stores
	MOVD	$32, R15
	MOVD	$48, R16
	ANDCC	$0x3F, R5, R5	// len%64==0?

	PCALIGN	$16
loop64:
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2	// compare, setting CR6
	BGELR_CR6
	LXVD2X	(R8+R14), V0
	LXVD2X	(R4+R14), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R15), V0
	LXVD2X	(R4+R15), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R16), V0
	LXVD2X	(R4+R16), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	ADD	$64, R8		// bump up to next 64
	ADD	$64, R4
	BDNZ	loop64

	ISEL	CR0EQ, R11, R3, R3	// If no tail, return 1, otherwise R3 remains 0.
	BEQLR				// return if no tail.

	ADD	$-64, R9, R8	// point R8/R4 at the last 64 bytes of s1/s2
	ADD	$-64, R10, R4	// (overlapping compare of the tail)
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R14), V0
	LXVD2X	(R4+R14), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R15), V0
	LXVD2X	(R4+R15), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R16), V0
	LXVD2X	(R4+R16), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	CR6LT, R11, R0, R3
	RET

check33_64:
	// Bytes 0-15
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	ADD	$16, R8
	ADD	$16, R4

	// Bytes 16-31
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6

	// A little tricky, but point R4,R8 to &sx[len-32],
	// and reuse check17_32 to check the next 1-31 bytes (with some overlap)
	ADD	$-32, R9, R8
	ADD	$-32, R10, R4
	// Fallthrough

check17_32:
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	CR6LT, R11, R0, R5	// R5 = 1 if the first 16 bytes match, else 0

	// Load sX[len(sX)-16:len(sX)] and compare.
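	// The 16-byte compares above and below deliberately overlap for
	// 17 <= len <= 32. For example, with len=20 the LXVD2X pair above
	// checks bytes 0-15 and the pair below checks bytes 4-19; bytes
	// 4-15 are compared twice, which is harmless for an equality test
	// and cheaper than a scalar tail loop.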
	ADD	$-16, R9
	ADD	$-16, R10
	LXVD2X	(R9+R0), V0
	LXVD2X	(R10+R0), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	CR6LT, R5, R0, R3	// R3 = (last 16 bytes match) ? R5 : 0
	RET

check0_16:
#ifdef GOPPC64_power10
	SLD	$56, R5, R7	// LXVL takes the length in the high-order byte
	LXVL	R8, R7, V0	// load exactly len bytes, zero-filled
	LXVL	R4, R7, V1
	VCMPEQUDCC	V0, V1, V2
	ISEL	CR6LT, R11, R0, R3
	RET
#else
	CMP	R5, $8
	BLT	check0_7
	// Load sX[0:7] and compare.
	MOVD	(R8), R6
	MOVD	(R4), R7
	CMP	R6, R7
	ISEL	CR0EQ, R11, R0, R5
	// Load sX[len(sX)-8:len(sX)] and compare.
	MOVD	-8(R9), R6
	MOVD	-8(R10), R7
	CMP	R6, R7
	ISEL	CR0EQ, R5, R0, R3
	RET

check0_7:
	CMP	R5, $0
	MOVD	$1, R3
	BEQLR			// return if len == 0

	// Check < 8B loads with a single compare, but select the load address
	// such that it cannot cross a page boundary. Load a few bytes from the
	// lower address if that does not cross the lower page. Or, load a few
	// extra bytes from the higher addresses. And align those values
	// consistently in register as either address may have differing
	// alignment requirements.
	ANDCC	$PAGE_OFFSET, R8, R6	// &sX & PAGE_OFFSET
	ANDCC	$PAGE_OFFSET, R4, R9
	SUBC	R5, $8, R12	// 8-len
	SLD	$3, R12, R14	// (8-len)*8
	CMPU	R6, R12, CR1	// Enough bytes lower in the page to load lower?
	CMPU	R9, R12, CR0
	SUB	R12, R8, R6	// compute lower load address
	SUB	R12, R4, R9
	ISEL	CR1LT, R8, R6, R8	// R8 = (s1&PAGE_OFFSET < 8-len) ? R8 (&s1) : R6 (&s1 - (8-len))
	ISEL	CR0LT, R4, R9, R4	// Similar for s2
	MOVD	(R8), R15
	MOVD	(R4), R16
	SLD	R14, R15, R7
	SLD	R14, R16, R17
	SRD	R14, R7, R7	// Clear the upper (8-len) bytes (with 2 shifts)
	SRD	R14, R17, R17
	SRD	R14, R15, R6	// Clear the lower (8-len) bytes
	SRD	R14, R16, R9
#ifdef GOARCH_ppc64le
	ISEL	CR1LT, R7, R6, R8	// Choose the correct len bytes to compare based on alignment
	ISEL	CR0LT, R17, R9, R4
#else
	ISEL	CR1LT, R6, R7, R8
	ISEL	CR0LT, R9, R17, R4
#endif
	CMP	R4, R8
	ISEL	CR0EQ, R11, R0, R3
	RET
#endif // tail processing if !defined(GOPPC64_power10)
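
// Worked example for check0_7: suppose len=3, so 8-len=5 and
// R14=(8-len)*8=40. If &s1 is 2 bytes into a page, then 2 < 5 sets CR1LT:
// a load from &s1-5 could cross into the previous page, so the 8-byte load
// uses &s1 itself (it cannot cross the upper boundary either, since the
// page offset is < 8-len <= 7). The 40-bit shifts then clear the 5 extra
// bytes (which half depends on endianness) so only the 3 requested bytes
// reach the final CMP.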