// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "go_asm.h"
#include "textflag.h"

// 4K (smallest case) page size offset mask for PPC64.
#define PAGE_OFFSET 4095

// TODO: At writing, ISEL and BC do not support CR bit type arguments,
// define them here for readability. Each value is the flat CR-field
// bit index (4*field + bit), where bit 0 = LT and bit 2 = EQ.
#define CR0LT 4*0+0
#define CR0EQ 4*0+2
#define CR1LT 4*1+0
#define CR6LT 4*6+0

// Likewise, the BC opcode is hard to read, and no extended
// mnemonics are offered for these forms.
// BGELR_CR6: return if CR6[LT] is clear (i.e. the VCMPEQUBCC "all
// bytes equal" condition did NOT hold). BEQLR: return if CR0[EQ] set.
#define BGELR_CR6 BC 4, CR6LT, (LR)
#define BEQLR BC 12, CR0EQ, (LR)

// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// R3 = a
	// R4 = b
	// R5 = size
	BR	memeqbody<>(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// The compare length is stored in the closure (R11 is the closure
// context pointer under the Go ABI).
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
	// R3 = a
	// R4 = b
	CMP	R3, R4
	BEQ	eq		// identical pointers compare equal, skip the load
	MOVD	8(R11), R5	// compiler stores size at offset 8 in the closure
	BR	memeqbody<>(SB)
eq:
	MOVD	$1, R3
	RET

// Do an efficient memequal for ppc64
// R3 = s1
// R4 = s2
// R5 = len
// On exit:
// R3 = return value (1 if equal, 0 otherwise)
//
// Register roles inside the body:
//   R8/R4   = current s1/s2 compare addresses
//   R9/R10  = one-past-the-end of s1/s2 (used for overlapping tail loads)
//   R11     = constant 1, the "equal" result selected by ISEL
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3, R8		// Move s1 into R8
	ADD	R5, R3, R9	// &s1[len(s1)]
	ADD	R5, R4, R10	// &s2[len(s2)]
	MOVD	$1, R11
	CMP	R5, $16		// Use GPR checks for len <= 16
	BLE	check0_16
	MOVD	$0, R3		// Assume no-match in case BGELR CR6 returns
	CMP	R5, $32		// Use overlapping VSX loads for len <= 32
	BLE	check17_32	// Do a pair of overlapping VSR compares
	CMP	R5, $64
	BLE	check33_64	// Hybrid check + overlap compare.

setup64:
	SRD	$6, R5, R6	// number of 64 byte chunks to compare
	MOVD	R6, CTR
	MOVD	$16, R14	// index for VSX loads and stores
	MOVD	$32, R15
	MOVD	$48, R16
	ANDCC	$0x3F, R5, R5	// len%64==0? Sets CR0; no CC-form op below
				// writes CR0, so it survives until the ISEL
				// and BEQLR after the loop.

	PCALIGN	$32
loop64:
	// Compare one 64-byte chunk as four 16-byte VSX compares.
	// VCMPEQUBCC sets CR6[LT] when all bytes are equal; BGELR_CR6
	// returns early (R3 already 0) on the first mismatch.
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2	// compare, setting CR6
	BGELR_CR6
	LXVD2X	(R8+R14), V0
	LXVD2X	(R4+R14), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R15), V0
	LXVD2X	(R4+R15), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R16), V0
	LXVD2X	(R4+R16), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	ADD	$64,R8		// bump up to next 64
	ADD	$64,R4
	BDNZ	loop64

	ISEL	$CR0EQ, R11, R3, R3	// If no tail, return 1, otherwise R3 remains 0.
	BEQLR				// return if no tail.

	// Compare the final (overlapping) 64 bytes ending at &sX[len].
	// Since len > 64 here, re-comparing already-matched bytes is safe.
	ADD	$-64, R9, R8
	ADD	$-64, R10, R4
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R14), V0
	LXVD2X	(R4+R14), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R15), V0
	LXVD2X	(R4+R15), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R16), V0
	LXVD2X	(R4+R16), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	$CR6LT, R11, R0, R3	// R3 = all-equal ? 1 : 0
	RET

check33_64:
	// Bytes 0-15
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	ADD	$16, R8
	ADD	$16, R4

	// Bytes 16-31
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6

	// A little tricky, but point R4,R8 to &sx[len-32],
	// and reuse check17_32 to check the next 1-31 bytes (with some overlap)
	ADD	$-32, R9, R8
	ADD	$-32, R10, R4
	// Fallthrough

check17_32:
	// First 16 bytes; stash the partial result (1/0) in R5, which is
	// no longer needed as the length.
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	$CR6LT, R11, R0, R5

	// Load sX[len(sX)-16:len(sX)] and compare (overlaps the first
	// compare when len < 32).
	ADD	$-16, R9
	ADD	$-16, R10
	LXVD2X	(R9+R0), V0
	LXVD2X	(R10+R0), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	$CR6LT, R5, R0, R3	// R3 = both halves equal ? 1 : 0
	RET

check0_16:
	CMP	R5, $8
	BLT	check0_7
	// Load sX[0:7] and compare.
	MOVD	(R8), R6
	MOVD	(R4), R7
	CMP	R6, R7
	ISEL	$CR0EQ, R11, R0, R5	// partial result in R5, as above
	// Load sX[len(sX)-8:len(sX)] and compare.
	MOVD	-8(R9), R6
	MOVD	-8(R10), R7
	CMP	R6, R7
	ISEL	$CR0EQ, R5, R0, R3
	RET

check0_7:
	CMP	R5,$0
	MOVD	$1, R3
	BEQLR		// return if len == 0

	// Check < 8B loads with a single compare, but select the load address
	// such that it cannot cross a page boundary. Load a few bytes from the
	// lower address if that does not cross the lower page. Or, load a few
	// extra bytes from the higher addresses. And align those values
	// consistently in register as either address may have differing
	// alignment requirements.
	ANDCC	$PAGE_OFFSET, R8, R6	// &sX & PAGE_OFFSET
	ANDCC	$PAGE_OFFSET, R4, R9
	SUBC	R5, $8, R12		// 8-len
	SLD	$3, R12, R14		// (8-len)*8 = bit count to discard
	CMPU	R6, R12, CR1		// Enough bytes lower in the page to load lower?
	CMPU	R9, R12, CR0
	SUB	R12, R8, R6		// compute lower load address
	SUB	R12, R4, R9
	ISEL	$CR1LT, R8, R6, R8	// R8 = (page offset < 8-len) ? R8 (&s1) : R6 (&s1 - (8-len))
	ISEL	$CR0LT, R4, R9, R4	// Similar for s2
	MOVD	(R8), R15
	MOVD	(R4), R16
	// Depending on which address was chosen above, the len interesting
	// bytes sit in either the low or the high end of the register; mask
	// off the other (8-len) bytes with shift pairs, then pick the
	// correctly-placed value per operand via the CR bits computed above.
	SLD	R14, R15, R7
	SLD	R14, R16, R17
	SRD	R14, R7, R7		// Clear the upper (8-len) bytes (with 2 shifts)
	SRD	R14, R17, R17
	SRD	R14, R15, R6		// Clear the lower (8-len) bytes
	SRD	R14, R16, R9
#ifdef GOARCH_ppc64le
	ISEL	$CR1LT, R7, R6, R8	// Choose the correct len bytes to compare based on alignment
	ISEL	$CR0LT, R17, R9, R4
#else
	ISEL	$CR1LT, R6, R7, R8
	ISEL	$CR0LT, R9, R17, R4
#endif
	CMP	R4, R8
	ISEL	$CR0EQ, R11, R0, R3
	RET