// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64le || ppc64
// +build ppc64le ppc64

#include "go_asm.h"
#include "textflag.h"

// Count counts occurrences of byte c in the byte slice b.
//
// It only unpacks the argument frame (slice pointer, slice length, byte,
// address of the result slot) into the registers countbytebody expects and
// then branches there; countbytebody stores the result and executes RET.
TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40
	MOVD	b_base+0(FP), R3	// R3 = byte array pointer
	MOVD	b_len+8(FP), R4		// R4 = length
	MOVBZ	c+24(FP), R5		// R5 = byte to count, zero-extended
	MOVD	$ret+32(FP), R14	// R14 = &ret
	BR	countbytebody<>(SB)

// CountString counts occurrences of byte c in the string s.
//
// Same shape as Count; only the frame offsets differ because a string
// header has no capacity word.
TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32
	MOVD	s_base+0(FP), R3	// R3 = string data pointer
	MOVD	s_len+8(FP), R4		// R4 = length
	MOVBZ	c+16(FP), R5		// R5 = byte to count, zero-extended
	MOVD	$ret+24(FP), R14	// R14 = &ret
	BR	countbytebody<>(SB)

// countbytebody is the shared implementation behind Count and CountString.
//
// Inputs:
//	R3:  addr of string
//	R4:  len of string
//	R5:  byte to count
//	R14: addr for return value
//
// Working registers:
//	R6:  the byte to count replicated into all 8 bytes
//	R16: bytes remaining
//	R18: running match count
//	V1:  the byte to count replicated into all 16 bytes
//	V4:  the constant 3 in every word (shift amount, bits -> bytes)
//	V5:  per-doubleword accumulator of matching bits
//	R9, R10, R12, R15, R17, V0, V2, V3, V6: scratch
//
// The input is consumed in three phases: 16 bytes at a time with vector
// compares (entered only when the length is at least 32), then 8 bytes at
// a time with CMPB, then byte by byte.
//
// Endianness shouldn't matter since we are just counting and order
// is irrelevant.
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
	DCBT	(R3)			// Prepare cache line.
	MOVD	R0, R18			// byte count = 0
	MOVD	R4, R16			// remaining bytes = length

	// Replicate the byte through R6: 1 -> 2 -> 4 -> 8 bytes.
	MOVD	R5, R6
	RLDIMI	$8, R6, $48, R6
	RLDIMI	$16, R6, $32, R6
	RLDIMI	$32, R6, $0, R6		// fill reg with the byte to count

	VSPLTISW	$3, V4		// used for shift
	MTVRD	R6, V1			// move compare byte
	VSPLTB	$7, V1, V1		// replicate byte across V1

	CMPU	R4, $32			// Check if it's a small string (<32 bytes)
	BLT	tail			// Jump to the small string case
	XXLXOR	VS37, VS37, VS37	// clear V5 (aka VS37) to use as accumulator

cmploop:
	LXVW4X	(R3), VS32		// load 16 bytes from string into V0 (aka VS32)

	// When the bytes match, the corresponding byte contains all 1s, so
	// every match contributes exactly 8 set bits to the population count.
	VCMPEQUB	V1, V0, V2	// compare bytes
	VPOPCNTD	V2, V3		// each double word contains its count
	VADDUDM	V3, V5, V5		// accumulate bit count in each double word
	ADD	$16, R3, R3		// increment pointer
	SUB	$16, R16, R16		// remaining bytes
	CMP	R16, $16		// at least 16 remaining?
	BGE	cmploop

	// Fold the two doubleword bit counts into the scalar byte count.
	VSRD	V5, V4, V5		// shift by 3 to convert bits to bytes
	VSLDOI	$8, V5, V5, V6		// get the double word values from vector
	MFVSRD	V5, R9
	MFVSRD	V6, R10
	ADD	R9, R10, R9
	ADD	R9, R18, R18

tail:
	CMP	R16, $8			// 8 bytes left?
	BLT	small

	MOVD	(R3), R12		// load 8 bytes
	CMPB	R12, R6, R17		// compare bytes
	POPCNTD	R17, R15		// bit count
	SRD	$3, R15, R15		// byte count
	ADD	R15, R18, R18		// add to byte count

	ADD	$8, R3, R3		// advance pointer
	SUB	$8, R16, R16		// remaining bytes
	BR	tail

small:
	CMP	R16, $0			// any remaining?
	BEQ	done
	MOVBZ	(R3), R12		// check each remaining byte
	CMP	R12, R5
	BNE	next2
	ADD	$1, R18

next2:
	SUB	$1, R16
	ADD	$1, R3			// inc address
	BR	small

done:
	MOVD	R18, (R14)		// return count
	RET