// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64le || ppc64

#include "go_asm.h"
#include "textflag.h"

// Count entry point: counts occurrences of a byte in a byte slice.
// On entry:
//   R3 = byte array pointer
//   R4 = length
//   R6 = byte to count
// The byte is zero-extended into R5, where countbytebody expects it,
// and control tail-branches to the shared implementation.
TEXT ·Count<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	MOVBZ	R6, R5			// R5 = byte
	BR	countbytebody<>(SB)

// CountString entry point: counts occurrences of a byte in a string.
// On entry:
//   R3 = string data pointer
//   R4 = length
//   R5 = byte to count
// The byte is zero-extended in place and control tail-branches to the
// shared implementation.
TEXT ·CountString<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-32
	MOVBZ	R5, R5			// R5 = byte
	BR	countbytebody<>(SB)

// countbytebody is the shared implementation behind Count and CountString.
//
// On entry:
//   R3: addr of string
//   R4: len of string
//   R5: byte to count (already zero-extended by the caller)
// On exit:
//   R3: return value (number of matching bytes)
//
// Strategy:
//   * len >= 32: compare 16 bytes per iteration with VMX/VSX until fewer
//     than 16 bytes remain.
//   * then 8 bytes at a time with CMPB/POPCNTD while at least 8 remain.
//   * then one byte at a time.
//
// Scratch registers written here: R6, R9, R10, R12, R15, R16, R17, R18,
// V0-V6 (VS32-VS38).
//
// Endianness shouldn't matter since we are just counting and order
// is irrelevant.
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
	DCBT	(R3)			// Prepare cache line.
	MOVD	R0, R18			// R18 = running byte count, starts at 0
	MOVD	R4, R16			// R16 = bytes remaining

	// Replicate the byte across all 8 bytes of R6 so that CMPB can
	// test 8 input bytes at once in the doubleword tail loop.
	MOVD	R5, R6
	RLDIMI	$8, R6, $48, R6		// byte now in the low 2 bytes
	RLDIMI	$16, R6, $32, R6	// byte now in the low 4 bytes
	RLDIMI	$32, R6, $0, R6		// fill reg with the byte to count

	VSPLTISW	$3, V4		// shift amount of 3, used to turn bit counts into byte counts
	MTVRD	R6, V1			// move compare byte
	VSPLTB	$7, V1, V1		// replicate byte across V1

	CMPU	R4, $32			// Check if it's a small string (<32 bytes)
	BLT	tail			// Jump to the small string case
	XXLXOR	VS37, VS37, VS37	// clear V5 (aka VS37) to use as accumulator

cmploop:
	LXVW4X	(R3), VS32		// load 16 bytes from string into V0 (aka VS32)

	// When the bytes match, the corresponding byte contains all 1s,
	// so every match contributes exactly 8 set bits to the popcount.
	VCMPEQUB	V1, V0, V2	// compare bytes
	VPOPCNTD	V2, V3		// each double word contains its bit count
	VADDUDM	V3, V5, V5		// accumulate bit count in each double word
	ADD	$16, R3, R3		// increment pointer
	SUB	$16, R16, R16		// remaining bytes
	CMP	R16, $16		// at least 16 remaining?
	BGE	cmploop

	// Fold the two doubleword accumulators into the scalar count.
	VSRD	V5, V4, V5		// shift by 3 to convert bits to bytes
	VSLDOI	$8, V5, V5, V6		// rotate by 8 bytes so V6 exposes the other double word
	MFVSRD	V5, R9
	MFVSRD	V6, R10
	ADD	R9, R10, R9
	ADD	R9, R18, R18

tail:
	CMP	R16, $8			// 8 bytes left?
	BLT	small

	MOVD	(R3), R12		// load 8 bytes
	CMPB	R12, R6, R17		// matching bytes become 0xFF, others 0x00
	POPCNTD	R17, R15		// bit count
	SRD	$3, R15, R15		// byte count (8 bits per matching byte)
	ADD	R15, R18, R18		// add to byte count
	ADD	$8, R3, R3		// advance pointer
	SUB	$8, R16, R16		// remaining bytes
	BR	tail

small:
	CMP	R16, $0			// any remaining
	BEQ	done
	MOVBZ	(R3), R12		// check each remaining byte
	CMP	R12, R5
	BNE	next2
	ADD	$1, R18

next2:
	SUB	$1, R16
	ADD	$1, R3			// inc address
	BR	small

done:
	MOVD	R18, R3			// return count
	RET