// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64le || ppc64

#include "go_asm.h"
#include "textflag.h"

// Count is the entry point for counting occurrences of a byte in a byte
// slice. It only moves its arguments into the registers countbytebody
// expects and then branches there; countbytebody delivers the result and
// returns directly to Count's caller.
//
// Stack arguments used when GOEXPERIMENT_regabiargs is not defined:
//	b_base+0(FP)  pointer to the bytes
//	b_len+8(FP)   number of bytes
//	c+24(FP)      byte to count
//	ret+32(FP)    result slot
TEXT ·Count<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
#ifdef GOEXPERIMENT_regabiargs
	// R3 = byte array pointer
	// R4 = length
	// The byte to count arrives in R6; zero-extend it into R5.
	MOVBZ R6, R5              // R5 = byte
#else
	MOVD  b_base+0(FP), R3    // R3 = byte array pointer
	MOVD  b_len+8(FP), R4     // R4 = length
	MOVBZ c+24(FP), R5        // R5 = byte
	MOVD  $ret+32(FP), R14    // R14 = &ret
#endif
	BR    countbytebody<>(SB)

// CountString is the string counterpart of Count: it loads the string
// pointer, its length and the byte to count into R3, R4 and R5 and then
// branches to countbytebody.
//
// Stack arguments used when GOEXPERIMENT_regabiargs is not defined:
//	s_base+0(FP)  pointer to the string data
//	s_len+8(FP)   string length
//	c+16(FP)      byte to count
//	ret+24(FP)    result slot
TEXT ·CountString<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-32
#ifdef GOEXPERIMENT_regabiargs
	// R3 = string pointer
	// R4 = length
	// The byte is already in R5; only zero-extend it in place.
	MOVBZ R5, R5              // R5 = byte
#else
	MOVD  s_base+0(FP), R3    // R3 = string
	MOVD  s_len+8(FP), R4     // R4 = length
	MOVBZ c+16(FP), R5        // R5 = byte
	MOVD  $ret+24(FP), R14    // R14 = &ret
#endif
	BR    countbytebody<>(SB)

// countbytebody counts how many of the R4 bytes starting at R3 equal the
// byte in R5.
//
// Inputs:
//	R3:  addr of string
//	R4:  len of string
//	R5:  byte to count
//	R14: addr for return value when not regabi
//
// Result: the count is left in R3 when GOEXPERIMENT_regabiargs is defined,
// otherwise it is stored through R14.
//
// Registers written besides the result: R6, R9, R10, R12, R15, R16, R17,
// R18 and V0-V6. R3 is advanced past the scanned bytes.
//
// The scan runs in up to three phases:
//	cmploop: 16 bytes per iteration with vector compares; entered only
//	         when the length is at least 32.
//	tail:    8 bytes per iteration with CMPB.
//	small:   one byte per iteration.
//
// Endianness shouldn't matter since we are just counting and order
// is irrelevant.
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
	DCBT (R3)                 // Prepare cache line.
	MOVD R0, R18              // byte count, starts from R0 (the zero register in Go's ppc64 assembler)
	MOVD R4, R16              // R16 = bytes remaining

	// Replicate the byte to count into all 8 bytes of R6.
	MOVD   R5, R6
	RLDIMI $8, R6, $48, R6
	RLDIMI $16, R6, $32, R6
	RLDIMI $32, R6, $0, R6    // fill reg with the byte to count

	VSPLTISW $3, V4           // used for shift
	MTVRD    R6, V1           // move compare byte
	VSPLTB   $7, V1, V1       // replicate byte across V1

	CMPU   R4, $32            // Check if it's a small string (<32 bytes)
	BLT    tail               // Jump to the small string case
	XXLXOR VS37, VS37, VS37   // clear V5 (aka VS37) to use as accumulator

cmploop:
	LXVW4X (R3), VS32         // load bytes from string into V0 (aka VS32)

	// when the bytes match, the corresponding byte contains all 1s
	VCMPEQUB V1, V0, V2       // compare bytes
	VPOPCNTD V2, V3           // each double word contains its count
	VADDUDM  V3, V5, V5       // accumulate bit count in each double word
	ADD      $16, R3, R3      // increment pointer
	SUB      $16, R16, R16    // remaining bytes
	CMP      R16, $16         // at least 16 remaining?
	BGE      cmploop

	// Fold the two per-doubleword bit counts into the scalar byte count.
	VSRD     V5, V4, V5       // shift by 3 to convert bits to bytes
	VSLDOI   $8, V5, V5, V6   // get the double word values from vector
	MFVSRD   V5, R9
	MFVSRD   V6, R10
	ADD      R9, R10, R9
	ADD      R9, R18, R18

tail:
	CMP R16, $8               // 8 bytes left?
	BLT small

	MOVD    (R3), R12         // load 8 bytes
	CMPB    R12, R6, R17      // compare bytes
	POPCNTD R17, R15          // bit count
	SRD     $3, R15, R15      // byte count
	ADD     R15, R18, R18     // add to byte count
	ADD     $8, R3, R3        // increment pointer
	SUB     $8, R16, R16      // remaining bytes
	BR      tail

small:
	CMP   $0, R16             // any remaining
	BEQ   done
	MOVBZ (R3), R12           // check each remaining byte
	CMP   R12, R5
	BNE   next2
	ADD   $1, R18

next2:
	SUB $1, R16
	ADD $1, R3                // inc address
	BR  small

done:
#ifdef GOEXPERIMENT_regabiargs
	MOVD R18, R3              // return count
#else
	MOVD R18, (R14)           // return count
#endif

	RET