// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "go_asm.h"
#include "textflag.h"

// Equal compares two byte slices passed on the stack:
//	a_base+0(FP), a_len+8(FP), b_base+24(FP), b_len+32(FP)
// and writes a one-byte result to ret+48(FP): 0 when the lengths differ,
// otherwise whatever memeqbody stores through R10.
TEXT ·Equal(SB),NOSPLIT|NOFRAME,$0-49
	MOVD	a_len+8(FP), R4
	MOVD	b_len+32(FP), R5
	CMP	R5, R4			// unequal lengths are not equal
	BNE	noteq
	// Lengths match, so R5 (b_len) is the byte count memeqbody expects.
	MOVD	a_base+0(FP), R3
	MOVD	b_base+24(FP), R4
	MOVD	$ret+48(FP), R10	// memeqbody writes the result through R10
	BR	memeqbody<>(SB)

noteq:
	MOVBZ	$0,ret+48(FP)
	RET

// bytes·Equal is the same routine exported under the bytes package symbol.
// It reuses the argument stack map of ·Equal and has the identical
// argument layout and result slot.
TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
	FUNCDATA $0, ·Equal·args_stackmap(SB)
	MOVD	a_len+8(FP), R4
	MOVD	b_len+32(FP), R5
	CMP	R5, R4			// unequal lengths are not equal
	BNE	noteq
	// Lengths match, so R5 (b_len) is the byte count memeqbody expects.
	MOVD	a_base+0(FP), R3
	MOVD	b_base+24(FP), R4
	MOVD	$ret+48(FP), R10	// memeqbody writes the result through R10
	BR	memeqbody<>(SB)

noteq:
	MOVBZ	$0,ret+48(FP)
	RET

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Compares size bytes at a and b; the one-byte result goes to ret+24(FP).
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	CMP	R3, R4			// identical pointers: equal without reading memory
	BEQ	eq			// (same shortcut memequal_varlen takes)
	MOVD	size+16(FP), R5
	MOVD	$ret+24(FP), R10
	BR	memeqbody<>(SB)
eq:
	MOVD	$1, R3
	MOVB	R3, ret+24(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Like memequal, but the byte count is read from the closure in R11
// rather than passed as an argument. The result goes to ret+16(FP).
TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	CMP	R3, R4			// identical pointers: equal without reading memory
	BEQ	eq
	MOVD	8(R11), R5		// compiler stores size at offset 8 in the closure
	MOVD	$ret+16(FP), R10
	BR	memeqbody<>(SB)
eq:
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET

// Do an efficient memequal for ppc64
// R3 = s1
// R4 = s2
// R5 = len
// R10 = addr of return value (byte)
//
// Strategy, by length:
//	len <  8: byte-at-a-time loop (simple)
//	len >= 8: 8-byte loop (loop8), then leftover bytes
//	len >= 32: 32-byte VSX loop (loop32a) first, then the above
// Clobbers R3, R4, R6, R7, R14, CTR, V0-V2 and condition registers.
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R5,CTR			// byte count, used directly by the simple loop
	CMP	R5,$8			// only optimize >=8
	BLT	simplecheck
	DCBT	(R3)			// cache hint
	DCBT	(R4)
	CMP	R5,$32			// optimize >= 32
	MOVD	R5,R6			// R6 = len: setup8a derives the 8-byte chunk count from R6
	BLT	setup8a			// 8 byte moves only
setup32a:				// >= 32 bytes (no alignment is checked here)
	SRADCC	$5,R5,R6		// number of 32 byte chunks to compare
	MOVD	R6,CTR
	MOVD	$16,R14			// index for the second 16-byte VSX load
loop32a:
	LXVD2X	(R3+R0), VS32		// VS32 = V0; R0 is relied on to hold 0
	LXVD2X	(R4+R0), VS33		// VS33 = V1
	VCMPEQUBCC	V0, V1, V2	// compare, setting CR6
	BGE	CR6, noteq		// taken unless CR6 reports all bytes equal
	LXVD2X	(R3+R14), VS32
	LXVD2X	(R4+R14), VS33
	VCMPEQUBCC	V0, V1, V2
	BGE	CR6, noteq
	ADD	$32,R3			// bump up to next 32
	ADD	$32,R4
	BC	16, 0, loop32a		// decrement CTR, loop while CTR != 0 (CR is ignored)
	ANDCC	$24,R5,R6		// Any 8 byte chunks?
	BEQ	leftover		// and result is 0
setup8a:
	SRADCC	$3,R6,R6		// get the 8 byte count
	BEQ	leftover		// shifted value is 0
	MOVD	R6,CTR
loop8:
	MOVD	0(R3),R6		// doublewords to compare
	ADD	$8,R3
	MOVD	0(R4),R7
	ADD	$8,R4
	CMP	R6,R7			// match?
	BC	8,2,loop8		// decrement CTR, loop while CTR != 0 && CR0 EQ
	BNE	noteq			// loop exited on a mismatch rather than on CTR == 0
leftover:
	ANDCC	$7,R5,R6		// check for leftover bytes
	BEQ	equal
	MOVD	R6,CTR
	BR	simple
simplecheck:
	CMP	R5,$0			// zero-length inputs are equal
	BEQ	equal
simple:
	MOVBZ	0(R3), R6
	ADD	$1,R3
	MOVBZ	0(R4), R7
	ADD	$1,R4
	CMP	R6, R7
	BNE	noteq
	BC	8,2,simple		// decrement CTR, loop while CTR != 0 && CR0 EQ
	BNE	noteq			// redundant: a mismatch already branched above
	BR	equal
noteq:
	MOVB	$0, (R10)
	RET
equal:
	MOVD	$1, R3
	MOVB	R3, (R10)
	RET