github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/bytealg/equal_ppc64x.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "go_asm.h"
     8  #include "textflag.h"
     9  
    10  // Equal reports whether the byte slices a and b have the same length
    11  // and the same contents; the one-byte result is stored at ret+48(FP).
    12  // Each slice occupies 24 bytes of the argument frame: base pointer
    13  // first, length at offset 8. Slices of equal length tail-branch to
    14  // memeqbody<>, which writes the result through R10 and returns.
    15  TEXT ·Equal(SB),NOSPLIT|NOFRAME,$0-49
    16  	MOVD	a_len+8(FP), R4
    17  	MOVD	b_len+32(FP), R5
    18  	CMP	R5, R4		// unequal lengths are not equal
    19  	BNE	noteq
    20  	MOVD	a_base+0(FP), R3	// R3 = first buffer
    21  	MOVD	b_base+24(FP), R4	// R4 = second buffer (length stays in R5)
    22  	MOVD	$ret+48(FP), R10	// R10 = address of the result byte
    23  	BR	memeqbody<>(SB)
    24  
    25  noteq:
    26  	MOVBZ	$0,ret+48(FP)
    27  	RET
    28  
    29  // memequal(a, b unsafe.Pointer, size uintptr) bool
    30  // Loads the arguments into the registers memeqbody<> expects.
    31  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
    32  	MOVD	$ret+24(FP), R10	// R10 = address of the result byte
    33  	MOVD	size+16(FP), R5		// R5 = number of bytes to compare
    34  	MOVD	b+8(FP), R4
    35  	MOVD	a+0(FP), R3
    36  	BR	memeqbody<>(SB)		// tail-branch; memeqbody stores the result
    37  
    38  // memequal_varlen(a, b unsafe.Pointer) bool
    39  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
    40  	MOVD	b+8(FP), R4
    41  	MOVD	a+0(FP), R3
    42  	CMP	R3, R4
    43  	BNE	compare			// distinct pointers: compare the contents
    44  	MOVD	$1, R3			// same pointer: equal without reading memory
    45  	MOVB	R3, ret+16(FP)
    46  	RET
    47  compare:
    48  	MOVD	$ret+16(FP), R10	// R10 = address of the result byte
    49  	MOVD	8(R11), R5		// compiler stores size at offset 8 in the closure
    50  	BR	memeqbody<>(SB)
    51  
    52  // memeqbody<> compares len bytes at s1 and s2 and stores 1 (equal) or
    53  // 0 (not equal) into the byte addressed by R10 before returning.
    54  // In: R3 = s1, R4 = s2, R5 = len, R10 = addr of return value (byte).
    55  // Scratch: R3, R4 (advanced), R6, R7, R14, CTR, CR0, CR6, V0-V2.
    56  // Strategy: 32-byte VSX chunks, then 8-byte words, then single bytes.
    57  TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
    58  	MOVD    R5,CTR		// byte count, used only by the len < 8 path
    59  	CMP     R5,$8		// only optimize >=8
    60  	BLT     simplecheck
    61  	DCBT	(R3)		// cache hint
    62  	DCBT	(R4)
    63  	CMP	R5,$32		// optimize >= 32
    64  	MOVD	R5,R6		// needed if setup8a branch
    65  	BLT	setup8a		// 8 byte moves only
    66  setup32a:                       // len >= 32: compare 32 bytes per iteration
    67  	SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
    68  	MOVD	R6,CTR
    69  	MOVD	$16,R14		// offset of the second 16-byte vector in a chunk
    70  loop32a:
    71  	LXVD2X  (R3+R0), VS32	// VS32 = V0; R0 presumably holds zero (Go ppc64 convention)
    72  	LXVD2X  (R4+R0), VS33	// VS33 = V1
    73  	VCMPEQUBCC V0, V1, V2	// compare, setting CR6
    74  	BGE     CR6, noteq	// CR6 LT clear: at least one byte differs
    75  	LXVD2X  (R3+R14), VS32
    76  	LXVD2X  (R4+R14), VS33
    77  	VCMPEQUBCC V0, V1, V2
    78  	BGE     CR6, noteq
    79  	ADD     $32,R3		// bump up to next 32
    80  	ADD     $32,R4
    81  	BC      16, 0, loop32a  // decrement CTR, loop while CTR != 0
    82  	ANDCC	$24,R5,R6       // Any 8 byte chunks?
    83  	BEQ	leftover	// and result is 0
    84  setup8a:
    85  	SRADCC  $3,R6,R6        // get the 8 byte count
    86  	BEQ	leftover	// shifted value is 0
    87  	MOVD    R6,CTR
    88  loop8:
    89  	MOVD    0(R3),R6        // doublewords to compare
    90  	ADD	$8,R3
    91  	MOVD    0(R4),R7
    92  	ADD     $8,R4
    93  	CMP     R6,R7           // match?
    94  	BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 is EQ
    95  	BNE     noteq		// left the loop on a mismatch
    96  leftover:
    97  	ANDCC   $7,R5,R6        // check for leftover bytes
    98  	BEQ     equal
    99  	MOVD    R6,CTR
   100  	BR	simple
   101  simplecheck:
   102  	CMP	R5,$0		// len < 8: CTR already holds len
   103  	BEQ	equal		// zero-length inputs are equal
   104  simple:				// byte-at-a-time compare; CTR = bytes left
   105  	MOVBZ   0(R3), R6
   106  	ADD	$1,R3
   107  	MOVBZ   0(R4), R7
   108  	ADD     $1,R4
   109  	CMP     R6, R7
   110  	BNE     noteq
   111  	// CR0 is EQ here, so the loop continues purely on the decremented CTR.
   112  	BC      8,2,simple
   113  	BR	equal
   114  noteq:
   115  	MOVB    $0, (R10)
   116  	RET
   117  equal:
   118  	MOVD	$1, R3
   119  	MOVB	R3, (R10)
   120  	RET
   121