github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/equal_ppc64x.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "go_asm.h"
     8  #include "textflag.h"
     9  
    10  // memequal(a, b unsafe.Pointer, size uintptr) bool
        //
        // Unpacks the stack arguments into the registers memeqbody<> works on
        // (R3/R4 = buffers, R5 = byte count, R10 = address of the result byte)
        // and branches to it; memeqbody<> stores the result and returns.
    11  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
    12  	MOVD	$ret+24(FP), R10	// R10 = &ret, where the bool result is written
    13  	MOVD	size+16(FP), R5		// R5  = number of bytes to compare
    14  	MOVD	b+8(FP), R4		// R4  = second buffer
    15  	MOVD	a+0(FP), R3		// R3  = first buffer
    16  
    17  	BR	memeqbody<>(SB)		// tail branch: NOFRAME, nothing to unwind
    18  
    19  // memequal_varlen(a, b unsafe.Pointer) bool
        //
        // Same contract as memequal, except that the byte count is not passed as
        // an argument: it is read from offset 8 of the closure addressed by R11.
        // Identical pointers are reported equal without touching memory.
    20  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
    21  	MOVD	a+0(FP), R3
    22  	MOVD	b+8(FP), R4
    23  	CMP	R3, R4
    24  	BNE	compare			// distinct pointers: contents must be scanned
    25  	MOVD	$1, R3			// a == b: trivially equal
    26  	MOVB	R3, ret+16(FP)
    27  	RET
    28  compare:
    29  	MOVD	$ret+16(FP), R10	// memeqbody<> stores the result through R10
    30  	MOVD	8(R11), R5		// compiler stores size at offset 8 in the closure
    31  	BR	memeqbody<>(SB)
    32  
    33  // Do an efficient memequal for ppc64
    34  // R3 = s1
    35  // R4 = s2
    36  // R5 = len
    37  // R10 = addr of return value (byte)
        //
        // Stores 1 through R10 when the two buffers hold the same len bytes and 0
        // otherwise, then returns. The work is split by len:
        //   len >= 32      32 bytes per iteration (two 16-byte VSX loads per buffer),
        //                  then any remaining doublewords, then any remaining bytes
        //   8 <= len < 32  8 bytes per iteration, then any remaining bytes
        //   len < 8        one byte per iteration (len == 0 compares equal)
        // Registers written: R3, R4, R6, R7, R14, CTR, VS32/VS33 (V0/V1), V2 and
        // the condition register fields set by the compares (CR0, CR6).
    38  TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
    39  	MOVD    R5,CTR		// byte count for the len < 8 loop; reloaded on the other paths
    40  	CMP     R5,$8		// only optimize >=8
    41  	BLT     simplecheck
    42  	DCBT	(R3)		// cache hint for the start of each buffer
    43  	DCBT	(R4)
    44  	CMP	R5,$32		// optimize >= 32
    45  	MOVD	R5,R6		// setup8a expects the byte count in R6
    46  	BLT	setup8a		// 8 byte moves only
    47  setup32a:                       // len >= 32; no alignment test is made on this path
    48  	SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
    49  	MOVD	R6,CTR
    50  	MOVD	$16,R14		// offset of the second 16 bytes within each 32 byte chunk
    51  loop32a:
    52  	LXVD2X  (R3+R0), VS32	// VS32 = V0, first 16 bytes of s1
    53  	LXVD2X  (R4+R0), VS33	// VS33 = V1, first 16 bytes of s2
    54  	VCMPEQUBCC V0, V1, V2	// byte-wise compare, setting CR6
    55  	BGE     CR6, noteq	// taken unless all 16 bytes matched
    56  	LXVD2X  (R3+R14), VS32	// second 16 bytes of s1
    57  	LXVD2X  (R4+R14), VS33	// second 16 bytes of s2
    58  	VCMPEQUBCC V0, V1, V2
    59  	BGE     CR6, noteq
    60  	ADD     $32,R3		// bump up to next 32
    61  	ADD     $32,R4
    62  	BC      16, 0, loop32a  // decrement CTR, loop while CTR != 0 (CR is not tested)
    63  	ANDCC	$24,R5,R6       // bytes left in whole doublewords: 0, 8, 16 or 24
    64  	BEQ	leftover	// and result is 0
    65  setup8a:
    66  	SRADCC  $3,R6,R6        // get the 8 byte count
    67  	BEQ	leftover	// shifted value is 0
    68  	MOVD    R6,CTR
    69  loop8:
    70  	MOVD    0(R3),R6        // doublewords to compare
    71  	ADD	$8,R3
    72  	MOVD    0(R4),R7
    73  	ADD     $8,R4
    74  	CMP     R6,R7           // match?
    75  	BC	8,2,loop8	// decrement CTR, loop while CTR != 0 and the compare was equal
    76  	BNE     noteq		// loop ended on a mismatch rather than on CTR == 0
    77  leftover:
    78  	ANDCC   $7,R5,R6        // check for leftover bytes
    79  	BEQ     equal
    80  	MOVD    R6,CTR
    81  	BR	simple
    82  simplecheck:
    83  	CMP	R5,$0		// nothing to compare: empty buffers are equal
    84  	BEQ	equal
    85  simple:				// byte loop; CTR = number of bytes to compare (> 0)
    86  	MOVBZ   0(R3), R6
    87  	ADD	$1,R3
    88  	MOVBZ   0(R4), R7
    89  	ADD     $1,R4
    90  	CMP     R6, R7
    91  	BNE     noteq
    92  	BC      8,2,simple	// decrement CTR, loop while CTR != 0 (equality already established)
    93  	BNE	noteq		// not reachable with "not equal": the BNE above already left the loop
    94  	BR	equal
    95  noteq:
    96  	MOVB    $0, (R10)	// result = false
    97  	RET
    98  equal:
    99  	MOVD	$1, R3
   100  	MOVB	R3, (R10)	// result = true
   101  	RET
   102