gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/internal/bytealg/equal_ppc64x.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ppc64 || ppc64le
     6  // +build ppc64 ppc64le
     7  
     8  #include "go_asm.h"
     9  #include "textflag.h"
    10  
    11  // func memequal(a, b unsafe.Pointer, size uintptr) bool
    12  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
    13  	MOVD	$ret+24(FP), R10	// R10 = address of the bool result slot
    14  	MOVD	size+16(FP), R5		// R5  = number of bytes to compare
    15  	MOVD	b+8(FP), R4		// R4  = second buffer
    16  	MOVD	a+0(FP), R3		// R3  = first buffer
    17  	// Registers are now laid out as memeqbody expects; branch (not call) so its RET returns for us.
    18  	BR	memeqbody<>(SB)
    19  
    20  // func memequal_varlen(a, b unsafe.Pointer) bool; the length is read from the closure in R11
    21  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
    22  	MOVD	b+8(FP), R4		// R4 = second buffer
    23  	MOVD	a+0(FP), R3		// R3 = first buffer
    24  	CMP	R3, R4
    25  	BEQ	same			// identical pointers: no need to look at memory
    26  	MOVD	$ret+16(FP), R10	// R10 = address of the bool result slot
    27  	MOVD	8(R11), R5		// compiler stores size at offset 8 in the closure
    28  	BR	memeqbody<>(SB)		// tail-branch; memeqbody stores the result and returns
    29  same:
    30  	MOVD	$1, R3			// true
    31  	MOVB	R3, ret+16(FP)
    32  	RET
    33  
    34  // memeqbody: compare len bytes at s1 and s2 in 32-byte, then 8-byte, then 1-byte steps; store 1 or 0 via R10.
    35  // R3 = s1 (advanced as data is consumed)
    36  // R4 = s2 (advanced in step with R3)
    37  // R5 = len (not modified); scratch used: R6, R7, R14, CTR, CR0, CR6, V0-V2
    38  // R10 = addr of return value (byte); receives 1 if all bytes match (or len == 0), else 0
    39  TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
    40  	MOVD    R5,CTR		// CTR = len; this is the byte-loop count when len < 8
    41  	CMP     R5,$8		// only optimize >=8
    42  	BLT     simplecheck	// short input: byte-at-a-time path
    43  	DCBT	(R3)		// cache hint
    44  	DCBT	(R4)		// cache hint for the second buffer
    45  	CMP	R5,$32		// optimize >= 32
    46  	MOVD	R5,R6		// needed if setup8a branch
    47  	BLT	setup8a		// 8 byte moves only
    48  setup32a:                       // >= 32 bytes (note: no alignment test precedes this label)
    49  	SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
    50  	MOVD	R6,CTR		// CTR = 32-byte iteration count
    51  	MOVD	$16,R14		// offset of the second 16-byte half for the indexed VSX loads
    52  loop32a:
    53  	LXVD2X  (R3+R0), VS32	// VS32 = V0; s1 bytes 0-15 (index R0, presumably zero - confirm)
    54  	LXVD2X  (R4+R0), VS33	// VS33 = V1; s2 bytes 0-15
    55  	VCMPEQUBCC V0, V1, V2	// compare, setting CR6
    56  	BGE     CR6, noteq	// not every byte of the 16 matched
    57  	LXVD2X  (R3+R14), VS32	// s1 bytes 16-31
    58  	LXVD2X  (R4+R14), VS33	// s2 bytes 16-31
    59  	VCMPEQUBCC V0, V1, V2	// compare second half, setting CR6
    60  	BGE     CR6, noteq	// not every byte of the 16 matched
    61  	ADD     $32,R3		// bump up to next 32
    62  	ADD     $32,R4
    63  	BC      16, 0, loop32a  // decrement CTR, loop while CTR != 0 (BO=16 does not test CR)
    64  	ANDCC	$24,R5,R6       // Any 8 byte chunks? R6 = len & 24
    65  	BEQ	leftover	// and result is 0
    66  setup8a:			// R6 = len (entry with len < 32) or len & 24 (after the 32-byte loop)
    67  	SRADCC  $3,R6,R6        // get the 8 byte count
    68  	BEQ	leftover	// shifted value is 0
    69  	MOVD    R6,CTR		// CTR = 8-byte iteration count
    70  loop8:
    71  	MOVD    0(R3),R6        // doublewords to compare
    72  	ADD	$8,R3
    73  	MOVD    0(R4),R7
    74  	ADD     $8,R4
    75  	CMP     R6,R7           // match?
    76  	BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 EQ is set
    77  	BNE     noteq		// loop ended on a mismatch rather than on CTR reaching 0
    78  leftover:
    79  	ANDCC   $7,R5,R6        // check for leftover bytes
    80  	BEQ     equal		// len was a multiple of 8: everything matched
    81  	MOVD    R6,CTR		// CTR = 1..7 trailing bytes
    82  	BR	simple
    83  simplecheck:
    84  	CMP	R5,$0
    85  	BEQ	equal		// zero-length inputs compare equal
    86  simple:				// byte loop; CTR = bytes still to compare
    87  	MOVBZ   0(R3), R6
    88  	ADD	$1,R3
    89  	MOVBZ   0(R4), R7
    90  	ADD     $1,R4
    91  	CMP     R6, R7
    92  	BNE     noteq		// mismatch: leave immediately
    93  	BC      8,2,simple	// decrement CTR; loop while CTR != 0 (CR0 EQ is known set here)
    94  	BNE	noteq		// never taken: a mismatch already left via the BNE two lines up
    95  	BR	equal
    96  noteq:
    97  	MOVB    $0, (R10)	// *ret = false
    98  	RET
    99  equal:
   100  	MOVD	$1, R3
   101  	MOVB	R3, (R10)	// *ret = true
   102  	RET
   103