gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/internal/xor/xor_arm64.s (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // func xorBytesARM64(dst, a, b *byte, n int)
     8  //
     9  // Writes a[i] XOR b[i] to dst[i] for the n bytes addressed by the three
    10  // pointers. n is compared as a signed value and is assumed to be
    11  // non-negative; nothing in this routine checks that (TODO confirm the
    12  // Go caller guarantees it).
    13  //
    14  // Register roles:
    15  //	R0, R1, R2  dst, a, b cursors, advanced by post-increment (.P) accesses
    16  //	R3          byte count: reduced by 64 per pass of the wide loop, after
    17  //	            which its low six bits select the tail steps
    18  //	R4-R7       integer scratch for the 16/8/4/2/1-byte tail steps
    19  //	V0-V7       vector scratch for the 64- and 32-byte steps
    20  TEXT ·xorBytesARM64(SB), NOSPLIT|NOFRAME, $0
    21  	MOVD	dst+0(FP), R0
    22  	MOVD	a+8(FP), R1
    23  	MOVD	b+16(FP), R2
    24  	MOVD	n+24(FP), R3
    25  	// Fewer than 64 bytes: bypass the wide loop.
    26  	CMP	$64, R3
    27  	BLT	remainder
    28  xor64:
    29  	// One pass handles 64 bytes: four 16-byte vectors from each input.
    30  	VLD1.P	64(R1), [V0.B16, V1.B16, V2.B16, V3.B16]
    31  	VLD1.P	64(R2), [V4.B16, V5.B16, V6.B16, V7.B16]
    32  	VEOR	V4.B16, V0.B16, V0.B16
    33  	VEOR	V5.B16, V1.B16, V1.B16
    34  	VEOR	V6.B16, V2.B16, V2.B16
    35  	VEOR	V7.B16, V3.B16, V3.B16
    36  	VST1.P	[V0.B16, V1.B16, V2.B16, V3.B16], 64(R0)
    37  	// The flags written by SUBS are replaced by the CMP that follows; the
    38  	// loop repeats while at least 64 bytes remain.
    39  	SUBS	$64, R3
    40  	CMP	$64, R3
    41  	BGE	xor64
    42  remainder:
    43  	// R3 < 64 on every path that reaches this label. Zero means nothing is
    44  	// left; otherwise each set bit of R3 (32, 16, 8, 4, 2, 1) triggers one
    45  	// step of that size, largest first.
    46  	CBZ	R3, done
    47  	TBZ	$5, R3, under32
    48  	// 32 bytes: two vectors from each input.
    49  	VLD1.P	32(R1), [V0.B16, V1.B16]
    50  	VLD1.P	32(R2), [V2.B16, V3.B16]
    51  	VEOR	V2.B16, V0.B16, V0.B16
    52  	VEOR	V3.B16, V1.B16, V1.B16
    53  	VST1.P	[V0.B16, V1.B16], 32(R0)
    54  under32:
    55  	TBZ	$4, R3, under16
    56  	// 16 bytes: a pair of 64-bit registers from each input.
    57  	LDP.P	16(R1), (R4, R5)
    58  	LDP.P	16(R2), (R6, R7)
    59  	EOR	R6, R4, R4
    60  	EOR	R7, R5, R5
    61  	STP.P	(R4, R5), 16(R0)
    62  under16:
    63  	TBZ	$3, R3, under8
    64  	// 8 bytes: one 64-bit word.
    65  	MOVD.P	8(R1), R4
    66  	MOVD.P	8(R2), R5
    67  	EOR	R5, R4, R4
    68  	MOVD.P	R4, 8(R0)
    69  under8:
    70  	TBZ	$2, R3, under4
    71  	// 4 bytes: one 32-bit word.
    72  	MOVWU.P	4(R1), R4
    73  	MOVWU.P	4(R2), R5
    74  	EORW	R5, R4, R4
    75  	MOVWU.P	R4, 4(R0)
    76  under4:
    77  	TBZ	$1, R3, under2
    78  	// 2 bytes: one halfword.
    79  	MOVHU.P	2(R1), R4
    80  	MOVHU.P	2(R2), R5
    81  	EORW	R5, R4, R4
    82  	MOVHU.P	R4, 2(R0)
    83  under2:
    84  	TBZ	$0, R3, done
    85  	// Final single byte; the cursors are not advanced because nothing follows.
    86  	MOVBU	(R1), R4
    87  	MOVBU	(R2), R5
    88  	EORW	R5, R4, R4
    89  	MOVBU	R4, (R0)
    90  done:
    91  	RET