github.com/emmansun/gmsm@v0.29.1/internal/subtle/xor_s390x.s (about)

     1  // Copyright 2024 Sun Yimin. All rights reserved.
     2  // Use of this source code is governed by a MIT-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // func xorBytes(dst, a, b *byte, n int)
    10  TEXT ·xorBytes(SB),NOSPLIT,$0-32
    11  	MOVD	dst+0(FP), R1
    12  	MOVD	a+8(FP), R2
    13  	MOVD	b+16(FP), R3
    14  	MOVD	n+24(FP), R4
    15  
    16  	MOVD	$0, R5
    17  	CMPBLT	R4, $64, tail
    18  
    19  loop_64:
    20  	VL 0(R2)(R5*1), V0
    21  	VL 16(R2)(R5*1), V1
    22  	VL 32(R2)(R5*1), V2
    23  	VL 48(R2)(R5*1), V3
    24  	VL 0(R3)(R5*1), V4
    25  	VL 16(R3)(R5*1), V5
    26  	VL 32(R3)(R5*1), V6
    27  	VL 48(R3)(R5*1), V7
    28  	VX V0, V4, V4
    29  	VX V1, V5, V5
    30  	VX V2, V6, V6
    31  	VX V3, V7, V7
    32  	VST V4, 0(R1)(R5*1)
    33  	VST V5, 16(R1)(R5*1)
    34  	VST V6, 32(R1)(R5*1)
    35  	VST V7, 48(R1)(R5*1)
    36  	LAY	64(R5), R5
    37  	SUB	$64, R4
    38  	CMPBGE	R4, $64, loop_64
    39  
    40  tail:
    41  	CMPBEQ	R4, $0, done
    42  	CMPBLT	R4, $32, less_than32
    43  	VL 0(R2)(R5*1), V0
    44  	VL 16(R2)(R5*1), V1
    45  	VL 0(R3)(R5*1), V2
    46  	VL 16(R3)(R5*1), V3
    47  	VX V0, V2, V2
    48  	VX V1, V3, V3
    49  	VST V2, 0(R1)(R5*1)
    50  	VST V3, 16(R1)(R5*1)
    51  	LAY	32(R5), R5
    52  	SUB	$32, R4
    53  
    54  less_than32:
    55  	CMPBLT	R4, $16, less_than16
    56  	VL 0(R2)(R5*1), V0
    57  	VL 0(R3)(R5*1), V1
    58  	VX V0, V1, V1
    59  	VST V1, 0(R1)(R5*1)
    60  	LAY	16(R5), R5
    61  	SUB	$16, R4
    62  
    63  less_than16:	
    64  	CMPBLT	R4, $8, less_than8
    65  	MOVD	0(R2)(R5*1), R7
    66  	MOVD	0(R3)(R5*1), R8
    67  	XOR	R7, R8
    68  	MOVD	R8, 0(R1)(R5*1)
    69  	LAY	8(R5), R5
    70  	SUB	$8, R4
    71  
    72  less_than8:
    73  	CMPBLT	R4, $4, less_than4
    74  	MOVWZ	0(R2)(R5*1), R7
    75  	MOVWZ	0(R3)(R5*1), R8
    76  	XOR	R7, R8
    77  	MOVW	R8, 0(R1)(R5*1)
    78  	LAY	4(R5), R5
    79  	SUB	$4, R4
    80  
    81  less_than4:
    82  	CMPBLT	R4, $2, less_than2
    83  	MOVHZ	0(R2)(R5*1), R7
    84  	MOVHZ	0(R3)(R5*1), R8
    85  	XOR	R7, R8
    86  	MOVH	R8, 0(R1)(R5*1)
    87  	LAY	2(R5), R5
    88  	SUB	$2, R4
    89  
    90  less_than2:
    91  	CMPBEQ	R4, $0, done
    92  	MOVB	0(R2)(R5*1), R7
    93  	MOVB	0(R3)(R5*1), R8
    94  	XOR	R7, R8
    95  	MOVB	R8, 0(R1)(R5*1)
    96  
    97  done:
    98  	RET