// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// func xorBytes(dst, a, b *byte, n int)
//
// xorBytes stores a[i] XOR b[i] into dst[i] for every i in [0, n).
// A count that is zero or negative leaves dst untouched.
//
// Register use:
//   R1 = dst, R2 = a, R3 = b
//   R4 = number of bytes still to process
//   R5 = byte offset applied to all three pointers
//   R7, R8 and V0-V7 are scratch
//
// The count is consumed in 64-byte blocks first, then by at most one
// 32-, 16-, 8-, 4-, 2- and 1-byte step, in that order. Within every
// step all loads are issued before the first store of that step.
TEXT ·xorBytes(SB),NOSPLIT,$0-32
	MOVD	dst+0(FP), R1
	MOVD	a+8(FP), R2
	MOVD	b+16(FP), R3
	MOVD	n+24(FP), R4

	MOVD	$0, R5                  // running offset starts at 0
	CMPBLT	R4, $64, tail           // fewer than 64 bytes: skip the wide loop

loop_64:
	// Four 16-byte vectors from a, four from b.
	VL	0(R2)(R5*1), V0
	VL	16(R2)(R5*1), V1
	VL	32(R2)(R5*1), V2
	VL	48(R2)(R5*1), V3
	VL	0(R3)(R5*1), V4
	VL	16(R3)(R5*1), V5
	VL	32(R3)(R5*1), V6
	VL	48(R3)(R5*1), V7
	VX	V0, V4, V4
	VX	V1, V5, V5
	VX	V2, V6, V6
	VX	V3, V7, V7
	VST	V4, 0(R1)(R5*1)
	VST	V5, 16(R1)(R5*1)
	VST	V6, 32(R1)(R5*1)
	VST	V7, 48(R1)(R5*1)
	LAY	64(R5), R5              // offset += 64
	SUB	$64, R4                 // remaining -= 64
	CMPBGE	R4, $64, loop_64

tail:
	// Signed "<= 0" rather than "== 0": a negative count would otherwise
	// fall through every signed "less than" test below and reach the
	// 1-byte step.
	CMPBLE	R4, $0, done
	CMPBLT	R4, $32, less_than32
	VL	0(R2)(R5*1), V0
	VL	16(R2)(R5*1), V1
	VL	0(R3)(R5*1), V2
	VL	16(R3)(R5*1), V3
	VX	V0, V2, V2
	VX	V1, V3, V3
	VST	V2, 0(R1)(R5*1)
	VST	V3, 16(R1)(R5*1)
	LAY	32(R5), R5
	SUB	$32, R4

less_than32:
	CMPBLT	R4, $16, less_than16
	VL	0(R2)(R5*1), V0
	VL	0(R3)(R5*1), V1
	VX	V0, V1, V1
	VST	V1, 0(R1)(R5*1)
	LAY	16(R5), R5
	SUB	$16, R4

less_than16:
	// From here on the general-purpose registers R7/R8 carry the data.
	CMPBLT	R4, $8, less_than8
	MOVD	0(R2)(R5*1), R7
	MOVD	0(R3)(R5*1), R8
	XOR	R7, R8
	MOVD	R8, 0(R1)(R5*1)
	LAY	8(R5), R5
	SUB	$8, R4

less_than8:
	CMPBLT	R4, $4, less_than4
	MOVWZ	0(R2)(R5*1), R7
	MOVWZ	0(R3)(R5*1), R8
	XOR	R7, R8
	MOVW	R8, 0(R1)(R5*1)
	LAY	4(R5), R5
	SUB	$4, R4

less_than4:
	CMPBLT	R4, $2, less_than2
	MOVHZ	0(R2)(R5*1), R7
	MOVHZ	0(R3)(R5*1), R8
	XOR	R7, R8
	MOVH	R8, 0(R1)(R5*1)
	LAY	2(R5), R5
	SUB	$2, R4

less_than2:
	// At most one byte is left at this point.
	CMPBEQ	R4, $0, done
	MOVB	0(R2)(R5*1), R7
	MOVB	0(R3)(R5*1), R8
	XOR	R7, R8
	MOVB	R8, 0(R1)(R5*1)         // only the low byte is stored

done:
	RET