// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

// func xorBytes(dst, a, b *byte, n int)
//
// xorBytes writes a[i] ^ b[i] to dst[i] for the n bytes starting at each
// pointer. Work is done in decreasing chunk sizes: a counted loop of
// 64-byte iterations, then at most one 32-byte step, at most one 16-byte
// step, and finally a tail of fewer than 16 bytes.
//
// Register use:
//	R3  = dst, R4 = a, R5 = b (base pointers, never modified)
//	R6  = number of bytes still to process
//	R8  = byte index already processed (offset applied to all three bases)
//	R7  = 64-byte iteration count (copied into CTR)
//	R9  = n mod 64 while the 64-byte loop runs; reused as R8+16 in xor32
//	R10, R14, R15 = R8+16, R8+32, R8+48 inside the 64-byte loop
//
// NOTE(review): n is compared as an unsigned value (CMPU) on entry, so the
// routine presumably relies on its caller to pass n >= 0 - confirm against
// the Go wrapper.
TEXT ·xorBytes(SB), NOSPLIT, $0
	MOVD	dst+0(FP), R3	// R3 = dst
	MOVD	a+8(FP), R4	// R4 = a
	MOVD	b+16(FP), R5	// R5 = b
	MOVD	n+24(FP), R6	// R6 = n

	// Both size comparisons are issued up front into separate condition
	// register fields (CR7 and CR6) and consumed by the two branches below.
	CMPU	R6, $64, CR7	// CR7 = compare(n, 64)
	MOVD	R0, R8		// R8 = index = 0 (R0 is the zero register by Go ppc64 convention)
	CMPU	R6, $8, CR6	// CR6 = compare(n, 8)
	BLE	CR6, small	// n <= 8: go straight to the tail handling
	BLT	CR7, xor32	// 8 < n < 64: skip the 64-byte loop

	// Case for n >= 64 bytes
	// (no branch in this function targets preloop64; it is reached by fallthrough)
preloop64:
	SRD	$6, R6, R7	// R7 = n / 64 = number of loop iterations
	MOVD	R7, CTR
	MOVD	$16, R10	// offsets of the 2nd, 3rd and 4th vector
	MOVD	$32, R14	// within each 64-byte chunk
	MOVD	$48, R15
	ANDCC	$63, R6, R9	// R9 = n % 64 = trailing bytes; also sets CR0 for the BEQLR after the loop
	PCALIGN	$16
	// Process 64 bytes per iteration:
	// load 4 vectors from a and 4 from b, XOR the corresponding
	// vectors and store the 4 results to dst.
	// The same load/store form is used for both inputs and the output,
	// so whatever element ordering LXVD2X/STXVD2X apply cancels out for
	// a bytewise XOR.
loop64:
	LXVD2X	(R4)(R8), VS32
	LXVD2X	(R4)(R10), VS34
	LXVD2X	(R4)(R14), VS36
	LXVD2X	(R4)(R15), VS38
	LXVD2X	(R5)(R8), VS33
	LXVD2X	(R5)(R10), VS35
	LXVD2X	(R5)(R14), VS37
	LXVD2X	(R5)(R15), VS39
	XXLXOR	VS32, VS33, VS32
	XXLXOR	VS34, VS35, VS34
	XXLXOR	VS36, VS37, VS36
	XXLXOR	VS38, VS39, VS38
	STXVD2X	VS32, (R3)(R8)
	STXVD2X	VS34, (R3)(R10)
	STXVD2X	VS36, (R3)(R14)
	STXVD2X	VS38, (R3)(R15)
	ADD	$64, R8		// advance all four offsets to the next chunk
	ADD	$64, R10
	ADD	$64, R14
	ADD	$64, R15
	BDNZ	loop64		// decrement CTR, loop while it is non-zero
	// CR0 was set by the ANDCC before the loop: return now if n was a
	// multiple of 64.
	BC	12,2,LR		// BEQLR
	MOVD	R9, R6		// R6 = trailing byte count (1..63)
	CMP	R6, $8
	BLE	small
	// Case for 8 < remaining < 64 bytes
	// Process 32 bytes if available
xor32:
	CMP	R6, $32
	BLT	xor16
	ADD	$16, R8, R9	// R9 = offset of the second 16-byte vector
	LXVD2X	(R4)(R8), VS32
	LXVD2X	(R4)(R9), VS33
	LXVD2X	(R5)(R8), VS34
	LXVD2X	(R5)(R9), VS35
	XXLXOR	VS32, VS34, VS32
	XXLXOR	VS33, VS35, VS33
	STXVD2X	VS32, (R3)(R8)
	STXVD2X	VS33, (R3)(R9)
	ADD	$32, R8
	ADD	$-32, R6
	CMP	R6, $8
	BLE	small
	// Case for 8 < remaining < 32 bytes
	// Process 16 bytes if available
xor16:
	CMP	R6, $16
	BLT	xor8
	LXVD2X	(R4)(R8), VS32
	LXVD2X	(R5)(R8), VS33
	XXLXOR	VS32, VS33, VS32
	STXVD2X	VS32, (R3)(R8)
	ADD	$16, R8
	ADD	$-16, R6
	// Fewer than 16 bytes remain from here on; return if none are left.
small:
	CMP	R6, $0
	BC	12,2,LR		// BEQLR
xor8:
#ifdef GOPPC64_power10
	// POWER10 build: handle the whole tail (at most 15 bytes here) with
	// one pair of length-controlled vector loads and one store.
	SLD	$56,R6,R17	// LXVL/STXVL read the byte count from the top byte of the length register
	ADD	R4,R8,R18	// R18 = &a[index]
	ADD	R5,R8,R19	// R19 = &b[index]
	ADD	R3,R8,R20	// R20 = &dst[index]
	LXVL	R18,R17,V0
	LXVL	R19,R17,V1
	VXOR	V0,V1,V1
	STXVL	V1,R20,R17
	RET
#else
	// Scalar tail: peel off 8, 4, 2 and 1 bytes in turn.
	CMP	R6, $8
	BLT	xor4
	// Case for 8 <= remaining < 16 bytes
	MOVD	(R4)(R8), R14	// R14 = a[i,...,i+7]
	MOVD	(R5)(R8), R15	// R15 = b[i,...,i+7]
	XOR	R14, R15, R16	// R16 = a[] ^ b[]
	SUB	$8, R6		// n = n - 8
	MOVD	R16, (R3)(R8)	// Store to dst
	ADD	$8, R8
xor4:
	CMP	R6, $4
	BLT	xor2
	MOVWZ	(R4)(R8), R14	// 4 bytes of a
	MOVWZ	(R5)(R8), R15	// 4 bytes of b
	XOR	R14, R15, R16
	MOVW	R16, (R3)(R8)
	ADD	$4,R8
	ADD	$-4,R6
xor2:
	CMP	R6, $2
	BLT	xor1
	MOVHZ	(R4)(R8), R14	// 2 bytes of a
	MOVHZ	(R5)(R8), R15	// 2 bytes of b
	XOR	R14, R15, R16
	MOVH	R16, (R3)(R8)
	ADD	$2,R8
	ADD	$-2,R6
xor1:
	CMP	R6, $0
	BC	12,2,LR		// BEQLR
	MOVBZ	(R4)(R8), R14	// R14 = a[i]
	MOVBZ	(R5)(R8), R15	// R15 = b[i]
	XOR	R14, R15, R16	// R16 = a[i] ^ b[i]
	MOVB	R16, (R3)(R8)	// Store to dst
#endif
	// No branch in this function targets done; the scalar tail falls
	// through to the RET below.
done:
	RET