// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego
// +build gc,!purego

#include "textflag.h"

// This was ported from the amd64 implementation.

// POLY1305_ADD(src, acc0, acc1, acc2, w0, w1, w2)
//
// Folds one full 16-byte block into the three-limb accumulator:
//   - loads the two 64-bit words at 0(src) and 8(src) into w0 and w1,
//   - adds them to acc0 and acc1 with carry propagation,
//   - adds the constant 1 (held in w2) plus the incoming carry to acc2,
//   - advances src by 16 bytes.
// w0, w1 and w2 are scratch and are overwritten.
#define POLY1305_ADD(src, acc0, acc1, acc2, w0, w1, w2) \
	MOVD (src), w0;        \
	MOVD 8(src), w1;       \
	MOVD $1, w2;           \
	ADDC w0, acc0, acc0;   \
	ADDE w1, acc1, acc1;   \
	ADDE w2, acc2;         \
	ADD  $16, src

// POLY1305_MUL(acc0, acc1, acc2, key0, key1, p0, p1, p2, p3, u0, u1)
//
// Multiplies the accumulator (acc0, acc1, acc2) by the two-limb value
// (key0, key1), collecting the product in p0..p3, then folds the upper
// part back into the accumulator: acc2 keeps only the low two bits of
// p2, while everything above those two bits is added back twice, once
// masked in place (AND with -4) and once shifted right by two, i.e.
// 4*c + c = 5*c for the overflow c.
//
// The ADDC/ADDE/ADDZE instructions form carry chains, so the order of
// the instructions below must not be changed. p0..p3, u0 and u1 are
// scratch and are overwritten.
#define POLY1305_MUL(acc0, acc1, acc2, key0, key1, p0, p1, p2, p3, u0, u1) \
	MULLD  key0, acc0, p0;    \
	MULLD  key0, acc1, u0;    \
	MULHDU key0, acc0, p1;    \
	MULHDU key0, acc1, u1;    \
	ADDC   u0, p1, p1;        \
	MULLD  key0, acc2, p2;    \
	ADDZE  u1;                \
	MULHDU key1, acc0, u0;    \
	MULLD  key1, acc0, acc0;  \
	ADD    u1, p2, p2;        \
	ADDC   acc0, p1, p1;      \
	MULLD  acc2, key1, p3;    \
	ADDZE  u0, acc0;          \
	MULHDU key1, acc1, u1;    \
	MULLD  key1, acc1, u0;    \
	ADDC   u0, p2, p2;        \
	ADDE   u1, p3, p3;        \
	ADDC   acc0, p2, p2;      \
	MOVD   $-4, u0;           \
	MOVD   p0, acc0;          \
	MOVD   p1, acc1;          \
	ADDZE  p3;                \
	ANDCC  $3, p2, acc2;      \
	AND    p2, u0, p0;        \
	ADDC   p0, acc0, acc0;    \
	ADDE   p3, acc1, acc1;    \
	SLD    $62, p3, u0;       \
	SRD    $2, p2;            \
	ADDZE  acc2;              \
	OR     u0, p2, p2;        \
	SRD    $2, p3;            \
	ADDC   p2, acc0, acc0;    \
	ADDE   p3, acc1, acc1;    \
	ADDZE  acc2

// 16 bytes of read-only data. Nothing in this file references the symbol.
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16

// func update(state *[7]uint64, msg []byte)
//
// Absorbs msg into the accumulator kept in the first three words of
// state, using the multiplier kept in words 3 and 4. Only words 0..2 of
// state are written back.
//
// Register use:
//   R3        state pointer
//   R4        read cursor into msg
//   R5        bytes of msg still to process
//   R8-R10    accumulator limbs h0, h1, h2
//   R11, R12  multiplier limbs r0, r1
//   R16, R17  low/high word of a padded trailing block; also scratch
//             for the multiply
//   R14, R18, R20-R23  scratch
TEXT ·update(SB), $0-32
	MOVD state+0(FP), R3
	MOVD msg_base+8(FP), R4
	MOVD msg_len+16(FP), R5

	MOVD 0(R3), R8   // h0
	MOVD 8(R3), R9   // h1
	MOVD 16(R3), R10 // h2
	MOVD 24(R3), R11 // r0
	MOVD 32(R3), R12 // r1

	// Fewer than 16 bytes in total: go straight to the tail handling.
	CMP R5, $16
	BLT tail

full_block:
	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)

mul_reduce:
	// Also entered from accumulate with R5 forced to 16, so that the
	// decrement below leaves zero bytes remaining.
	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
	ADD $-16, R5
	CMP R5, $16
	BGE full_block

tail:
	// 0..15 bytes are left. Nothing left means we are finished.
	CMP  R5, $0
	BEQ  store_state
	MOVD $0, R16 // low word of the padded block
	MOVD $0, R17 // high word of the padded block

	CMP R5, $8
	BLE tail_le8

	// 9..15 bytes left. Load the last 8 bytes of msg (they end exactly
	// at the end of the message) and shift out the ones that belong to
	// the low word, leaving the len-8 high-word bytes in R17.
	ADD  $-8, R5, R21   // R21 = len - 8
	MOVD (R4)(R21), R17
	MOVD $16, R22
	SUB  R5, R22, R22   // R22 = 16 - len
	SLD  $3, R22        // bytes -> bits
	SRD  R22, R17, R17

	// Set the terminating 1 bit just above the last message byte.
	MOVD $1, R23
	SLD  $3, R21        // (len - 8) bytes -> bits
	SLD  R21, R23, R23
	OR   R23, R17, R17

	// What remains to be read is exactly the low 8 bytes.
	MOVD $8, R5

tail_le8:
	CMP R5, $8
	BLT tail_lt8

	// Exactly 8 bytes to read into the low word.
	MOVD (R4), R16

	// If the high word is still zero, the message ended on this 8-byte
	// boundary and the terminating 1 belongs in the high word.
	CMP  R17, $0
	BNE  accumulate
	MOVD $1, R17
	BR   accumulate

tail_lt8:
	// 1..7 bytes left: assemble them into R16 from a 4-, 2- and 1-byte
	// piece, tracking the bit position of the next piece in R22.
	MOVD  $0, R16 // low word
	MOVD  $0, R22 // shift count
	CMP   R5, $4
	BLT   tail_lt4
	MOVWZ (R4), R16
	ADD   $4, R4
	ADD   $-4, R5
	MOVD  $32, R22

tail_lt4:
	CMP   R5, $2
	BLT   tail_lt2
	MOVHZ (R4), R21
	SLD   R22, R21, R21
	OR    R21, R16, R16
	ADD   $16, R22
	ADD   $-2, R5
	ADD   $2, R4

tail_lt2:
	CMP   R5, $0
	BEQ   pad_bit
	MOVBZ (R4), R21
	SLD   R22, R21, R21
	OR    R21, R16, R16
	ADD   $8, R22

pad_bit:
	// Terminating 1 bit directly above the last message byte.
	MOVD $1, R21
	SLD  R22, R21, R21
	OR   R21, R16, R16

accumulate:
	// Add the padded block to h0, h1, h2. Unlike a full block, no extra
	// 1 is added to h2 here; only the carry is propagated.
	ADDC  R16, R8, R8
	ADDE  R17, R9, R9
	ADDZE R10

	// Pretend a full block was consumed so the shared multiply path
	// ends with zero bytes remaining.
	ADD  $16, R4
	MOVD $16, R5
	BR   mul_reduce

store_state:
	// Write h0, h1, h2 back to state.
	MOVD R8, 0(R3)
	MOVD R9, 8(R3)
	MOVD R10, 16(R3)
	RET