// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego
// +build gc,!purego

#include "textflag.h"

// POLY1305_ADD(msg, h0, h1, h2) adds one 16-byte block to the accumulator.
//
// The two 64-bit words at 0(msg) and 8(msg) are added to h0 and h1 with
// carry propagation, and h2 receives the carry plus 1, so the value added
// to h2:h1:h0 is the 128-bit block plus 2^128. msg is then advanced by 16
// bytes. The ADDQ/ADCQ/ADCQ sequence is a single carry chain and must stay
// in this order.
#define POLY1305_ADD(msg, h0, h1, h2) \
	ADDQ 0(msg), h0;  \
	ADCQ 8(msg), h1;  \
	ADCQ $1, h2;      \
	LEAQ 16(msg), msg

// POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) multiplies the
// accumulator h2:h1:h0 by r1:r0 and folds the result back into h2:h1:h0.
//
// Besides the scratch registers t0..t3 it clobbers AX and DX (implicit
// operands of MULQ). h0 is also used as a temporary in the middle section
// before it is overwritten with the result.
//
// The body has three sections, separated by blank continuation lines:
//
//  1. h * r0: t0 = lo(r0*h0), t1 = hi(r0*h0) + lo(r0*h1),
//     t2 = r0*h2 + hi(r0*h1) + carry. The h2 product uses a 64-bit IMULQ,
//     so only its low 64 bits are kept.
//  2. h * r1, shifted up by one word: lo(r1*h0) is added into t1, the high
//     half (plus carry) is parked in h0, r1*h1 is added into t3:t2 on top of
//     t3 = r1*h2 (again a 64-bit IMULQ), and the parked word is added last.
//     After this section t3:t2:t1:t0 holds the product.
//  3. Reduction: h1:h0 = t1:t0 and h2 = t2 & 3 keep the low 130 bits. With
//     c = (t3:t2) >> 2 being the part of the product at bit 130 and above,
//     the code first adds t3:(t2 with its two low bits cleared), which is
//     4*c, and then adds c itself, for 5*c in total. This is a partial
//     reduction modulo 2^130 - 5; the result is congruent to the product but
//     is not brought into canonical form here.
//
// No comments are placed inside the macro body: a // comment would swallow
// the trailing line-continuation backslash.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
	MOVQ  r0, AX;                  \
	MULQ  h0;                      \
	MOVQ  AX, t0;                  \
	MOVQ  DX, t1;                  \
	MOVQ  r0, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  r0, t2;                  \
	IMULQ h2, t2;                  \
	ADDQ  DX, t2;                  \
	                               \
	MOVQ  r1, AX;                  \
	MULQ  h0;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  DX, h0;                  \
	MOVQ  r1, t3;                  \
	IMULQ h2, t3;                  \
	MOVQ  r1, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t2;                  \
	ADCQ  DX, t3;                  \
	ADDQ  h0, t2;                  \
	ADCQ  $0, t3;                  \
	                               \
	MOVQ  t0, h0;                  \
	MOVQ  t1, h1;                  \
	MOVQ  t2, h2;                  \
	ANDQ  $3, h2;                  \
	MOVQ  t2, t0;                  \
	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
	ADDQ  t0, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2;                  \
	SHRQ  $2, t3, t2;              \
	SHRQ  $2, t3;                  \
	ADDQ  t2, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2

// func update(state *[7]uint64, msg []byte)
//
// update absorbs msg into the accumulator stored in state.
//
// State layout as used here: words 0..2 (offsets 0, 8, 16) are the
// accumulator limbs h0, h1, h2, and words 3..4 (offsets 24, 32) are the
// multiplier limbs r0, r1. Only h0..h2 are written back; the multiplier and
// the remaining two words of state are not modified, and the last two words
// are not read either.
//
// Every full 16-byte block of msg is added with POLY1305_ADD and multiplied
// with POLY1305_MUL. If 1..15 bytes remain, they are assembled into CX:BX by
// starting from the value 1 and shifting the bytes in from last to first, so
// the partial block is the remaining bytes in little-endian order with a
// single 1 byte placed directly above the last message byte. That value is
// added to the accumulator and one more multiplication is performed. An
// empty msg leaves the accumulator unchanged.
//
// Register use: DI = state, SI = msg cursor, R15 = bytes remaining,
// R8/R9/R10 = h0/h1/h2, R11/R12 = r0/r1, BX/CX/R13/R14 = scratch (plus AX
// and DX inside POLY1305_MUL). The frame is $0-32: no locals, 32 bytes of
// arguments (one pointer plus a three-word slice header).
TEXT ·update(SB), $0-32
	MOVQ state+0(FP), DI
	MOVQ msg_base+8(FP), SI
	MOVQ msg_len+16(FP), R15

	MOVQ 0(DI), R8   // h0
	MOVQ 8(DI), R9   // h1
	MOVQ 16(DI), R10 // h2
	MOVQ 24(DI), R11 // r0
	MOVQ 32(DI), R12 // r1

	CMPQ R15, $16
	JB   bytes_between_0_and_15

loop:
	POLY1305_ADD(SI, R8, R9, R10)

multiply:
	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
	SUBQ $16, R15
	CMPQ R15, $16
	JAE  loop

bytes_between_0_and_15:
	TESTQ R15, R15
	JZ    done
	MOVQ  $1, BX   // the 1 byte that ends up just above the last message byte
	XORQ  CX, CX
	XORQ  R13, R13 // MOVB below writes only the low byte; keep the rest zero
	ADDQ  R15, SI  // point one past the final byte; the loop walks backwards

flush_buffer:
	SHLQ $8, BX, CX
	SHLQ $8, BX
	MOVB -1(SI), R13
	XORQ R13, BX
	DECQ SI
	DECQ R15
	JNZ  flush_buffer

	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0, R10
	MOVQ $16, R15 // so the SUBQ after the multiply leaves 0 and control reaches done
	JMP  multiply

done:
	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	RET