// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build amd64,!gccgo,!appengine

#include "textflag.h"

// POLY1305_ADD(msg, h0, h1, h2) adds one 16-byte message block into the
// three-limb accumulator h2:h1:h0 (h0 is the least significant 64 bits).
//
// The block is read as two 64-bit words at 0(msg) and 8(msg). The constant 1
// added into h2 has weight 2^128 in the accumulator, i.e. it is the bit that
// sits just above the 16 message bytes. msg is then advanced by 16 bytes.
//
// The ADDQ/ADCQ/ADCQ order forms a single carry chain and must not be
// reordered. LEAQ is used for the pointer bump; it does not modify flags.
#define POLY1305_ADD(msg, h0, h1, h2) \
	ADDQ 0(msg), h0;  \
	ADCQ 8(msg), h1;  \
	ADCQ $1, h2;      \
	LEAQ 16(msg), msg

// POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) replaces the accumulator
// h2:h1:h0 with (h * r) partially reduced modulo 2^130 - 5, where r = r1:r0.
//
// Part 1 (r0 row): t0 = lo(r0*h0), t1 = hi(r0*h0) + lo(r0*h1),
//                  t2 = r0*h2 + hi(r0*h1) + carry.
// Part 2 (r1 row): lo(r1*h0) is added into t1; hi(r1*h0) + carry is parked in
//                  h0 (whose old value is no longer needed), t3 = r1*h2, then
//                  r1*h1 and the parked word are folded into t3:t2.
//                  After this t3:t2:t1:t0 holds the full product.
// Part 3 (reduce): with c = product >> 130, the new h is
//                  (product mod 2^130) + 4*c + c. The 4*c term is t3:(t2 &^ 3)
//                  added as-is; the c term is t3:t2 shifted right by 2.
//                  This uses 2^130 == 5 (mod 2^130 - 5).
//
// AX and DX are overwritten (MULQ takes AX as an implicit operand and writes
// DX:AX), as are t0..t3, so none of the macro arguments may be AX or DX.
// The h2-by-r products use IMULQ and keep only the low 64 bits.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
	MOVQ  r0, AX;                  \
	MULQ  h0;                      \
	MOVQ  AX, t0;                  \
	MOVQ  DX, t1;                  \
	MOVQ  r0, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  r0, t2;                  \
	IMULQ h2, t2;                  \
	ADDQ  DX, t2;                  \
	                               \
	MOVQ  r1, AX;                  \
	MULQ  h0;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  DX, h0;                  \
	MOVQ  r1, t3;                  \
	IMULQ h2, t3;                  \
	MOVQ  r1, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t2;                  \
	ADCQ  DX, t3;                  \
	ADDQ  h0, t2;                  \
	ADCQ  $0, t3;                  \
	                               \
	MOVQ  t0, h0;                  \
	MOVQ  t1, h1;                  \
	MOVQ  t2, h2;                  \
	ANDQ  $3, h2;                  \
	MOVQ  t2, t0;                  \
	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
	ADDQ  t0, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2;                  \
	SHRQ  $2, t3, t2;              \
	SHRQ  $2, t3;                  \
	ADDQ  t2, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2

// poly1305Mask is the 128-bit mask ANDed with the first 16 key bytes to form
// r (low word at +0, high word at +8). Together the two words equal
// 0x0ffffffc0ffffffc0ffffffc0fffffff, the Poly1305 "clamp" constant.
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16

// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
//
// Computes a 16-byte tag over the mlen bytes starting at m and stores it at
// out. key bytes 0..15 (after masking) are the multiplier r; key bytes 16..31
// are added to the result at the end.
//
// NOTE(review): the Go declaration is not in this file. The key element type
// is written as byte because the routine reads exactly 32 bytes through the
// pointer - confirm against the Go-side prototype.
//
// Frame: $0-32 (no locals, 32 bytes of arguments, no results).
//
// Register use:
//   DI        out
//   SI        message cursor
//   R15       bytes remaining
//   R11, R12  r0, r1
//   R8..R10   h0, h1, h2 (accumulator)
//   BX, CX, R13, R14, AX, DX  scratch
TEXT ·poly1305(SB), $0-32
	MOVQ out+0(FP), DI
	MOVQ m+8(FP), SI
	MOVQ mlen+16(FP), R15
	MOVQ key+24(FP), AX

	MOVQ 0(AX), R11
	MOVQ 8(AX), R12
	ANDQ ·poly1305Mask<>(SB), R11   // r0
	ANDQ ·poly1305Mask<>+8(SB), R12 // r1
	XORQ R8, R8                    // h0
	XORQ R9, R9                    // h1
	XORQ R10, R10                  // h2

	// Fewer than 16 bytes in total: skip straight to the tail handling.
	CMPQ R15, $16
	JB   bytes_between_0_and_15

loop:
	// Full 16-byte block: h += block + 2^128, SI += 16.
	POLY1305_ADD(SI, R8, R9, R10)

multiply:
	// h = h * r (partially reduced). Also entered from the tail path below,
	// which has already added its padded block and set R15 to 16 so that the
	// SUBQ leaves 0 and the function falls through to done.
	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
	SUBQ $16, R15
	CMPQ R15, $16
	JAE  loop

bytes_between_0_and_15:
	TESTQ R15, R15
	JZ    done

	// Build the final partial block in CX:BX. It starts as the value 1 and
	// the remaining bytes are shifted in from last to first, so the result is
	// the little-endian message bytes followed by a single 0x01 byte.
	MOVQ  $1, BX
	XORQ  CX, CX
	XORQ  R13, R13 // MOVB below only writes the low byte of R13
	ADDQ  R15, SI  // SI = one past the last message byte

flush_buffer:
	SHLQ $8, BX, CX // CX:BX <<= 8 (double-width shift, high word first)
	SHLQ $8, BX
	MOVB -1(SI), R13
	XORQ R13, BX
	DECQ SI
	DECQ R15
	JNZ  flush_buffer

	// h += partial block. The terminating 1 is already inside CX:BX, so no
	// extra 2^128 is added here (ADCQ $0 rather than ADCQ $1).
	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0, R10
	MOVQ $16, R15
	JMP  multiply

done:
	// Final reduction: compute h - (2^130 - 5) across the three limbs. If the
	// subtraction borrows (carry set) h was already below the modulus, so the
	// original low 128 bits are restored.
	MOVQ    R8, AX
	MOVQ    R9, BX
	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
	SBBQ    $3, R10
	CMOVQCS R8, AX
	CMOVQCS R9, BX

	// tag = (h + key[16:32]) mod 2^128; the carry out of ADCQ is discarded.
	MOVQ    key+24(FP), R8
	ADDQ    16(R8), AX
	ADCQ    24(R8), BX

	MOVQ AX, 0(DI)
	MOVQ BX, 8(DI)
	RET