// Copyright 2023 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build amd64
// +build amd64

#include "textflag.h"

// calculateChecksum computes the checksum of a slice, taking into account a
// previously computed initial value and whether the first byte is a lower or
// upper byte.
//
// It utilizes byte order independence and parallel summation as described in
// RFC 1071 1.2.
//
// The best way to understand this function is to understand
// checksum_noasm_unsafe.go first, which implements largely the same logic.
// Using assembly speeds things up via ADC (add with carry).
//
// Frame layout (35 bytes of arguments and results, no locals):
//
//   buf_base+0(FP)   pointer to the first byte to checksum
//   buf_len+8(FP)    number of bytes to checksum
//   odd+24(FP)       non-zero if initial was computed over an odd byte count
//   initial+26(FP)   16-bit running checksum, host byte order
//   ret+32(FP)       16-bit resulting checksum, host byte order
//   ret1+34(FP)      1 if the bytes summed so far end on an odd boundary
//
// NOTE(review): the layout presumably corresponds to a Go declaration of the
// form func(buf []byte, odd bool, initial uint16) (uint16, bool) -- confirm
// against the Go stub, which is not part of this file.
//
// Register usage: AX accumulator, BX remaining length, CX read cursor,
// R8 odd flag (in and out), R9/R10/DX scratch for zero-extended loads.
TEXT ·calculateChecksum(SB),NOSPLIT|NOFRAME,$0-35
  // Store arguments in registers.
  //
  // MOVW only writes the low 16 bits of AX; whatever is in the upper bits is
  // discarded by the BSWAPQ/SHRQ pair at swaporder.
  MOVW initial+26(FP), AX
  MOVQ buf_len+8(FP), BX
  MOVQ buf_base+0(FP), CX
  XORQ R8, R8
  MOVB odd+24(FP), R8

  // Account for a previous odd number of bytes.
  //
  // if odd {
  //   if buf_len == 0 {
  //     return initial, odd
  //   }
  //   initial += buf[0]
  //   buf = buf[1:]
  // }
  CMPB R8, $0
  JE newlyodd

  // With nothing to consume there is no buf[0] to read, and decrementing a
  // zero length would wrap it around and send the loops below far past the
  // end of the buffer. Hand back the inputs untouched instead.
  TESTQ BX, BX
  JZ emptyodd

  XORQ R9, R9
  MOVB (CX), R9
  ADDW R9, AX
  ADCW $0, AX // End-around carry for the 16-bit ones' complement sum.
  INCQ CX
  DECQ BX

  // See whether we're checksumming an odd number of bytes. If so, the final
  // byte is a big endian most significant byte, and so needs to be shifted.
  //
  // odd = buf_len%2 != 0
  // if odd {
  //   buf_len--
  //   initial += buf[buf_len]<<8
  // }
newlyodd:
  XORQ R8, R8
  TESTQ $1, BX
  JZ swaporder
  MOVB $1, R8
  DECQ BX
  XORQ R10, R10
  MOVB (CX)(BX*1), R10
  SHLQ $8, R10
  ADDW R10, AX
  ADCW $0, AX // End-around carry for the 16-bit ones' complement sum.

swaporder:
  // Load initial in network byte order.
  //
  // BSWAPQ moves the byte-swapped low 16 bits to the top of the register and
  // the shift brings them back down, zeroing everything above bit 15.
  BSWAPQ AX
  SHRQ $48, AX

  // Accumulate 8 bytes at a time.
  //
  // while buf_len >= 8 {
  //   acc, carry = acc + *(uint64 *)(buf) + carry
  //   buf_len -= 8
  //   buf = buf[8:]
  // }
  // acc += carry
  JMP addcond
addloop:
  ADDQ (CX), AX
  ADCQ $0, AX // Fold the carry out of bit 63 back into the sum.
  SUBQ $8, BX
  ADDQ $8, CX
addcond:
  CMPQ BX, $8
  JAE addloop

  // TODO(krakauer): We can do 4 byte accumulation too.

  // Accumulate the rest 2 bytes at a time. The length is even at this point
  // because any trailing odd byte was consumed at newlyodd.
  //
  // while buf_len > 0 {
  //   acc, carry = acc + *(uint16 *)(buf)
  //   buf_len -= 2
  //   buf = buf[2:]
  // }
  JMP slowaddcond
slowaddloop:
  XORQ DX, DX
  MOVW (CX), DX
  ADDQ DX, AX
  ADCQ $0, AX
  SUBQ $2, BX
  ADDQ $2, CX
slowaddcond:
  CMPQ BX, $2
  JAE slowaddloop

  // Fold into 16 bits.
  //
  // for acc > math.MaxUint16 {
  //   acc = (acc & 0xffff) + acc>>16
  // }
  JMP foldcond
foldloop:
  MOVQ AX, DX
  ANDQ $0xffff, DX
  SHRQ $16, AX
  ADDQ DX, AX
  // We don't need ADC because folding will take care of it
foldcond:
  CMPQ AX, $0xffff
  JA foldloop

  // Return the checksum in host byte order.
  BSWAPQ AX
  SHRQ $48, AX
  MOVW AX, ret+32(FP)
  MOVB R8, ret1+34(FP)
  RET

emptyodd:
  // Zero-length input with the odd flag set: the low 16 bits of AX still
  // hold initial and R8 still holds the incoming odd flag.
  MOVW AX, ret+32(FP)
  MOVB R8, ret1+34(FP)
  RET