//go:build !appengine && gc && !purego && !noasm
// +build !appengine
// +build gc
// +build !purego
// +build !noasm

#include "textflag.h"

// Registers:
// h and d are both aliases for AX: Sum64 uses h (the running hash) and
// writeBlocks uses d (the digest pointer); neither routine uses both.
#define h      AX
#define d      AX
#define p      SI // pointer to advance through b
#define n      DX
#define end    BX // loop end
#define v1     R8
#define v2     R9
#define v3     R10
#define v4     R11
#define x      R12
#define prime1 R13
#define prime2 R14
#define prime4 DI

// round performs acc = rotl(acc + x*prime2, 31) * prime1.
// x is multiplied in place, so its previous value is lost.
// It assumes that prime1 and prime2 have been loaded.
#define round(acc, x) \
	IMULQ prime2, x   \
	ADDQ  x, acc      \
	ROLQ  $31, acc    \
	IMULQ prime1, acc

// round0 performs the operation x = round(0, x).
#define round0(x) \
	IMULQ prime2, x \
	ROLQ  $31, x    \
	IMULQ prime1, x

// mergeRound applies a merge round on the two registers acc and x:
// x = round0(x); acc = (acc ^ x)*prime1 + prime4.
// It assumes that prime1, prime2, and prime4 have been loaded.
#define mergeRound(acc, x) \
	round0(x)          \
	XORQ  x, acc       \
	IMULQ prime1, acc  \
	ADDQ  prime4, acc

// blockLoop processes as many 32-byte blocks as possible,
// updating v1, v2, v3, and v4. It assumes that there is at least one block
// to process.
//
// Each iteration feeds the four 8-byte words at p+0, p+8, p+16 and p+24
// into v1..v4 respectively, then advances p by 32. The body always runs
// once before the condition is tested and repeats while p <= end (JLE),
// so callers set end to (start of data)+len-32. The x register is used
// as scratch. The macro is expanded once in each TEXT block below.
#define blockLoop() \
loop:  \
	MOVQ +0(p), x  \
	round(v1, x)   \
	MOVQ +8(p), x  \
	round(v2, x)   \
	MOVQ +16(p), x \
	round(v3, x)   \
	MOVQ +24(p), x \
	round(v4, x)   \
	ADDQ $32, p    \
	CMPQ p, end    \
	JLE  loop

// func Sum64(b []byte) uint64
//
// Sum64 hashes b and stores the 64-bit result in ret+24(FP).
// The input is consumed in four stages: 32-byte blocks (only when
// len(b) >= 32), then 8-byte words, then at most one 4-byte word, then
// single bytes, followed by a final shift/xor/multiply mix of h.
// Multipliers are read from the ·primes table at byte offsets 0, 8, 16, 24
// and 32; offsets 0, 8 and 24 are kept in registers, the other two are
// used directly from memory.
TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
	// Load fixed primes.
	MOVQ ·primes+0(SB), prime1
	MOVQ ·primes+8(SB), prime2
	MOVQ ·primes+24(SB), prime4

	// Load slice.
	MOVQ b_base+0(FP), p
	MOVQ b_len+8(FP), n
	LEAQ (p)(n*1), end

	// The first loop limit will be len(b)-32.
	SUBQ $32, end

	// Check whether we have at least one block.
	CMPQ n, $32
	JLT  noBlocks

	// Set up initial state (v1, v2, v3, v4):
	// v1 = prime1+prime2, v2 = prime2, v3 = 0, v4 = 0-prime1.
	MOVQ prime1, v1
	ADDQ prime2, v1
	MOVQ prime2, v2
	XORQ v3, v3
	XORQ v4, v4
	SUBQ prime1, v4

	blockLoop()

	// h = rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18).
	// The rotations are done on copies so that v1..v4 stay intact for
	// the merge rounds below.
	MOVQ v1, h
	ROLQ $1, h
	MOVQ v2, x
	ROLQ $7, x
	ADDQ x, h
	MOVQ v3, x
	ROLQ $12, x
	ADDQ x, h
	MOVQ v4, x
	ROLQ $18, x
	ADDQ x, h

	mergeRound(h, v1)
	mergeRound(h, v2)
	mergeRound(h, v3)
	mergeRound(h, v4)

	JMP afterBlocks

noBlocks:
	// Fewer than 32 bytes of input: seed h from the table entry at
	// offset 32 instead of from the block accumulators.
	MOVQ ·primes+32(SB), h

afterBlocks:
	// Mix in the total input length.
	ADDQ n, h

	// end was len(b)-32 past the base; move the limit to len(b)-8 so the
	// 8-byte loop runs while a full word remains (p <= end).
	ADDQ $24, end
	CMPQ p, end
	JG   try4

loop8:
	// h = rotl(h ^ round0(word), 27)*prime1 + prime4.
	MOVQ  (p), x
	ADDQ  $8, p
	round0(x)
	XORQ  x, h
	ROLQ  $27, h
	IMULQ prime1, h
	ADDQ  prime4, h

	CMPQ p, end
	JLE  loop8

try4:
	// Limit becomes len(b)-4: consume one 4-byte word if one remains.
	ADDQ $4, end
	CMPQ p, end
	JG   try1

	// h = rotl(h ^ (word32*prime1), 23)*prime2 + primes[offset 16].
	MOVL  (p), x
	ADDQ  $4, p
	IMULQ prime1, x
	XORQ  x, h

	ROLQ  $23, h
	IMULQ prime2, h
	ADDQ  ·primes+16(SB), h

try1:
	// Limit becomes len(b): consume the remaining bytes one at a time
	// while p < end.
	ADDQ $4, end
	CMPQ p, end
	JGE  finalize

loop1:
	// h = rotl(h ^ (byte*primes[offset 32]), 11)*prime1.
	MOVBQZX (p), x
	ADDQ    $1, p
	IMULQ   ·primes+32(SB), x
	XORQ    x, h
	ROLQ    $11, h
	IMULQ   prime1, h

	CMPQ p, end
	JL   loop1

finalize:
	// Final mixing:
	// h ^= h>>33; h *= prime2; h ^= h>>29; h *= primes[offset 16]; h ^= h>>32.
	MOVQ  h, x
	SHRQ  $33, x
	XORQ  x, h
	IMULQ prime2, h
	MOVQ  h, x
	SHRQ  $29, x
	XORQ  x, h
	IMULQ ·primes+16(SB), h
	MOVQ  h, x
	SHRQ  $32, x
	XORQ  x, h

	MOVQ h, ret+24(FP)
	RET

// func writeBlocks(d *Digest, b []byte) int
//
// writeBlocks runs blockLoop over b, starting from the four 8-byte state
// words stored at offsets 0, 8, 16 and 24 of the digest, and writes the
// updated words back to the same offsets. The return value, stored in
// ret+32(FP), is the distance p advanced from b's base pointer.
//
// NOTE(review): the first argument is read as s+0(FP) while the prototype
// comment above names it d; the Go declaration is not visible here, so
// confirm which name it uses.
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
	// Load fixed primes needed for round.
	MOVQ ·primes+0(SB), prime1
	MOVQ ·primes+8(SB), prime2

	// Load slice; end = base + len(b) - 32 is the blockLoop limit.
	MOVQ b_base+8(FP), p
	MOVQ b_len+16(FP), n
	LEAQ (p)(n*1), end
	SUBQ $32, end

	// Load vN from d.
	MOVQ s+0(FP), d
	MOVQ 0(d), v1
	MOVQ 8(d), v2
	MOVQ 16(d), v3
	MOVQ 24(d), v4

	// We don't need to check the loop condition here; this function is
	// always called with at least one block of data to process.
	blockLoop()

	// Copy vN back to d.
	MOVQ v1, 0(d)
	MOVQ v2, 8(d)
	MOVQ v3, 16(d)
	MOVQ v4, 24(d)

	// The number of bytes written is p minus the old base pointer.
	SUBQ b_base+8(FP), p
	MOVQ p, ret+32(FP)

	RET