// Source listing: github.com/muyo/sno@v1.2.1/internal/encoding_amd64.s
//
// Go (Plan 9 syntax) assembly for amd64.
//
// Encode turns 10 raw bytes into 16 characters, 5 bits per character;
// Decode is the inverse. The alphabet is '2'..'9' for the values 0..7,
// followed by 'a'.. for the values 8..31.
//
// Both functions branch on ·hasVectorSupport, a byte flag defined outside
// this file. The path taken when it is set uses PDEPQ/PEXTQ, PSHUFB and
// PINSRQ/PEXTRQ; presumably the flag reflects CPU support for those
// instructions - confirm against its definition.

#include "textflag.h"
#include "funcdata.h"

// PSHUFB control mask: reverses the byte order within each 8-byte half.
DATA shuffleVec<>+0(SB)/8, $0x0001020304050607
DATA shuffleVec<>+8(SB)/8, $0x08090A0B0C0D0E0F
GLOBL shuffleVec<>(SB), (NOPTR+RODATA), $16

// Offset of the first character of the alphabet (ASCII '2').
DATA offsetCharset<>+0(SB)/8, $0x3232323232323232 // 50
DATA offsetCharset<>+8(SB)/8, $0x3232323232323232
GLOBL offsetCharset<>(SB), (NOPTR+RODATA), $16

// Values greater than 7 map to letters rather than digits.
DATA selectLetters<>+0(SB)/8, $0x0707070707070707
DATA selectLetters<>+8(SB)/8, $0x0707070707070707
GLOBL selectLetters<>(SB), (NOPTR+RODATA), $16

// 255 - 216 = 39: turns an all-ones compare mask into the letter offset.
DATA subLetters<>+0(SB)/8, $0xD8D8D8D8D8D8D8D8 // 216
DATA subLetters<>+8(SB)/8, $0xD8D8D8D8D8D8D8D8
GLOBL subLetters<>(SB), (NOPTR+RODATA), $16

// Low 5 bits of every byte. Used as the PDEPQ/PEXTQ bit mask and as the
// PAND mask on the scalar encode path.
DATA interleave<>+0(SB)/8, $0x1f1f1f1f1f1f1f1f
DATA interleave<>+8(SB)/8, $0x1f1f1f1f1f1f1f1f
GLOBL interleave<>(SB), (NOPTR+RODATA), $16

// func Encode(src *[10]byte) (dst [16]byte)
//
// In:  src+0(FP) - pointer to the 10 source bytes.
// Out: dst+8(FP) - 16 encoded characters.
//
// Registers:
//   AX - src[0..4] as a big-endian 40-bit integer (upper 24 bits zero)
//   BX - src pointer, then src[5..9] as a big-endian 40-bit integer
//   DX - address of dst (scalar path only)
//   X0 - the 16 5-bit groups, then the encoded characters
//   X1 - per-byte letter offset (39 or 0)
TEXT ·Encode(SB), NOSPLIT, $0-24
	MOVQ src+0(FP), BX

	// First half: load src[0..7], make it big-endian, drop src[5..7].
	MOVQ 0(BX), AX
	BSWAPQ AX
	SHRQ $24, AX

	// Second half: load src[2..9] so that the 8-byte read never goes past
	// the end of the 10-byte array (an 8-byte load at offset 5 would touch
	// src[10..12]). After the byte swap src[5..9] already occupy the low
	// 40 bits; the shift pair clears src[2..4] from the upper 24 bits.
	MOVQ 2(BX), BX
	BSWAPQ BX
	SHLQ $24, BX
	SHRQ $24, BX

	CMPB ·hasVectorSupport(SB), $1
	JEQ encodeVec

	// Scalar path: peel off 5 bits at a time, last character first, and
	// store the low byte of the shifted value straight into dst. Bits
	// above the low 5 in each stored byte are removed by the PAND below.
	LEAQ dst+8(FP), DX

	MOVB AX, 7(DX)
	SHRQ $5, AX
	MOVB AX, 6(DX)
	SHRQ $5, AX
	MOVB AX, 5(DX)
	SHRQ $5, AX
	MOVB AX, 4(DX)
	SHRQ $5, AX
	MOVB AX, 3(DX)
	SHRQ $5, AX
	MOVB AX, 2(DX)
	SHRQ $5, AX
	MOVB AX, 1(DX)
	SHRQ $5, AX
	MOVB AX, 0(DX)

	MOVB BX, 15(DX)
	SHRQ $5, BX
	MOVB BX, 14(DX)
	SHRQ $5, BX
	MOVB BX, 13(DX)
	SHRQ $5, BX
	MOVB BX, 12(DX)
	SHRQ $5, BX
	MOVB BX, 11(DX)
	SHRQ $5, BX
	MOVB BX, 10(DX)
	SHRQ $5, BX
	MOVB BX, 9(DX)
	SHRQ $5, BX
	MOVB BX, 8(DX)

	MOVOU (DX), X0
	PAND interleave<>+0(SB), X0

	JMP encodeFinish

encodeVec:
	// Spread the low 40 bits of each half over 8 bytes, 5 bits per byte.
	PDEPQ interleave<>+0(SB), AX, AX
	PDEPQ interleave<>+0(SB), BX, BX

	// Pack both halves into X0 and reverse the bytes of each half so the
	// most significant group becomes the first character.
	MOVQ AX, X0
	PINSRQ $1, BX, X0
	PSHUFB shuffleVec<>+0(SB), X0

encodeFinish:
	MOVOA X0, X1
	PADDB offsetCharset<>+0(SB), X0 // Add 50, where 50 is the beginning of our alphabet (ASCII '2').
	                                // That takes care of all digits. We need to offset letters, though,
	                                // as they start at char('a'), which is 97 in dec.
	PCMPGTB selectLetters<>+0(SB), X1 // PCMPGTB sets every byte holding a letter (value > 7) to 255.
	PSUBUSB subLetters<>+0(SB), X1  // We need to add 39 to each letter in X0 to move them into the right range.
	                                // Note: Not 47 (50 + 47 = 97), as our letters are in the [8..31] range.
	                                // And so we simply do an (unsigned, saturating) subtraction of 216 and
	                                // as a result get 39 (the offset) where the letters are and 0 elsewhere.
	PADDB X1, X0                    // Add them together and done.

	MOVOU X0, dst+8(FP)

	RET


// func Decode(src []byte) (dst [10]byte)
//
// In:  src+0(FP)  - slice data pointer. 16 bytes are read from it without
//                   any length check in this function.
// Out: dst+24(FP) - the 10 decoded bytes.
//
// Registers:
//   BX     - src pointer (reused as scratch on the scalar path)
//   DX     - address of dst
//   X0     - characters, then their 5-bit values
//   X1     - per-byte letter offset (39 or 0)
//   R8, R9 - first / second group of 8 values, then the decoded halves
TEXT ·Decode(SB), NOSPLIT, $0-34
	// The entirety of this function is simply the inverse of encode.
	MOVQ src+0(FP), BX
	LEAQ dst+24(FP), DX
	MOVOU (BX), X0

	// Remove the '2' base, then a further 39 from every byte that is
	// still greater than 7 (i.e. the letters).
	PSUBB offsetCharset<>+0(SB), X0
	MOVOA X0, X1

	PCMPGTB selectLetters<>+0(SB), X1
	PSUBUSB subLetters<>+0(SB), X1
	PSUBB X1, X0

	CMPB ·hasVectorSupport(SB), $0
	JEQ decodeFallback

	// Reverse the bytes of each half so the first character ends up in
	// the most significant byte of its quadword.
	PSHUFB shuffleVec<>+0(SB), X0

	MOVQ X0, R8
	PEXTRQ $1, X0, R9

	// Gather the low 5 bits of each byte into a 40-bit integer, then
	// convert it to the 5 bytes in memory order (upper 24 bits zero).
	PEXTQ interleave<>+0(SB), R8, R8
	BSWAPQ R8
	SHRQ $24, R8

	PEXTQ interleave<>+0(SB), R9, R9
	BSWAPQ R9
	SHRQ $24, R9

	// dst[0..7]: bytes 5..7 are zero here and are overwritten below.
	MOVQ R8, 0(DX)

	// dst[5..9], stored as 4 + 1 bytes so that nothing is written past
	// the 10-byte result (an 8-byte store at offset 5 would reach
	// dst[10..12], beyond the declared 34-byte argument frame).
	MOVL R9, 5(DX)
	SHRQ $32, R9
	MOVB R9, 9(DX)

	RET

decodeFallback:
	// TODO(alcore) Subject to an optimization pass.
	// Scalar path. R8 holds values 0..7 and R9 values 8..15, one per
	// byte, first value in the lowest byte. The trailing number on each
	// SHRQ is the index of the value that becomes the low byte.
	MOVQ X0, R8
	PSRLO $8, X0
	MOVQ X0, R9

	// Timestamp block - 0
	MOVB R8, BX
	SHLB $3, BX

	SHRQ $8, R8 // 1
	MOVB R8, AX
	SHRB $2, AX
	ORB AX, BX

	MOVB BX, 0(DX)

	MOVB R8, BX
	SHLB $6, BX

	SHRQ $8, R8 // 2
	MOVB R8, AX
	SHLB $1, AX
	ORB AX, BX

	SHRQ $8, R8 // 3
	MOVB R8, CX
	SHRB $4, CX
	ORB CX, BX

	MOVB BX, 1(DX)

	MOVB R8, BX
	SHLB $4, BX

	SHRQ $8, R8 // 4
	MOVB R8, AX
	SHRB $1, AX
	ORB AX, BX

	MOVB BX, 2(DX)

	MOVB R8, BX
	SHLB $7, BX

	SHRQ $8, R8 // 5
	MOVB R8, CX
	SHLB $2, CX
	ORB CX, BX

	SHRQ $8, R8 // 6
	MOVB R8, AX
	SHRB $3, AX
	ORB AX, BX

	MOVB BX, 3(DX)

	MOVB R8, BX
	SHLB $5, BX

	SHRQ $8, R8 // 7
	ORB R8, BX

	MOVB BX, 4(DX)

	// Payload block - 8
	MOVB R9, BX
	SHLB $3, BX

	SHRQ $8, R9 // 9
	MOVB R9, AX
	SHRB $2, AX
	ORB AX, BX

	MOVB BX, 5(DX)

	MOVB R9, BX
	SHLB $6, BX

	SHRQ $8, R9 // 10
	MOVB R9, AX
	SHLB $1, AX
	ORB AX, BX

	SHRQ $8, R9 // 11
	MOVB R9, CX
	SHRB $4, CX
	ORB CX, BX

	MOVB BX, 6(DX)

	MOVB R9, BX
	SHLB $4, BX

	SHRQ $8, R9 // 12
	MOVB R9, AX
	SHRB $1, AX
	ORB AX, BX

	MOVB BX, 7(DX)

	MOVB R9, BX
	SHLB $7, BX

	SHRQ $8, R9 // 13
	MOVB R9, CX
	SHLB $2, CX
	ORB CX, BX

	SHRQ $8, R9 // 14
	MOVB R9, AX
	SHRB $3, AX
	ORB AX, BX

	MOVB BX, 8(DX)

	MOVB R9, BX
	SHLB $5, BX

	SHRQ $8, R9 // 15
	ORB R9, BX

	MOVB BX, 9(DX)

	RET