github.com/muyo/sno@v1.2.1/internal/encoding_amd64.s (about)

     1  #include "textflag.h"
     2  #include "funcdata.h"
     3  
     4  DATA shuffleVec<>+0(SB)/8, $0x0001020304050607 // PSHUFB control mask. Stored little-endian, the indices read 7,6,...,0
     5  DATA shuffleVec<>+8(SB)/8, $0x08090A0B0C0D0E0F // and 15,14,...,8: the byte order of each 8-byte half gets reversed.
     6  GLOBL shuffleVec<>(SB), (NOPTR+RODATA), $16
     7  // offsetCharset: 16 x 50 (ASCII '2', the first character of the alphabet). Added by Encode, subtracted by Decode.
     8  DATA offsetCharset<>+0(SB)/8, $0x3232323232323232 // 50
     9  DATA offsetCharset<>+8(SB)/8, $0x3232323232323232
    10  GLOBL offsetCharset<>(SB), (NOPTR+RODATA), $16
    11  // selectLetters: 16 x 7, the PCMPGTB threshold. Bytes greater than 7 (letters) become 0xFF, bytes 0..7 (digits) become 0.
    12  DATA selectLetters<>+0(SB)/8, $0x0707070707070707
    13  DATA selectLetters<>+8(SB)/8, $0x0707070707070707
    14  GLOBL selectLetters<>(SB), (NOPTR+RODATA), $16
    15  // subLetters: 16 x 216. A saturating PSUBUSB of 216 turns the 0xFF/0x00 compare result into 39/0, the extra letter offset.
    16  DATA subLetters<>+0(SB)/8, $0xD8D8D8D8D8D8D8D8 // 216
    17  DATA subLetters<>+8(SB)/8, $0xD8D8D8D8D8D8D8D8
    18  GLOBL subLetters<>(SB), (NOPTR+RODATA), $16
    19  // interleave: 16 x 0x1F, the low 5 bits of every byte. Used as the PDEPQ/PEXTQ mask (first 8 bytes) and as a PAND mask.
    20  DATA interleave<>+0(SB)/8, $0x1f1f1f1f1f1f1f1f
    21  DATA interleave<>+8(SB)/8, $0x1f1f1f1f1f1f1f1f
    22  GLOBL interleave<>(SB), (NOPTR+RODATA), $16
    23  
    24  // func Encode(src *[10]byte) (dst [16]byte)
        //
        // Encode turns the 10 bytes at src into 16 characters. The 80 input bits are cut, most
        // significant bits first, into sixteen 5-bit groups; groups 0..7 become '2'..'9' and
        // groups 8..31 become 'a'..'x'.
        //
        // Register roles:
        //   AX      src[0..4] as a big-endian integer in the low 40 bits
        //   BX      src pointer, then src[5..9] as a big-endian integer in the low 40 bits
        //   DX      address of dst (scalar path only)
        //   X0, X1  the 16 groups (one per byte) and the letter-offset mask
    25  TEXT ·Encode(SB), NOSPLIT, $0-24
    26      MOVQ  src+0(FP), BX
    27
    28      MOVQ   0(BX), AX                    // src[0..7]
    29      BSWAPQ AX
    30      SHRQ   $24, AX                      // Shift out src[5..7]; AX = src[0..4].
    31
            // Load src[2..9] so that the 8-byte read ends on the last byte of the array. Reading
            // 8 bytes at offset 5 instead would touch src[10..12], which are not part of *[10]byte.
    32      MOVQ   2(BX), BX
    33      BSWAPQ BX                           // src[9] is now the lowest byte, src[2] the highest.
            SHLQ   $24, BX                      // Push src[2..4] out at the top ...
    34      SHRQ   $24, BX                      // ... and bring src[5..9] back down; BX = src[5..9].
    35
    36      CMPB  ·hasVectorSupport(SB), $1     // Flag defined outside of this file; presumably reports PDEP support - confirm.
    37      JEQ    encodeVec
    38
            // Scalar path: peel 5 bits at a time off AX and BX, last group first, writing one
            // byte per group straight into dst. Each stored byte still carries 3 stray upper
            // bits, which the PAND below removes.
    39      LEAQ   dst+8(FP), DX
    40
    41      MOVB   AX, 7(DX)
    42      SHRQ   $5, AX
    43      MOVB   AX, 6(DX)
    44      SHRQ   $5, AX
    45      MOVB   AX, 5(DX)
    46      SHRQ   $5, AX
    47      MOVB   AX, 4(DX)
    48      SHRQ   $5, AX
    49      MOVB   AX, 3(DX)
    50      SHRQ   $5, AX
    51      MOVB   AX, 2(DX)
    52      SHRQ   $5, AX
    53      MOVB   AX, 1(DX)
    54      SHRQ   $5, AX
    55      MOVB   AX, 0(DX)
    56
    57      MOVB   BX, 15(DX)
    58      SHRQ   $5, BX
    59      MOVB   BX, 14(DX)
    60      SHRQ   $5, BX
    61      MOVB   BX, 13(DX)
    62      SHRQ   $5, BX
    63      MOVB   BX, 12(DX)
    64      SHRQ   $5, BX
    65      MOVB   BX, 11(DX)
    66      SHRQ   $5, BX
    67      MOVB   BX, 10(DX)
    68      SHRQ   $5, BX
    69      MOVB   BX, 9(DX)
    70      SHRQ   $5, BX
    71      MOVB   BX, 8(DX)
    72
    73      MOVOU  (DX), X0
    74      PAND   interleave<>+0(SB), X0       // Keep only the low 5 bits of every byte.
    75
    76      JMP    encodeFinish
    77
    78  encodeVec:
            // Vector path: PDEP spreads the 40 bits of each half over 8 bytes, 5 bits per byte,
            // lowest group in the lowest byte. PSHUFB then reverses each 8-byte half so that the
            // most significant group comes first, matching the layout of the scalar path.
    79      PDEPQ  interleave<>+0(SB), AX, AX
    80      PDEPQ  interleave<>+0(SB), BX, BX
    81
    82      MOVQ   AX, X0
    83      PINSRQ $1, BX, X0
    84      PSHUFB shuffleVec<>+0(SB), X0
    85
    86  encodeFinish:
            // X0 holds 16 bytes, each in the range 0..31.
    87      MOVOA   X0, X1
    88      PADDB   offsetCharset<>+0(SB), X0   // Add 50, where 50 is the beginning of our alphabet (ASCII '2')
    89                                          // That takes care of all digits. We need to offset letters, though,
    90                                          // as they start at char('a'), which is 97 in dec.
    91      PCMPGTB selectLetters<>+0(SB), X1   // PCMPGTB will set all bytes with letters to 255.
    92      PSUBUSB subLetters<>+0(SB), X1      // We need to add 39 to each letter in X0 to move them into the right range.
    93                                          // Note: Not 47 (50 + 47 = 97), as our letters are in the [8..31] range.
    94                                          // And so we simply do a (unsigned) subtraction of 216 and as a result
    95                                          // get a mask of 39 (the offset) in dec where all the letters are.
    96      PADDB X1, X0                        // Add them together and done.
    97
    98      MOVOU X0, dst+8(FP)
    99
   100      RET
   101  
   102  
   103  // func Decode(src []byte) (dst [10]byte)
        //
        // Decode is the inverse of Encode: it maps 16 characters back to their 5-bit values and
        // packs those 80 bits, first character most significant, into the 10 result bytes.
        //
        // Only the data pointer of src is used. 16 bytes are loaded unconditionally and neither
        // the length nor the characters are checked here, so the caller has to guarantee
        // len(src) >= 16.
        //
        // Register roles:
        //   BX      src data pointer (reused as scratch in the fallback path)
        //   DX      address of dst
        //   X0, X1  the 16 values (one per byte) and the letter-offset mask
        //   R8, R9  values 0..7 / 8..15, later the packed halves dst[0..4] / dst[5..9]
   104  TEXT ·Decode(SB), NOSPLIT, $0-34
   105      // The entirety of this function is simply the inverse of encode.
   106      MOVQ  src+0(FP), BX
   107      LEAQ  dst+24(FP), DX
   108      MOVOU (BX), X0
   109
   110      PSUBB  offsetCharset<>+0(SB), X0     // Subtract 50: digits land on 0..7, letters on 47..70.
   111      MOVOA  X0, X1
   112
   113      PCMPGTB selectLetters<>+0(SB), X1    // 0xFF wherever the byte is greater than 7, i.e. a letter.
   114      PSUBUSB subLetters<>+0(SB), X1       // 0xFF -> 39, 0x00 -> 0 (saturating).
   115      PSUBB   X1, X0                       // Letters drop by another 39 and land on 8..31.
   116
   117      CMPB  ·hasVectorSupport(SB), $0      // Flag defined outside of this file; presumably reports PEXT support - confirm.
   118      JEQ   decodeFallback
   119
   120      PSHUFB shuffleVec<>+0(SB), X0        // Reverse each 8-byte half: the first character becomes the highest byte.
   121
   122      MOVQ       X0, R8
   123      PEXTRQ $1, X0, R9
   124
   125      PEXTQ  interleave<>+0(SB), R8, R8    // Gather the low 5 bits of all 8 bytes into one 40-bit integer.
   126      BSWAPQ R8
   127      SHRQ   $24, R8                       // Byte 0 of R8 = dst[0], ..., byte 4 = dst[4]; bytes 5..7 are zero.
   128
   129      PEXTQ  interleave<>+0(SB), R9, R9
   130      BSWAPQ R9
   131      SHRQ   $24, R9                       // Byte 0 of R9 = dst[5], ..., byte 4 = dst[9]; bytes 5..7 are zero.
   132
   133      MOVQ R8, 0(DX)                       // dst[0..7]; the zeroes in dst[5..7] are overwritten right below.
            // Store exactly five bytes for the second half. An 8-byte store at offset 5 would
            // write dst[5..12], i.e. 3 bytes past the [10]byte result and the 34-byte argument frame.
   134      MOVL R9, 5(DX)                       // dst[5..8]
            SHRQ $32, R9
            MOVB R9, 9(DX)                       // dst[9]
   135
   136      RET
   137
   138  decodeFallback:
   139      // TODO(alcore) Subject to an optimization pass.
            //
            // Scalar packing. With v0..v15 being the 16 values held in X0 (one per byte):
            //   dst[0] = v0<<3 | v1>>2            dst[5] = v8<<3  | v9>>2
            //   dst[1] = v1<<6 | v2<<1 | v3>>4    dst[6] = v9<<6  | v10<<1 | v11>>4
            //   dst[2] = v3<<4 | v4>>1            dst[7] = v11<<4 | v12>>1
            //   dst[3] = v4<<7 | v5<<2 | v6>>3    dst[8] = v12<<7 | v13<<2 | v14>>3
            //   dst[4] = v6<<5 | v7               dst[9] = v14<<5 | v15
            // All shifts are byte-sized, so bits pushed past bit 7 are discarded. The trailing
            // number on every "SHRQ $8" line is the index of the value that is now in the low byte.
   140      MOVQ   X0, R8                        // R8 = v0..v7
   141      PSRLO  $8, X0                        // Shift the upper 8 bytes down ...
   142      MOVQ   X0, R9                        // ... R9 = v8..v15
   143
   144      // Timestamp block - 0
   145      MOVB R8, BX
   146      SHLB $3, BX
   147
   148      SHRQ $8, R8 // 1
   149      MOVB R8, AX
   150      SHRB $2, AX
   151      ORB  AX, BX
   152
   153      MOVB BX, 0(DX)
   154
   155      MOVB R8, BX
   156      SHLB $6, BX
   157
   158      SHRQ $8, R8 // 2
   159      MOVB R8, AX
   160      SHLB $1, AX
   161      ORB  AX, BX
   162
   163      SHRQ $8, R8 // 3
   164      MOVB R8, CX
   165      SHRB $4, CX
   166      ORB  CX, BX
   167
   168      MOVB BX, 1(DX)
   169
   170      MOVB R8, BX
   171      SHLB $4, BX
   172
   173      SHRQ $8, R8 // 4
   174      MOVB R8, AX
   175      SHRB $1, AX
   176      ORB  AX, BX
   177
   178      MOVB BX, 2(DX)
   179
   180      MOVB R8, BX
   181      SHLB $7, BX
   182
   183      SHRQ $8, R8 // 5
   184      MOVB R8, CX
   185      SHLB $2, CX
   186      ORB  CX, BX
   187
   188      SHRQ $8, R8 // 6
   189      MOVB R8, AX
   190      SHRB $3, AX
   191      ORB  AX, BX
   192
   193      MOVB BX, 3(DX)
   194
   195      MOVB R8, BX
   196      SHLB $5, BX
   197
   198      SHRQ $8, R8 // 7
   199      ORB  R8, BX
   200
   201      MOVB BX, 4(DX)
   202
   203      // Payload block - 8
   204      MOVB R9, BX
   205      SHLB $3, BX
   206
   207      SHRQ $8, R9 // 9
   208      MOVB R9, AX
   209      SHRB $2, AX
   210      ORB  AX, BX
   211
   212      MOVB BX, 5(DX)
   213
   214      MOVB R9, BX
   215      SHLB $6, BX
   216
   217      SHRQ $8, R9 // 10
   218      MOVB R9, AX
   219      SHLB $1, AX
   220      ORB  AX, BX
   221
   222      SHRQ $8, R9 // 11
   223      MOVB R9, CX
   224      SHRB $4, CX
   225      ORB  CX, BX
   226
   227      MOVB BX, 6(DX)
   228
   229      MOVB R9, BX
   230      SHLB $4, BX
   231
   232      SHRQ $8, R9 // 12
   233      MOVB R9, AX
   234      SHRB $1, AX
   235      ORB  AX, BX
   236
   237      MOVB BX, 7(DX)
   238
   239      MOVB R9, BX
   240      SHLB $7, BX
   241
   242      SHRQ $8, R9 // 13
   243      MOVB R9, CX
   244      SHLB $2, CX
   245      ORB  CX, BX
   246
   247      SHRQ $8, R9 // 14
   248      MOVB R9, AX
   249      SHRB $3, AX
   250      ORB  AX, BX
   251
   252      MOVB BX, 8(DX)
   253
   254      MOVB R9, BX
   255      SHLB $5, BX
   256
   257      SHRQ $8, R9 // 15
   258      ORB  R9, BX
   259
   260      MOVB BX, 9(DX)
   261
   262      RET