github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/internal/bitpack/masks_int32_amd64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  
     5  // -----------------------------------------------------------------------------
     6  // Shuffle masks used to broadcast bytes of bit-packed values into vector
     7  // registers at positions where they can then be shifted into the right
     8  // locations.
     9  // -----------------------------------------------------------------------------
    10  
    11  // Shuffle masks for unpacking values from bit widths 1 to 16.
    12  //
    13  // The masks are grouped in 32 bytes chunks containing 2 masks of 16 bytes, with
    14  // the following layout:
    15  //
    16  // - The first mask is used to shuffle values from the 16 bytes of input into
    17  //   the lower 16 bytes of output. These values are then shifted RIGHT to be
    18  //   aligned on the beginning of each 32 bit word.
    19  //
    20  // - The second mask selects values from the 16 bytes of input into the upper
    21  //   16 bytes of output. These values are then shifted RIGHT to be aligned on
    22  //   the beginning of each 32 bit word.
    23  //
    24  // The bit width is intended to be used as an index into this array, using this
    25  // formula to convert from the index to a byte offset:
    26  //
    27  //      offset = 32 * (bitWidth - 1)
    28  //
    29  GLOBL ·shuffleInt32x1to16bits(SB), RODATA|NOPTR, $512 // 16 bit widths (1..16) * 32 bytes per chunk
    30  
    31  // 1 bit => 32 bits
    32  // -----------------
    33  // 0: [a,b,c,d,e,f,g,h]
    34  // ...
    35  DATA ·shuffleInt32x1to16bits+0+0(SB)/4,  $0x80808000
    36  DATA ·shuffleInt32x1to16bits+0+4(SB)/4,  $0x80808000
    37  DATA ·shuffleInt32x1to16bits+0+8(SB)/4,  $0x80808000
    38  DATA ·shuffleInt32x1to16bits+0+12(SB)/4, $0x80808000
    39  
    40  DATA ·shuffleInt32x1to16bits+0+16(SB)/4, $0x80808000
    41  DATA ·shuffleInt32x1to16bits+0+20(SB)/4, $0x80808000
    42  DATA ·shuffleInt32x1to16bits+0+24(SB)/4, $0x80808000
    43  DATA ·shuffleInt32x1to16bits+0+28(SB)/4, $0x80808000
    44  
    45  // 2 bits => 32 bits
    46  // -----------------
    47  // 0: [a,a,b,b,c,c,d,d]
    48  // 1: [e,e,f,f,g,g,h,h]
    49  // ...
    50  DATA ·shuffleInt32x1to16bits+32+0(SB)/4,  $0x80808000
    51  DATA ·shuffleInt32x1to16bits+32+4(SB)/4,  $0x80808000
    52  DATA ·shuffleInt32x1to16bits+32+8(SB)/4,  $0x80808000
    53  DATA ·shuffleInt32x1to16bits+32+12(SB)/4, $0x80808000
    54  
    55  DATA ·shuffleInt32x1to16bits+32+16(SB)/4, $0x80808001
    56  DATA ·shuffleInt32x1to16bits+32+20(SB)/4, $0x80808001
    57  DATA ·shuffleInt32x1to16bits+32+24(SB)/4, $0x80808001
    58  DATA ·shuffleInt32x1to16bits+32+28(SB)/4, $0x80808001
    59  
    60  // 3 bits => 32 bits
    61  // -----------------
    62  // 0: [a,a,a,b,b,b,c,c]
    63  // 1: [c,d,d,d,e,e,e,f]
    64  // 2: [f,f,g,g,g,h,h,h]
    65  // ...
    66  DATA ·shuffleInt32x1to16bits+64+0(SB)/4,  $0x80808000
    67  DATA ·shuffleInt32x1to16bits+64+4(SB)/4,  $0x80808000
    68  DATA ·shuffleInt32x1to16bits+64+8(SB)/4,  $0x80800100
    69  DATA ·shuffleInt32x1to16bits+64+12(SB)/4, $0x80808001
    70  
    71  DATA ·shuffleInt32x1to16bits+64+16(SB)/4, $0x80808001
    72  DATA ·shuffleInt32x1to16bits+64+20(SB)/4, $0x80800201
    73  DATA ·shuffleInt32x1to16bits+64+24(SB)/4, $0x80808002
    74  DATA ·shuffleInt32x1to16bits+64+28(SB)/4, $0x80808002
    75  
    76  // 4 bits => 32 bits
    77  // -----------------
    78  // 0: [a,a,a,a,b,b,b,b]
    79  // 1: [c,c,c,c,d,d,d,d]
    80  // 2: [e,e,e,e,f,f,f,f]
    81  // 3: [g,g,g,g,h,h,h,h]
    82  // ...
    83  DATA ·shuffleInt32x1to16bits+96+0(SB)/4,  $0x80808000
    84  DATA ·shuffleInt32x1to16bits+96+4(SB)/4,  $0x80808000
    85  DATA ·shuffleInt32x1to16bits+96+8(SB)/4,  $0x80808001
    86  DATA ·shuffleInt32x1to16bits+96+12(SB)/4, $0x80808001
    87  
    88  DATA ·shuffleInt32x1to16bits+96+16(SB)/4, $0x80808002
    89  DATA ·shuffleInt32x1to16bits+96+20(SB)/4, $0x80808002
    90  DATA ·shuffleInt32x1to16bits+96+24(SB)/4, $0x80808003
    91  DATA ·shuffleInt32x1to16bits+96+28(SB)/4, $0x80808003
    92  
    93  // 5 bits => 32 bits
    94  // -----------------
    95  // 0: [a,a,a,a,a,b,b,b]
    96  // 1: [b,b,c,c,c,c,c,d]
    97  // 2: [d,d,d,d,e,e,e,e]
    98  // 3: [e,f,f,f,f,f,g,g]
    99  // 4: [g,g,g,h,h,h,h,h]
   100  // ...
   101  DATA ·shuffleInt32x1to16bits+128+0(SB)/4,  $0x80808000
   102  DATA ·shuffleInt32x1to16bits+128+4(SB)/4,  $0x80800100
   103  DATA ·shuffleInt32x1to16bits+128+8(SB)/4,  $0x80808001
   104  DATA ·shuffleInt32x1to16bits+128+12(SB)/4, $0x80800201
   105  
   106  DATA ·shuffleInt32x1to16bits+128+16(SB)/4, $0x80800302
   107  DATA ·shuffleInt32x1to16bits+128+20(SB)/4, $0x80808003
   108  DATA ·shuffleInt32x1to16bits+128+24(SB)/4, $0x80800403
   109  DATA ·shuffleInt32x1to16bits+128+28(SB)/4, $0x80808004
   110  
   111  // 6 bits => 32 bits
   112  // -----------------
   113  // 0: [a,a,a,a,a,a,b,b]
   114  // 1: [b,b,b,b,c,c,c,c]
   115  // 2: [c,c,d,d,d,d,d,d]
   116  // 3: [e,e,e,e,e,e,f,f]
   117  // 4: [f,f,f,f,g,g,g,g]
   118  // 5: [g,g,h,h,h,h,h,h]
   119  // ...
   120  DATA ·shuffleInt32x1to16bits+160+0(SB)/4,  $0x80808000
   121  DATA ·shuffleInt32x1to16bits+160+4(SB)/4,  $0x80800100
   122  DATA ·shuffleInt32x1to16bits+160+8(SB)/4,  $0x80800201
   123  DATA ·shuffleInt32x1to16bits+160+12(SB)/4, $0x80808002
   124  
   125  DATA ·shuffleInt32x1to16bits+160+16(SB)/4, $0x80808003
   126  DATA ·shuffleInt32x1to16bits+160+20(SB)/4, $0x80800403
   127  DATA ·shuffleInt32x1to16bits+160+24(SB)/4, $0x80800504
   128  DATA ·shuffleInt32x1to16bits+160+28(SB)/4, $0x80808005
   129  
   130  // 7 bits => 32 bits
   131  // -----------------
   132  // 0: [a,a,a,a,a,a,a,b]
   133  // 1: [b,b,b,b,b,b,c,c]
   134  // 2: [c,c,c,c,c,d,d,d]
   135  // 3: [d,d,d,d,e,e,e,e]
   136  // 4: [e,e,e,f,f,f,f,f]
   137  // 5: [f,f,g,g,g,g,g,g]
   138  // 6: [g,h,h,h,h,h,h,h]
   139  // ...
   140  DATA ·shuffleInt32x1to16bits+192+0(SB)/4,  $0x80808000
   141  DATA ·shuffleInt32x1to16bits+192+4(SB)/4,  $0x80800100
   142  DATA ·shuffleInt32x1to16bits+192+8(SB)/4,  $0x80800201
   143  DATA ·shuffleInt32x1to16bits+192+12(SB)/4, $0x80800302
   144  
   145  DATA ·shuffleInt32x1to16bits+192+16(SB)/4, $0x80800403
   146  DATA ·shuffleInt32x1to16bits+192+20(SB)/4, $0x80800504
   147  DATA ·shuffleInt32x1to16bits+192+24(SB)/4, $0x80800605
   148  DATA ·shuffleInt32x1to16bits+192+28(SB)/4, $0x80808006
   149  
   150  // 8 bits => 32 bits
   151  // -----------------
   152  // 0: [a,a,a,a,a,a,a,a]
   153  // 1: [b,b,b,b,b,b,b,b]
   154  // 2: [c,c,c,c,c,c,c,c]
   155  // 3: [d,d,d,d,d,d,d,d]
   156  // 4: [e,e,e,e,e,e,e,e]
   157  // 5: [f,f,f,f,f,f,f,f]
   158  // 6: [g,g,g,g,g,g,g,g]
   159  // 7: [h,h,h,h,h,h,h,h]
   160  // ...
   161  DATA ·shuffleInt32x1to16bits+224+0(SB)/4,  $0x80808000
   162  DATA ·shuffleInt32x1to16bits+224+4(SB)/4,  $0x80808001
   163  DATA ·shuffleInt32x1to16bits+224+8(SB)/4,  $0x80808002
   164  DATA ·shuffleInt32x1to16bits+224+12(SB)/4, $0x80808003
   165  
   166  DATA ·shuffleInt32x1to16bits+224+16(SB)/4, $0x80808004
   167  DATA ·shuffleInt32x1to16bits+224+20(SB)/4, $0x80808005
   168  DATA ·shuffleInt32x1to16bits+224+24(SB)/4, $0x80808006
   169  DATA ·shuffleInt32x1to16bits+224+28(SB)/4, $0x80808007
   170  
   171  // 9 bits => 32 bits
   172  // -----------------
   173  // 0: [a,a,a,a,a,a,a,a]
   174  // 1: [a,b,b,b,b,b,b,b]
   175  // 2: [b,b,c,c,c,c,c,c]
   176  // 3: [c,c,c,d,d,d,d,d]
   177  // 4: [d,d,d,d,e,e,e,e]
   178  // 5: [e,e,e,e,e,f,f,f]
   179  // 6: [f,f,f,f,f,f,g,g]
   180  // 7: [g,g,g,g,g,g,g,h]
   181  // 8: [h,h,h,h,h,h,h,h]
   182  // ...
   183  DATA ·shuffleInt32x1to16bits+256+0(SB)/4,  $0x80800100
   184  DATA ·shuffleInt32x1to16bits+256+4(SB)/4,  $0x80800201
   185  DATA ·shuffleInt32x1to16bits+256+8(SB)/4,  $0x80800302
   186  DATA ·shuffleInt32x1to16bits+256+12(SB)/4, $0x80800403
   187  
   188  DATA ·shuffleInt32x1to16bits+256+16(SB)/4, $0x80800504
   189  DATA ·shuffleInt32x1to16bits+256+20(SB)/4, $0x80800605
   190  DATA ·shuffleInt32x1to16bits+256+24(SB)/4, $0x80800706
   191  DATA ·shuffleInt32x1to16bits+256+28(SB)/4, $0x80800807
   192  
   193  // 10 bits => 32 bits
   194  // ------------------
   195  // 0: [a,a,a,a,a,a,a,a]
   196  // 1: [a,a,b,b,b,b,b,b]
   197  // 2: [b,b,b,b,c,c,c,c]
   198  // 3: [c,c,c,c,c,c,d,d]
   199  // 4: [d,d,d,d,d,d,d,d]
   200  // 5: [e,e,e,e,e,e,e,e]
   201  // 6: [e,e,f,f,f,f,f,f]
   202  // 7: [f,f,f,f,g,g,g,g]
   203  // 8: [g,g,g,g,g,g,h,h]
   204  // 9: [h,h,h,h,h,h,h,h]
   205  // ...
   206  DATA ·shuffleInt32x1to16bits+288+0(SB)/4,  $0x80800100
   207  DATA ·shuffleInt32x1to16bits+288+4(SB)/4,  $0x80800201
   208  DATA ·shuffleInt32x1to16bits+288+8(SB)/4,  $0x80800302
   209  DATA ·shuffleInt32x1to16bits+288+12(SB)/4, $0x80800403
   210  
   211  DATA ·shuffleInt32x1to16bits+288+16(SB)/4, $0x80800605
   212  DATA ·shuffleInt32x1to16bits+288+20(SB)/4, $0x80800706
   213  DATA ·shuffleInt32x1to16bits+288+24(SB)/4, $0x80800807
   214  DATA ·shuffleInt32x1to16bits+288+28(SB)/4, $0x80800908
   215  
   216  // 11 bits => 32 bits
   217  // ------------------
   218  // 0: [a,a,a,a,a,a,a,a]
   219  // 1: [a,a,a,b,b,b,b,b]
   220  // 2: [b,b,b,b,b,b,c,c]
   221  // 3: [c,c,c,c,c,c,c,c]
   222  // 4: [c,d,d,d,d,d,d,d]
   223  // 5: [d,d,d,d,e,e,e,e]
   224  // 6: [e,e,e,e,e,e,e,f]
   225  // 7: [f,f,f,f,f,f,f,f]
   226  // 8: [f,f,g,g,g,g,g,g]
   227  // 9: [g,g,g,g,g,h,h,h]
   228  // A: [h,h,h,h,h,h,h,h]
   229  // ...
   230  DATA ·shuffleInt32x1to16bits+320+0(SB)/4,  $0x80800100
   231  DATA ·shuffleInt32x1to16bits+320+4(SB)/4,  $0x80800201
   232  DATA ·shuffleInt32x1to16bits+320+8(SB)/4,  $0x80040302
   233  DATA ·shuffleInt32x1to16bits+320+12(SB)/4, $0x80800504
   234  
   235  DATA ·shuffleInt32x1to16bits+320+16(SB)/4, $0x80800605
   236  DATA ·shuffleInt32x1to16bits+320+20(SB)/4, $0x80080706
   237  DATA ·shuffleInt32x1to16bits+320+24(SB)/4, $0x80800908
   238  DATA ·shuffleInt32x1to16bits+320+28(SB)/4, $0x80800A09
   239  
   240  // 12 bits => 32 bits
   241  // ------------------
   242  // 0: [a,a,a,a,a,a,a,a]
   243  // 1: [a,a,a,a,b,b,b,b]
   244  // 2: [b,b,b,b,b,b,b,b]
   245  // 3: [c,c,c,c,c,c,c,c]
   246  // 4: [c,c,c,c,d,d,d,d]
   247  // 5: [d,d,d,d,d,d,d,d]
   248  // 6: [e,e,e,e,e,e,e,e]
   249  // 7: [e,e,e,e,f,f,f,f]
   250  // 8: [f,f,f,f,f,f,f,f]
   251  // 9: [g,g,g,g,g,g,g,g]
   252  // A: [g,g,g,g,h,h,h,h]
   253  // B: [h,h,h,h,h,h,h,h]
   254  // ...
   255  DATA ·shuffleInt32x1to16bits+352+0(SB)/4,  $0x80800100 // a: input bytes 0-1
   256  DATA ·shuffleInt32x1to16bits+352+4(SB)/4,  $0x80800201 // b: input bytes 1-2
   257  DATA ·shuffleInt32x1to16bits+352+8(SB)/4,  $0x80800403 // c: input bytes 3-4 only (was 0x80080403, which also pulled in byte 8)
   258  DATA ·shuffleInt32x1to16bits+352+12(SB)/4, $0x80800504 // d: input bytes 4-5
   259
   260  DATA ·shuffleInt32x1to16bits+352+16(SB)/4, $0x80800706 // e: input bytes 6-7
   261  DATA ·shuffleInt32x1to16bits+352+20(SB)/4, $0x80800807 // f: input bytes 7-8
   262  DATA ·shuffleInt32x1to16bits+352+24(SB)/4, $0x80800A09 // g: input bytes 9-10
   263  DATA ·shuffleInt32x1to16bits+352+28(SB)/4, $0x80800B0A // h: input bytes 10-11
   264  
   265  // 13 bits => 32 bits
   266  // ------------------
   267  // 0: [a,a,a,a,a,a,a,a]
   268  // 1: [a,a,a,a,a,b,b,b]
   269  // 2: [b,b,b,b,b,b,b,b]
   270  // 3: [b,b,c,c,c,c,c,c]
   271  // 4: [c,c,c,c,c,c,c,d]
   272  // 5: [d,d,d,d,d,d,d,d]
   273  // 6: [d,d,d,d,e,e,e,e]
   274  // 7: [e,e,e,e,e,e,e,e]
   275  // 8: [e,f,f,f,f,f,f,f]
   276  // 9: [f,f,f,f,f,f,g,g]
   277  // A: [g,g,g,g,g,g,g,g]
   278  // B: [g,g,g,h,h,h,h,h]
   279  // C: [h,h,h,h,h,h,h,h]
   280  // ...
   281  DATA ·shuffleInt32x1to16bits+384+0(SB)/4,  $0x80800100
   282  DATA ·shuffleInt32x1to16bits+384+4(SB)/4,  $0x80030201
   283  DATA ·shuffleInt32x1to16bits+384+8(SB)/4,  $0x80800403
   284  DATA ·shuffleInt32x1to16bits+384+12(SB)/4, $0x80060504
   285  
   286  DATA ·shuffleInt32x1to16bits+384+16(SB)/4, $0x80080706
   287  DATA ·shuffleInt32x1to16bits+384+20(SB)/4, $0x80800908
   288  DATA ·shuffleInt32x1to16bits+384+24(SB)/4, $0x800B0A09
   289  DATA ·shuffleInt32x1to16bits+384+28(SB)/4, $0x80800C0B
   290  
   291  // 14 bits => 32 bits
   292  // ------------------
   293  // 0: [a,a,a,a,a,a,a,a]
   294  // 1: [a,a,a,a,a,a,b,b]
   295  // 2: [b,b,b,b,b,b,b,b]
   296  // 3: [b,b,b,b,c,c,c,c]
   297  // 4: [c,c,c,c,c,c,c,c]
   298  // 5: [c,c,d,d,d,d,d,d]
   299  // 6: [d,d,d,d,d,d,d,d]
   300  // 7: [e,e,e,e,e,e,e,e]
   301  // 8: [e,e,e,e,e,e,f,f]
   302  // 9: [f,f,f,f,f,f,f,f]
   303  // A: [f,f,f,f,g,g,g,g]
   304  // B: [g,g,g,g,g,g,g,g]
   305  // C: [g,g,h,h,h,h,h,h]
   306  // D: [h,h,h,h,h,h,h,h]
   307  // ...
   308  DATA ·shuffleInt32x1to16bits+416+0(SB)/4,  $0x80800100 // a: input bytes 0-1
   309  DATA ·shuffleInt32x1to16bits+416+4(SB)/4,  $0x80030201 // b: input bytes 1-3
   310  DATA ·shuffleInt32x1to16bits+416+8(SB)/4,  $0x80050403 // c: input bytes 3-5
   311  DATA ·shuffleInt32x1to16bits+416+12(SB)/4, $0x80800605 // d: input bytes 5-6
   312
   313  DATA ·shuffleInt32x1to16bits+416+16(SB)/4, $0x80800807 // e: input bytes 7-8 only (was 0x80080807, which selected byte 8 twice)
   314  DATA ·shuffleInt32x1to16bits+416+20(SB)/4, $0x800A0908 // f: input bytes 8-10
   315  DATA ·shuffleInt32x1to16bits+416+24(SB)/4, $0x800C0B0A // g: input bytes 10-12
   316  DATA ·shuffleInt32x1to16bits+416+28(SB)/4, $0x80800D0C // h: input bytes 12-13
   317  
   318  // 15 bits => 32 bits
   319  // ------------------
   320  // 0: [a,a,a,a,a,a,a,a]
   321  // 1: [a,a,a,a,a,a,a,b]
   322  // 2: [b,b,b,b,b,b,b,b]
   323  // 3: [b,b,b,b,b,b,c,c]
   324  // 4: [c,c,c,c,c,c,c,c]
   325  // 5: [c,c,c,c,c,d,d,d]
   326  // 6: [d,d,d,d,d,d,d,d]
   327  // 7: [d,d,d,d,e,e,e,e]
   328  // 8: [e,e,e,e,e,e,e,e]
   329  // 9: [e,e,e,f,f,f,f,f]
   330  // A: [f,f,f,f,f,f,f,f]
   331  // B: [f,f,g,g,g,g,g,g]
   332  // C: [g,g,g,g,g,g,g,g]
   333  // D: [g,h,h,h,h,h,h,h]
   334  // E: [h,h,h,h,h,h,h,h]
   335  // ...
   336  DATA ·shuffleInt32x1to16bits+448+0(SB)/4,  $0x80800100
   337  DATA ·shuffleInt32x1to16bits+448+4(SB)/4,  $0x80030201
   338  DATA ·shuffleInt32x1to16bits+448+8(SB)/4,  $0x80050403
   339  DATA ·shuffleInt32x1to16bits+448+12(SB)/4, $0x80070605
   340  
   341  DATA ·shuffleInt32x1to16bits+448+16(SB)/4, $0x80090807
   342  DATA ·shuffleInt32x1to16bits+448+20(SB)/4, $0x800B0A09
   343  DATA ·shuffleInt32x1to16bits+448+24(SB)/4, $0x800D0C0B
   344  DATA ·shuffleInt32x1to16bits+448+28(SB)/4, $0x80800E0D
   345  
   346  // 16 bits => 32 bits
   347  // ------------------
   348  // 0: [a,a,a,a,a,a,a,a]
   349  // 1: [a,a,a,a,a,a,a,a]
   350  // 2: [b,b,b,b,b,b,b,b]
   351  // 3: [b,b,b,b,b,b,b,b]
   352  // 4: [c,c,c,c,c,c,c,c]
   353  // 5: [c,c,c,c,c,c,c,c]
   354  // 6: [d,d,d,d,d,d,d,d]
   355  // 7: [d,d,d,d,d,d,d,d]
   356  // 8: [e,e,e,e,e,e,e,e]
   357  // 9: [e,e,e,e,e,e,e,e]
   358  // A: [f,f,f,f,f,f,f,f]
   359  // B: [f,f,f,f,f,f,f,f]
   360  // C: [g,g,g,g,g,g,g,g]
   361  // D: [g,g,g,g,g,g,g,g]
   362  // E: [h,h,h,h,h,h,h,h]
   363  // F: [h,h,h,h,h,h,h,h]
   364  // ...
   365  DATA ·shuffleInt32x1to16bits+480+0(SB)/4,  $0x80800100
   366  DATA ·shuffleInt32x1to16bits+480+4(SB)/4,  $0x80800302
   367  DATA ·shuffleInt32x1to16bits+480+8(SB)/4,  $0x80800504
   368  DATA ·shuffleInt32x1to16bits+480+12(SB)/4, $0x80800706
   369  
   370  DATA ·shuffleInt32x1to16bits+480+16(SB)/4, $0x80800908
   371  DATA ·shuffleInt32x1to16bits+480+20(SB)/4, $0x80800B0A
   372  DATA ·shuffleInt32x1to16bits+480+24(SB)/4, $0x80800D0C
   373  DATA ·shuffleInt32x1to16bits+480+28(SB)/4, $0x80800F0E
   374  
   375  // Shuffle masks for unpacking values from bit widths 17 to 26.
   376  //
   377  // The masks are grouped in 48 bytes chunks containing 3 masks of 16 bytes, with
   378  // the following layout:
   379  //
   380  // - The first mask is used to shuffle values from the first 16 bytes of input
   381  //   into the lower 16 bytes of output. These values are then shifted RIGHT to
   382  //   be aligned on the beginning of each 32 bit word.
   383  //
   384  // - The second mask selects values from the first 16 bytes of input into the
   385  //   upper 16 bytes of output. These values are then shifted RIGHT to be aligned
   386  //   on the beginning of each 32 bit word.
   387  //
   388  // - The third mask selects values from the second 16 bytes of input into the
   389  //   upper 16 bytes of output. These values are then shifted RIGHT to be aligned
   390  //   on the beginning of each 32 bit word.
   391  //
   392  // The bit width is intended to be used as an index into this array, using this
   393  // formula to convert from the index to a byte offset:
   394  //
   395  //      offset = 48 * (bitWidth - 17)
   396  //
   397  GLOBL ·shuffleInt32x17to26bits(SB), RODATA|NOPTR, $480 // 10 bit widths (17..26) * 48 bytes per chunk
   398  
   399  // 17 bits => 32 bits
   400  // ------------------
   401  // 0: [a,a,a,a,a,a,a,a]
   402  // 1: [a,a,a,a,a,a,a,a]
   403  // 2: [a,b,b,b,b,b,b,b]
   404  // 3: [b,b,b,b,b,b,b,b]
   405  // 4: [b,b,c,c,c,c,c,c]
   406  // 5: [c,c,c,c,c,c,c,c]
   407  // 6: [c,c,c,d,d,d,d,d]
   408  // 7: [d,d,d,d,d,d,d,d]
   409  // 8: [d,d,d,d,e,e,e,e]
   410  // 9: [e,e,e,e,e,e,e,e]
   411  // A: [e,e,e,e,e,f,f,f]
   412  // B: [f,f,f,f,f,f,f,f]
   413  // C: [f,f,f,f,f,f,g,g]
   414  // D: [g,g,g,g,g,g,g,g]
   415  // E: [g,g,g,g,g,g,g,h]
   416  // F: [h,h,h,h,h,h,h,h]
   417  // ---
   418  // 0: [h,h,h,h,h,h,h,h]
   419  // ...
   420  DATA ·shuffleInt32x17to26bits+0+0(SB)/4,  $0x80020100
   421  DATA ·shuffleInt32x17to26bits+0+4(SB)/4,  $0x80040302
   422  DATA ·shuffleInt32x17to26bits+0+8(SB)/4,  $0x80060504
   423  DATA ·shuffleInt32x17to26bits+0+12(SB)/4, $0x80080706
   424  
   425  DATA ·shuffleInt32x17to26bits+0+16(SB)/4, $0x800A0908
   426  DATA ·shuffleInt32x17to26bits+0+20(SB)/4, $0x800C0B0A
   427  DATA ·shuffleInt32x17to26bits+0+24(SB)/4, $0x800E0D0C
   428  DATA ·shuffleInt32x17to26bits+0+28(SB)/4, $0x80800F0E
   429  
   430  DATA ·shuffleInt32x17to26bits+0+32(SB)/4, $0x80808080
   431  DATA ·shuffleInt32x17to26bits+0+36(SB)/4, $0x80808080
   432  DATA ·shuffleInt32x17to26bits+0+40(SB)/4, $0x80808080
   433  DATA ·shuffleInt32x17to26bits+0+44(SB)/4, $0x80008080
   434  
   435  // 18 bits => 32 bits
   436  // ------------------
   437  // 0: [a,a,a,a,a,a,a,a]
   438  // 1: [a,a,a,a,a,a,a,a]
   439  // 2: [a,a,b,b,b,b,b,b]
   440  // 3: [b,b,b,b,b,b,b,b]
   441  // 4: [b,b,b,b,c,c,c,c]
   442  // 5: [c,c,c,c,c,c,c,c]
   443  // 6: [c,c,c,c,c,c,d,d]
   444  // 7: [d,d,d,d,d,d,d,d]
   445  // 8: [d,d,d,d,d,d,d,d]
   446  // 9: [e,e,e,e,e,e,e,e]
   447  // A: [e,e,e,e,e,e,e,e]
   448  // B: [e,e,f,f,f,f,f,f]
   449  // C: [f,f,f,f,f,f,f,f]
   450  // D: [f,f,f,f,g,g,g,g]
   451  // E: [g,g,g,g,g,g,g,g]
   452  // F: [g,g,g,g,g,g,h,h]
   453  // ---
   454  // 0: [h,h,h,h,h,h,h,h]
   455  // 1: [h,h,h,h,h,h,h,h]
   456  // ...
   457  DATA ·shuffleInt32x17to26bits+48+0(SB)/4,  $0x80020100
   458  DATA ·shuffleInt32x17to26bits+48+4(SB)/4,  $0x80040302
   459  DATA ·shuffleInt32x17to26bits+48+8(SB)/4,  $0x80060504
   460  DATA ·shuffleInt32x17to26bits+48+12(SB)/4, $0x80080706
   461  
   462  DATA ·shuffleInt32x17to26bits+48+16(SB)/4, $0x800B0A09
   463  DATA ·shuffleInt32x17to26bits+48+20(SB)/4, $0x800D0C0B
   464  DATA ·shuffleInt32x17to26bits+48+24(SB)/4, $0x800F0E0D
   465  DATA ·shuffleInt32x17to26bits+48+28(SB)/4, $0x8080800F
   466  
   467  DATA ·shuffleInt32x17to26bits+48+32(SB)/4, $0x80808080
   468  DATA ·shuffleInt32x17to26bits+48+36(SB)/4, $0x80808080
   469  DATA ·shuffleInt32x17to26bits+48+40(SB)/4, $0x80808080
   470  DATA ·shuffleInt32x17to26bits+48+44(SB)/4, $0x80010080
   471  
   472  // 19 bits => 32 bits
   473  // ------------------
   474  // 0: [a,a,a,a,a,a,a,a]
   475  // 1: [a,a,a,a,a,a,a,a]
   476  // 2: [a,a,a,b,b,b,b,b]
   477  // 3: [b,b,b,b,b,b,b,b]
   478  // 4: [b,b,b,b,b,b,c,c]
   479  // 5: [c,c,c,c,c,c,c,c]
   480  // 6: [c,c,c,c,c,c,c,c]
   481  // 7: [c,d,d,d,d,d,d,d]
   482  // 8: [d,d,d,d,d,d,d,d]
   483  // 9: [d,d,d,d,e,e,e,e]
   484  // A: [e,e,e,e,e,e,e,e]
   485  // B: [e,e,e,e,e,e,e,f]
   486  // C: [f,f,f,f,f,f,f,f]
   487  // D: [f,f,f,f,f,f,f,f]
   488  // E: [f,f,g,g,g,g,g,g]
   489  // F: [g,g,g,g,g,g,g,g]
   490  // ---
   491  // 0: [g,g,g,g,g,h,h,h]
   492  // 1: [h,h,h,h,h,h,h,h]
   493  // 2: [h,h,h,h,h,h,h,h]
   494  // ...
   495  DATA ·shuffleInt32x17to26bits+96+0(SB)/4,  $0x80020100
   496  DATA ·shuffleInt32x17to26bits+96+4(SB)/4,  $0x80040302
   497  DATA ·shuffleInt32x17to26bits+96+8(SB)/4,  $0x07060504
   498  DATA ·shuffleInt32x17to26bits+96+12(SB)/4, $0x80090807
   499  
   500  DATA ·shuffleInt32x17to26bits+96+16(SB)/4, $0x800B0A09
   501  DATA ·shuffleInt32x17to26bits+96+20(SB)/4, $0x0E0D0C0B
   502  DATA ·shuffleInt32x17to26bits+96+24(SB)/4, $0x80800F0E
   503  DATA ·shuffleInt32x17to26bits+96+28(SB)/4, $0x80808080
   504  
   505  DATA ·shuffleInt32x17to26bits+96+32(SB)/4, $0x80808080
   506  DATA ·shuffleInt32x17to26bits+96+36(SB)/4, $0x80808080
   507  DATA ·shuffleInt32x17to26bits+96+40(SB)/4, $0x80008080
   508  DATA ·shuffleInt32x17to26bits+96+44(SB)/4, $0x80020100
   509  
   510  // 20 bits => 32 bits
   511  // ------------------
   512  // 0: [a,a,a,a,a,a,a,a]
   513  // 1: [a,a,a,a,a,a,a,a]
   514  // 2: [a,a,a,a,b,b,b,b]
   515  // 3: [b,b,b,b,b,b,b,b]
   516  // 4: [b,b,b,b,b,b,b,b]
   517  // 5: [c,c,c,c,c,c,c,c]
   518  // 6: [c,c,c,c,c,c,c,c]
   519  // 7: [c,c,c,c,d,d,d,d]
   520  // 8: [d,d,d,d,d,d,d,d]
   521  // 9: [d,d,d,d,d,d,d,d]
   522  // A: [e,e,e,e,e,e,e,e]
   523  // B: [e,e,e,e,e,e,e,e]
   524  // C: [e,e,e,e,f,f,f,f]
   525  // D: [f,f,f,f,f,f,f,f]
   526  // E: [f,f,f,f,f,f,f,f]
   527  // F: [g,g,g,g,g,g,g,g]
   528  // ---
   529  // 0: [g,g,g,g,g,g,g,g]
   530  // 1: [g,g,g,g,h,h,h,h]
   531  // 2: [h,h,h,h,h,h,h,h]
   532  // 3: [h,h,h,h,h,h,h,h]
   533  // ...
   534  DATA ·shuffleInt32x17to26bits+144+0(SB)/4,  $0x80020100
   535  DATA ·shuffleInt32x17to26bits+144+4(SB)/4,  $0x80040302
   536  DATA ·shuffleInt32x17to26bits+144+8(SB)/4,  $0x80070605
   537  DATA ·shuffleInt32x17to26bits+144+12(SB)/4, $0x80090807
   538  
   539  DATA ·shuffleInt32x17to26bits+144+16(SB)/4, $0x800C0B0A
   540  DATA ·shuffleInt32x17to26bits+144+20(SB)/4, $0x800E0D0C
   541  DATA ·shuffleInt32x17to26bits+144+24(SB)/4, $0x8080800F
   542  DATA ·shuffleInt32x17to26bits+144+28(SB)/4, $0x80808080
   543  
   544  DATA ·shuffleInt32x17to26bits+144+32(SB)/4, $0x80808080
   545  DATA ·shuffleInt32x17to26bits+144+36(SB)/4, $0x80808080
   546  DATA ·shuffleInt32x17to26bits+144+40(SB)/4, $0x80010080
   547  DATA ·shuffleInt32x17to26bits+144+44(SB)/4, $0x80030201
   548  
   549  // 21 bits => 32 bits
   550  // ------------------
   551  // 0: [a,a,a,a,a,a,a,a]
   552  // 1: [a,a,a,a,a,a,a,a]
   553  // 2: [a,a,a,a,a,b,b,b]
   554  // 3: [b,b,b,b,b,b,b,b]
   555  // 4: [b,b,b,b,b,b,b,b]
   556  // 5: [b,b,c,c,c,c,c,c]
   557  // 6: [c,c,c,c,c,c,c,c]
   558  // 7: [c,c,c,c,c,c,c,d]
   559  // 8: [d,d,d,d,d,d,d,d]
   560  // 9: [d,d,d,d,d,d,d,d]
   561  // A: [d,d,d,d,e,e,e,e]
   562  // B: [e,e,e,e,e,e,e,e]
   563  // C: [e,e,e,e,e,e,e,e]
   564  // D: [e,f,f,f,f,f,f,f]
   565  // E: [f,f,f,f,f,f,f,f]
   566  // F: [f,f,f,f,f,f,g,g]
   567  // ---
   568  // 0: [g,g,g,g,g,g,g,g]
   569  // 1: [g,g,g,g,g,g,g,g]
   570  // 2: [g,g,g,h,h,h,h,h]
   571  // 3: [h,h,h,h,h,h,h,h]
   572  // 4: [h,h,h,h,h,h,h,h]
   573  // ...
   574  DATA ·shuffleInt32x17to26bits+192+0(SB)/4,  $0x80020100
   575  DATA ·shuffleInt32x17to26bits+192+4(SB)/4,  $0x05040302
   576  DATA ·shuffleInt32x17to26bits+192+8(SB)/4,  $0x80070605
   577  DATA ·shuffleInt32x17to26bits+192+12(SB)/4, $0x0A090807
   578  
   579  DATA ·shuffleInt32x17to26bits+192+16(SB)/4, $0x0D0C0B0A
   580  DATA ·shuffleInt32x17to26bits+192+20(SB)/4, $0x800F0E0D
   581  DATA ·shuffleInt32x17to26bits+192+24(SB)/4, $0x8080800F
   582  DATA ·shuffleInt32x17to26bits+192+28(SB)/4, $0x80808080
   583  
   584  DATA ·shuffleInt32x17to26bits+192+32(SB)/4, $0x80808080
   585  DATA ·shuffleInt32x17to26bits+192+36(SB)/4, $0x80808080
   586  DATA ·shuffleInt32x17to26bits+192+40(SB)/4, $0x02010080
   587  DATA ·shuffleInt32x17to26bits+192+44(SB)/4, $0x80040302
   588  
   589  // 22 bits => 32 bits
   590  // ------------------
   591  // 0: [a,a,a,a,a,a,a,a]
   592  // 1: [a,a,a,a,a,a,a,a]
   593  // 2: [a,a,a,a,a,a,b,b]
   594  // 3: [b,b,b,b,b,b,b,b]
   595  // 4: [b,b,b,b,b,b,b,b]
   596  // 5: [b,b,b,b,c,c,c,c]
   597  // 6: [c,c,c,c,c,c,c,c]
   598  // 7: [c,c,c,c,c,c,c,c]
   599  // 8: [c,c,d,d,d,d,d,d]
   600  // 9: [d,d,d,d,d,d,d,d]
   601  // A: [d,d,d,d,d,d,d,d]
   602  // B: [e,e,e,e,e,e,e,e]
   603  // C: [e,e,e,e,e,e,e,e]
   604  // D: [e,e,e,e,e,e,f,f]
   605  // E: [f,f,f,f,f,f,f,f]
   606  // F: [f,f,f,f,f,f,f,f]
   607  // ---
   608  // 0: [f,f,f,f,g,g,g,g]
   609  // 1: [g,g,g,g,g,g,g,g]
   610  // 2: [g,g,g,g,g,g,g,g]
   611  // 3: [g,g,h,h,h,h,h,h]
   612  // 4: [h,h,h,h,h,h,h,h]
   613  // 5: [h,h,h,h,h,h,h,h]
   614  // ...
   615  DATA ·shuffleInt32x17to26bits+240+0(SB)/4,  $0x80020100
   616  DATA ·shuffleInt32x17to26bits+240+4(SB)/4,  $0x05040302
   617  DATA ·shuffleInt32x17to26bits+240+8(SB)/4,  $0x08070605
   618  DATA ·shuffleInt32x17to26bits+240+12(SB)/4, $0x800A0908
   619  
   620  DATA ·shuffleInt32x17to26bits+240+16(SB)/4, $0x800D0C0B
   621  DATA ·shuffleInt32x17to26bits+240+20(SB)/4, $0x800F0E0D
   622  DATA ·shuffleInt32x17to26bits+240+24(SB)/4, $0x80808080
   623  DATA ·shuffleInt32x17to26bits+240+28(SB)/4, $0x80808080
   624  
   625  DATA ·shuffleInt32x17to26bits+240+32(SB)/4, $0x80808080
   626  DATA ·shuffleInt32x17to26bits+240+36(SB)/4, $0x00808080
   627  DATA ·shuffleInt32x17to26bits+240+40(SB)/4, $0x03020100
   628  DATA ·shuffleInt32x17to26bits+240+44(SB)/4, $0x80050403
   629  
   630  // 23 bits => 32 bits
   631  // ------------------
   632  // 0: [a,a,a,a,a,a,a,a]
   633  // 1: [a,a,a,a,a,a,a,a]
   634  // 2: [a,a,a,a,a,a,a,b]
   635  // 3: [b,b,b,b,b,b,b,b]
   636  // 4: [b,b,b,b,b,b,b,b]
   637  // 5: [b,b,b,b,b,b,c,c]
   638  // 6: [c,c,c,c,c,c,c,c]
   639  // 7: [c,c,c,c,c,c,c,c]
   640  // 8: [c,c,c,c,c,d,d,d]
   641  // 9: [d,d,d,d,d,d,d,d]
   642  // A: [d,d,d,d,d,d,d,d]
   643  // B: [d,d,d,d,e,e,e,e]
   644  // C: [e,e,e,e,e,e,e,e]
   645  // D: [e,e,e,e,e,e,e,e]
   646  // E: [e,e,e,f,f,f,f,f]
   647  // F: [f,f,f,f,f,f,f,f]
   648  // ---
   649  // 0: [f,f,f,f,f,f,f,f]
   650  // 1: [f,f,g,g,g,g,g,g]
   651  // 2: [g,g,g,g,g,g,g,g]
   652  // 3: [g,g,g,g,g,g,g,g]
   653  // 4: [g,h,h,h,h,h,h,h]
   654  // 5: [h,h,h,h,h,h,h,h]
   655  // 6: [h,h,h,h,h,h,h,h]
   656  // ...
   657  DATA ·shuffleInt32x17to26bits+288+0(SB)/4,  $0x80020100
   658  DATA ·shuffleInt32x17to26bits+288+4(SB)/4,  $0x05040302
   659  DATA ·shuffleInt32x17to26bits+288+8(SB)/4,  $0x08070605
   660  DATA ·shuffleInt32x17to26bits+288+12(SB)/4, $0x0B0A0908
   661  
   662  DATA ·shuffleInt32x17to26bits+288+16(SB)/4, $0x0E0D0C0B
   663  DATA ·shuffleInt32x17to26bits+288+20(SB)/4, $0x80800F0E
   664  DATA ·shuffleInt32x17to26bits+288+24(SB)/4, $0x80808080
   665  DATA ·shuffleInt32x17to26bits+288+28(SB)/4, $0x80808080
   666  
   667  DATA ·shuffleInt32x17to26bits+288+32(SB)/4, $0x80808080
   668  DATA ·shuffleInt32x17to26bits+288+36(SB)/4, $0x01008080
   669  DATA ·shuffleInt32x17to26bits+288+40(SB)/4, $0x04030201
   670  DATA ·shuffleInt32x17to26bits+288+44(SB)/4, $0x80060504
   671  
   672  // 24 bits => 32 bits
   673  // ------------------
   674  // 0: [a,a,a,a,a,a,a,a]
   675  // 1: [a,a,a,a,a,a,a,a]
   676  // 2: [a,a,a,a,a,a,a,a]
   677  // 3: [b,b,b,b,b,b,b,b]
   678  // 4: [b,b,b,b,b,b,b,b]
   679  // 5: [b,b,b,b,b,b,b,b]
   680  // 6: [c,c,c,c,c,c,c,c]
   681  // 7: [c,c,c,c,c,c,c,c]
   682  // 8: [c,c,c,c,c,c,c,c]
   683  // 9: [d,d,d,d,d,d,d,d]
   684  // A: [d,d,d,d,d,d,d,d]
   685  // B: [d,d,d,d,d,d,d,d]
   686  // C: [e,e,e,e,e,e,e,e]
   687  // D: [e,e,e,e,e,e,e,e]
   688  // E: [e,e,e,e,e,e,e,e]
   689  // F: [f,f,f,f,f,f,f,f]
   690  // ---
   691  // 0: [f,f,f,f,f,f,f,f]
   692  // 1: [f,f,f,f,f,f,f,f]
   693  // 2: [g,g,g,g,g,g,g,g]
   694  // 3: [g,g,g,g,g,g,g,g]
   695  // 4: [g,g,g,g,g,g,g,g]
   696  // 5: [h,h,h,h,h,h,h,h]
   697  // 6: [h,h,h,h,h,h,h,h]
   698  // 7: [h,h,h,h,h,h,h,h]
   699  // ...
   700  DATA ·shuffleInt32x17to26bits+336+0(SB)/4,  $0x80020100
   701  DATA ·shuffleInt32x17to26bits+336+4(SB)/4,  $0x80050403
   702  DATA ·shuffleInt32x17to26bits+336+8(SB)/4,  $0x80080706
   703  DATA ·shuffleInt32x17to26bits+336+12(SB)/4, $0x800B0A09
   704  
   705  DATA ·shuffleInt32x17to26bits+336+16(SB)/4, $0x800E0D0C
   706  DATA ·shuffleInt32x17to26bits+336+20(SB)/4, $0x8080800F
   707  DATA ·shuffleInt32x17to26bits+336+24(SB)/4, $0x80808080
   708  DATA ·shuffleInt32x17to26bits+336+28(SB)/4, $0x80808080
   709  
   710  DATA ·shuffleInt32x17to26bits+336+32(SB)/4, $0x80808080
   711  DATA ·shuffleInt32x17to26bits+336+36(SB)/4, $0x80010080
   712  DATA ·shuffleInt32x17to26bits+336+40(SB)/4, $0x80040302
   713  DATA ·shuffleInt32x17to26bits+336+44(SB)/4, $0x80070605
   714  
   715  // 25 bits => 32 bits
   716  // ------------------
   717  // 0: [a,a,a,a,a,a,a,a]
   718  // 1: [a,a,a,a,a,a,a,a]
   719  // 2: [a,a,a,a,a,a,a,a]
   720  // 3: [a,b,b,b,b,b,b,b]
   721  // 4: [b,b,b,b,b,b,b,b]
   722  // 5: [b,b,b,b,b,b,b,b]
   723  // 6: [b,b,c,c,c,c,c,c]
   724  // 7: [c,c,c,c,c,c,c,c]
   725  // 8: [c,c,c,c,c,c,c,c]
   726  // 9: [c,c,c,d,d,d,d,d]
   727  // A: [d,d,d,d,d,d,d,d]
   728  // B: [d,d,d,d,d,d,d,d]
   729  // C: [d,d,d,d,e,e,e,e]
   730  // D: [e,e,e,e,e,e,e,e]
   731  // E: [e,e,e,e,e,e,e,e]
   732  // F: [e,e,e,e,e,f,f,f]
   733  // ---
   734  // 0: [f,f,f,f,f,f,f,f]
   735  // 1: [f,f,f,f,f,f,f,f]
   736  // 2: [f,f,f,f,f,f,g,g]
   737  // 3: [g,g,g,g,g,g,g,g]
   738  // 4: [g,g,g,g,g,g,g,g]
   739  // 5: [g,g,g,g,g,g,g,h]
   740  // 6: [h,h,h,h,h,h,h,h]
   741  // 7: [h,h,h,h,h,h,h,h]
   742  // 8: [h,h,h,h,h,h,h,h]
   743  // ...
   744  DATA ·shuffleInt32x17to26bits+384+0(SB)/4,  $0x03020100
   745  DATA ·shuffleInt32x17to26bits+384+4(SB)/4,  $0x06050403
   746  DATA ·shuffleInt32x17to26bits+384+8(SB)/4,  $0x09080706
   747  DATA ·shuffleInt32x17to26bits+384+12(SB)/4, $0x0C0B0A09
   748  
   749  DATA ·shuffleInt32x17to26bits+384+16(SB)/4, $0x0F0E0D0C
   750  DATA ·shuffleInt32x17to26bits+384+20(SB)/4, $0x8080800F
   751  DATA ·shuffleInt32x17to26bits+384+24(SB)/4, $0x80808080
   752  DATA ·shuffleInt32x17to26bits+384+28(SB)/4, $0x80808080
   753  
   754  DATA ·shuffleInt32x17to26bits+384+32(SB)/4, $0x80808080
   755  DATA ·shuffleInt32x17to26bits+384+36(SB)/4, $0x02010080
   756  DATA ·shuffleInt32x17to26bits+384+40(SB)/4, $0x05040302
   757  DATA ·shuffleInt32x17to26bits+384+44(SB)/4, $0x08070605
   758  
   759  // 26 bits => 32 bits
   760  // ------------------
   761  // 0: [a,a,a,a,a,a,a,a]
   762  // 1: [a,a,a,a,a,a,a,a]
   763  // 2: [a,a,a,a,a,a,a,a]
   764  // 3: [a,a,b,b,b,b,b,b]
   765  // 4: [b,b,b,b,b,b,b,b]
   766  // 5: [b,b,b,b,b,b,b,b]
   767  // 6: [b,b,b,b,c,c,c,c]
   768  // 7: [c,c,c,c,c,c,c,c]
   769  // 8: [c,c,c,c,c,c,c,c]
   770  // 9: [c,c,c,c,c,c,d,d]
   771  // A: [d,d,d,d,d,d,d,d]
   772  // B: [d,d,d,d,d,d,d,d]
   773  // C: [d,d,d,d,d,d,d,d]
   774  // D: [e,e,e,e,e,e,e,e]
   775  // E: [e,e,e,e,e,e,e,e]
   776  // F: [e,e,e,e,e,e,e,e]
   777  // ---
   778  // 0: [e,e,f,f,f,f,f,f]
   779  // 1: [f,f,f,f,f,f,f,f]
   780  // 2: [f,f,f,f,f,f,f,f]
   781  // 3: [f,f,f,f,g,g,g,g]
   782  // 4: [g,g,g,g,g,g,g,g]
   783  // 5: [g,g,g,g,g,g,g,g]
   784  // 6: [g,g,g,g,g,g,h,h]
   785  // 7: [h,h,h,h,h,h,h,h]
   786  // 8: [h,h,h,h,h,h,h,h]
   787  // 9: [h,h,h,h,h,h,h,h]
   788  // ...
   789  DATA ·shuffleInt32x17to26bits+432+0(SB)/4,  $0x03020100 // 1st group (indexes the first half of the diagram): a = bytes 0-3
   790  DATA ·shuffleInt32x17to26bits+432+4(SB)/4,  $0x06050403 // b = bytes 3-6
   791  DATA ·shuffleInt32x17to26bits+432+8(SB)/4,  $0x09080706 // c = bytes 6-9
   792  DATA ·shuffleInt32x17to26bits+432+12(SB)/4, $0x0C0B0A09 // d = bytes 9-C
   793  // 2nd group: still indexes the first half; 0x80 marks a position that takes no input byte.
   794  DATA ·shuffleInt32x17to26bits+432+16(SB)/4, $0x800F0E0D // e = bytes D-F; its last byte comes from the 3rd group
   795  DATA ·shuffleInt32x17to26bits+432+20(SB)/4, $0x80808080 // f: nothing in the first half
   796  DATA ·shuffleInt32x17to26bits+432+24(SB)/4, $0x80808080 // g: nothing in the first half
   797  DATA ·shuffleInt32x17to26bits+432+28(SB)/4, $0x80808080 // h: nothing in the first half
   798  // 3rd group: indexes the second half of the diagram (the rows after "---").
   799  DATA ·shuffleInt32x17to26bits+432+32(SB)/4, $0x00808080 // e = byte 0 in the top byte (its last 2 bits)
   800  DATA ·shuffleInt32x17to26bits+432+36(SB)/4, $0x03020100 // f = bytes 0-3
   801  DATA ·shuffleInt32x17to26bits+432+40(SB)/4, $0x06050403 // g = bytes 3-6
   802  DATA ·shuffleInt32x17to26bits+432+44(SB)/4, $0x09080706 // h = bytes 6-9
   803  
   804  // Shuffle masks for unpacking values from bit widths 27 to 31.
   805  //
   806  // The masks are grouped in 80 bytes chunks containing 5 masks of 16 bytes, with
   807  // the following layout:
   808  //
   809  // - The first mask is used to shuffle values from the first 16 bytes of input
   810  //   into the lower 16 bytes of output. These values are then shifted RIGHT to
   811  //   be aligned on the beginning of each 32 bit word.
   812  //
   813  // - The second mask is used to shuffle upper bits of bit-packed values of the
   814  //   first 16 bytes of input that spanned across 5 bytes. These extra bits cannot
   815  //   be selected by the first mask (which can select at most 4 bytes per word).
   816  //   The extra bits are then shifted LEFT to be positioned at the end of the
   817  //   words, after the bits extracted by the first mask.
   818  //
   819  // - The third mask selects values from the first 16 bytes of input into the
   820  //   upper 16 bytes of output. These values are then shifted RIGHT to be aligned
   821  //   on the beginning of each 32 bit word.
   822  //
   823  // - The fourth mask selects values from the second 16 bytes of input into the
   824  //   upper 16 bytes of output. These values are then shifted RIGHT to be aligned
   825  //   on the beginning of each 32 bit word.
   826  //
   827  // - The fifth mask is used to shuffle upper bits of bit-packed values of the
   828  //   second 16 bytes of input that spanned across 5 bytes. These extra bits are
   829  //   then shifted LEFT to sit at the end of the words, after the fourth mask's bits.
   830  //
   831  // The bit width is intended to be used as an index into this array, using this
   832  // formula to convert from the index to a byte offset:
   833  //
   834  //      offset = 80 * (bitWidth - 27)
   835  //
   836  GLOBL ·shuffleInt32x27to31bits(SB), RODATA|NOPTR, $400 // 5 bit widths (27..31) * 80 bytes each
   837  
   838  // 27 bits => 32 bits
   839  // ------------------
   840  // 0: [a,a,a,a,a,a,a,a]
   841  // 1: [a,a,a,a,a,a,a,a]
   842  // 2: [a,a,a,a,a,a,a,a]
   843  // 3: [a,a,a,b,b,b,b,b]
   844  // 4: [b,b,b,b,b,b,b,b]
   845  // 5: [b,b,b,b,b,b,b,b]
   846  // 6: [b,b,b,b,b,b,c,c]
   847  // 7: [c,c,c,c,c,c,c,c]
   848  // 8: [c,c,c,c,c,c,c,c]
   849  // 9: [c,c,c,c,c,c,c,c]
   850  // A: [c,d,d,d,d,d,d,d]
   851  // B: [d,d,d,d,d,d,d,d]
   852  // C: [d,d,d,d,d,d,d,d]
   853  // D: [d,d,d,d,e,e,e,e]
   854  // E: [e,e,e,e,e,e,e,e]
   855  // F: [e,e,e,e,e,e,e,e]
   856  // ---
   857  // 0: [e,e,e,e,e,e,e,f]
   858  // 1: [f,f,f,f,f,f,f,f]
   859  // 2: [f,f,f,f,f,f,f,f]
   860  // 3: [f,f,f,f,f,f,f,f]
   861  // 4: [f,f,g,g,g,g,g,g]
   862  // 5: [g,g,g,g,g,g,g,g]
   863  // 6: [g,g,g,g,g,g,g,g]
   864  // 7: [g,g,g,g,g,h,h,h]
   865  // 8: [h,h,h,h,h,h,h,h]
   866  // 9: [h,h,h,h,h,h,h,h]
   867  // A: [h,h,h,h,h,h,h,h]
   868  // ...
   869  DATA ·shuffleInt32x27to31bits+0+0(SB)/4,  $0x03020100 // mask 1 (first 16 input bytes -> lower 16 output bytes): a = bytes 0-3
   870  DATA ·shuffleInt32x27to31bits+0+4(SB)/4,  $0x06050403 // b = bytes 3-6
   871  DATA ·shuffleInt32x27to31bits+0+8(SB)/4,  $0x09080706 // c = bytes 6-9 (its 5th byte, A, is picked up by mask 2)
   872  DATA ·shuffleInt32x27to31bits+0+12(SB)/4, $0x0D0C0B0A // d = bytes A-D
   873  // Mask 2 (first 16 input bytes, 5th-byte spill of a-d): 0x80 marks a position that takes no input byte.
   874  DATA ·shuffleInt32x27to31bits+0+16(SB)/4, $0x80808080 // a: fits in 4 bytes
   875  DATA ·shuffleInt32x27to31bits+0+20(SB)/4, $0x80808080 // b: fits in 4 bytes
   876  DATA ·shuffleInt32x27to31bits+0+24(SB)/4, $0x0A808080 // c: byte A in the top byte (1 spilled bit)
   877  DATA ·shuffleInt32x27to31bits+0+28(SB)/4, $0x80808080 // d: fits in 4 bytes
   878  // Mask 3 (first 16 input bytes -> upper 16 output bytes): only e starts in the first half.
   879  DATA ·shuffleInt32x27to31bits+0+32(SB)/4, $0x800F0E0D // e = bytes D-F; its 4th byte comes from mask 4
   880  DATA ·shuffleInt32x27to31bits+0+36(SB)/4, $0x80808080 // f: entirely in the second half
   881  DATA ·shuffleInt32x27to31bits+0+40(SB)/4, $0x80808080 // g: entirely in the second half
   882  DATA ·shuffleInt32x27to31bits+0+44(SB)/4, $0x80808080 // h: entirely in the second half
   883  // Mask 4 (second 16 input bytes -> upper 16 output bytes); indices are rows after "---".
   884  DATA ·shuffleInt32x27to31bits+0+48(SB)/4, $0x00808080 // e = byte 0 in the top byte
   885  DATA ·shuffleInt32x27to31bits+0+52(SB)/4, $0x03020100 // f = bytes 0-3 (its 5th byte, 4, is picked up by mask 5)
   886  DATA ·shuffleInt32x27to31bits+0+56(SB)/4, $0x07060504 // g = bytes 4-7
   887  DATA ·shuffleInt32x27to31bits+0+60(SB)/4, $0x0A090807 // h = bytes 7-A
   888  // Mask 5 (second 16 input bytes, 5th-byte spill of e-h).
   889  DATA ·shuffleInt32x27to31bits+0+64(SB)/4, $0x80808080 // e: no spill
   890  DATA ·shuffleInt32x27to31bits+0+68(SB)/4, $0x04808080 // f: byte 4 in the top byte (2 spilled bits)
   891  DATA ·shuffleInt32x27to31bits+0+72(SB)/4, $0x80808080 // g: no spill
   892  DATA ·shuffleInt32x27to31bits+0+76(SB)/4, $0x80808080 // h: no spill
   893  
   894  // 28 bits => 32 bits
   895  // ------------------
   896  // 0: [a,a,a,a,a,a,a,a]
   897  // 1: [a,a,a,a,a,a,a,a]
   898  // 2: [a,a,a,a,a,a,a,a]
   899  // 3: [a,a,a,a,b,b,b,b]
   900  // 4: [b,b,b,b,b,b,b,b]
   901  // 5: [b,b,b,b,b,b,b,b]
   902  // 6: [b,b,b,b,b,b,b,b]
   903  // 7: [c,c,c,c,c,c,c,c]
   904  // 8: [c,c,c,c,c,c,c,c]
   905  // 9: [c,c,c,c,c,c,c,c]
   906  // A: [c,c,c,c,d,d,d,d]
   907  // B: [d,d,d,d,d,d,d,d]
   908  // C: [d,d,d,d,d,d,d,d]
   909  // D: [d,d,d,d,d,d,d,d]
   910  // E: [e,e,e,e,e,e,e,e]
   911  // F: [e,e,e,e,e,e,e,e]
   912  // ---
   913  // 0: [e,e,e,e,e,e,e,e]
   914  // 1: [e,e,e,e,f,f,f,f]
   915  // 2: [f,f,f,f,f,f,f,f]
   916  // 3: [f,f,f,f,f,f,f,f]
   917  // 4: [f,f,f,f,f,f,f,f]
   918  // 5: [g,g,g,g,g,g,g,g]
   919  // 6: [g,g,g,g,g,g,g,g]
   920  // 7: [g,g,g,g,g,g,g,g]
   921  // 8: [g,g,g,g,h,h,h,h]
   922  // 9: [h,h,h,h,h,h,h,h]
   923  // A: [h,h,h,h,h,h,h,h]
   924  // B: [h,h,h,h,h,h,h,h]
   925  // ...
   926  DATA ·shuffleInt32x27to31bits+80+0(SB)/4,  $0x03020100 // mask 1 (first 16 input bytes -> lower 16 output bytes): a = bytes 0-3
   927  DATA ·shuffleInt32x27to31bits+80+4(SB)/4,  $0x06050403 // b = bytes 3-6
   928  DATA ·shuffleInt32x27to31bits+80+8(SB)/4,  $0x0A090807 // c = bytes 7-A
   929  DATA ·shuffleInt32x27to31bits+80+12(SB)/4, $0x0D0C0B0A // d = bytes A-D
   930  // Mask 2 (first 16 input bytes, 5th-byte spill): all 0x80, no 28-bit value in the diagram spans 5 bytes.
   931  DATA ·shuffleInt32x27to31bits+80+16(SB)/4, $0x80808080 // a: no spill
   932  DATA ·shuffleInt32x27to31bits+80+20(SB)/4, $0x80808080 // b: no spill
   933  DATA ·shuffleInt32x27to31bits+80+24(SB)/4, $0x80808080 // c: no spill
   934  DATA ·shuffleInt32x27to31bits+80+28(SB)/4, $0x80808080 // d: no spill
   935  // Mask 3 (first 16 input bytes -> upper 16 output bytes): only e starts in the first half.
   936  DATA ·shuffleInt32x27to31bits+80+32(SB)/4, $0x80800F0E // e = bytes E-F in the low two bytes; the rest comes from mask 4
   937  DATA ·shuffleInt32x27to31bits+80+36(SB)/4, $0x80808080 // f: entirely in the second half
   938  DATA ·shuffleInt32x27to31bits+80+40(SB)/4, $0x80808080 // g: entirely in the second half
   939  DATA ·shuffleInt32x27to31bits+80+44(SB)/4, $0x80808080 // h: entirely in the second half
   940  // Mask 4 (second 16 input bytes -> upper 16 output bytes); indices are rows after "---".
   941  DATA ·shuffleInt32x27to31bits+80+48(SB)/4, $0x01008080 // e = bytes 0-1 in the upper two bytes
   942  DATA ·shuffleInt32x27to31bits+80+52(SB)/4, $0x04030201 // f = bytes 1-4
   943  DATA ·shuffleInt32x27to31bits+80+56(SB)/4, $0x08070605 // g = bytes 5-8
   944  DATA ·shuffleInt32x27to31bits+80+60(SB)/4, $0x0B0A0908 // h = bytes 8-B
   945  // Mask 5 (second 16 input bytes, 5th-byte spill): all 0x80, nothing spills at 28 bits.
   946  DATA ·shuffleInt32x27to31bits+80+64(SB)/4, $0x80808080 // e: no spill
   947  DATA ·shuffleInt32x27to31bits+80+68(SB)/4, $0x80808080 // f: no spill
   948  DATA ·shuffleInt32x27to31bits+80+72(SB)/4, $0x80808080 // g: no spill
   949  DATA ·shuffleInt32x27to31bits+80+76(SB)/4, $0x80808080 // h: no spill
   950  
   951  // 29 bits => 32 bits
   952  // ------------------
   953  // 0: [a,a,a,a,a,a,a,a]
   954  // 1: [a,a,a,a,a,a,a,a]
   955  // 2: [a,a,a,a,a,a,a,a]
   956  // 3: [a,a,a,a,a,b,b,b]
   957  // 4: [b,b,b,b,b,b,b,b]
   958  // 5: [b,b,b,b,b,b,b,b]
   959  // 6: [b,b,b,b,b,b,b,b]
   960  // 7: [b,b,c,c,c,c,c,c]
   961  // 8: [c,c,c,c,c,c,c,c]
   962  // 9: [c,c,c,c,c,c,c,c]
   963  // A: [c,c,c,c,c,c,c,d]
   964  // B: [d,d,d,d,d,d,d,d]
   965  // C: [d,d,d,d,d,d,d,d]
   966  // D: [d,d,d,d,d,d,d,d]
   967  // E: [d,d,d,d,e,e,e,e]
   968  // F: [e,e,e,e,e,e,e,e]
   969  // ---
   970  // 0: [e,e,e,e,e,e,e,e]
   971  // 1: [e,e,e,e,e,e,e,e]
   972  // 2: [e,f,f,f,f,f,f,f]
   973  // 3: [f,f,f,f,f,f,f,f]
   974  // 4: [f,f,f,f,f,f,f,f]
   975  // 5: [f,f,f,f,f,f,g,g]
   976  // 6: [g,g,g,g,g,g,g,g]
   977  // 7: [g,g,g,g,g,g,g,g]
   978  // 8: [g,g,g,g,g,g,g,g]
   979  // 9: [g,g,g,h,h,h,h,h]
   980  // A: [h,h,h,h,h,h,h,h]
   981  // B: [h,h,h,h,h,h,h,h]
   982  // C: [h,h,h,h,h,h,h,h]
   983  // ...
   984  DATA ·shuffleInt32x27to31bits+160+0(SB)/4,  $0x03020100 // mask 1 (first 16 input bytes -> lower 16 output bytes): a = bytes 0-3
   985  DATA ·shuffleInt32x27to31bits+160+4(SB)/4,  $0x06050403 // b = bytes 3-6 (its 5th byte, 7, is picked up by mask 2)
   986  DATA ·shuffleInt32x27to31bits+160+8(SB)/4,  $0x0A090807 // c = bytes 7-A
   987  DATA ·shuffleInt32x27to31bits+160+12(SB)/4, $0x0D0C0B0A // d = bytes A-D (its 5th byte, E, is picked up by mask 2)
   988  // Mask 2 (first 16 input bytes, 5th-byte spill of a-d): 0x80 marks a position that takes no input byte.
   989  DATA ·shuffleInt32x27to31bits+160+16(SB)/4, $0x80808080 // a: fits in 4 bytes
   990  DATA ·shuffleInt32x27to31bits+160+20(SB)/4, $0x07808080 // b: byte 7 in the top byte (2 spilled bits)
   991  DATA ·shuffleInt32x27to31bits+160+24(SB)/4, $0x80808080 // c: fits in 4 bytes
   992  DATA ·shuffleInt32x27to31bits+160+28(SB)/4, $0x0E808080 // d: byte E in the top byte (4 spilled bits)
   993  // Mask 3 (first 16 input bytes -> upper 16 output bytes): only e starts in the first half.
   994  DATA ·shuffleInt32x27to31bits+160+32(SB)/4, $0x80800F0E // e = bytes E-F in the low two bytes; the rest comes from masks 4 and 5
   995  DATA ·shuffleInt32x27to31bits+160+36(SB)/4, $0x80808080 // f: entirely in the second half
   996  DATA ·shuffleInt32x27to31bits+160+40(SB)/4, $0x80808080 // g: entirely in the second half
   997  DATA ·shuffleInt32x27to31bits+160+44(SB)/4, $0x80808080 // h: entirely in the second half
   998  // Mask 4 (second 16 input bytes -> upper 16 output bytes); indices are rows after "---".
   999  DATA ·shuffleInt32x27to31bits+160+48(SB)/4, $0x01008080 // e = bytes 0-1 in the upper two bytes
  1000  DATA ·shuffleInt32x27to31bits+160+52(SB)/4, $0x05040302 // f = bytes 2-5
  1001  DATA ·shuffleInt32x27to31bits+160+56(SB)/4, $0x08070605 // g = bytes 5-8 (its 5th byte, 9, is picked up by mask 5)
  1002  DATA ·shuffleInt32x27to31bits+160+60(SB)/4, $0x0C0B0A09 // h = bytes 9-C
  1003  // Mask 5 (second 16 input bytes, 5th-byte spill of e-h).
  1004  DATA ·shuffleInt32x27to31bits+160+64(SB)/4, $0x02808080 // e: byte 2 in the top byte (1 spilled bit)
  1005  DATA ·shuffleInt32x27to31bits+160+68(SB)/4, $0x80808080 // f: no spill
  1006  DATA ·shuffleInt32x27to31bits+160+72(SB)/4, $0x09808080 // g: byte 9 in the top byte (3 spilled bits)
  1007  DATA ·shuffleInt32x27to31bits+160+76(SB)/4, $0x80808080 // h: no spill
  1008  
  1009  // 30 bits => 32 bits
  1010  // ------------------
  1011  // 0: [a,a,a,a,a,a,a,a]
  1012  // 1: [a,a,a,a,a,a,a,a]
  1013  // 2: [a,a,a,a,a,a,a,a]
  1014  // 3: [a,a,a,a,a,a,b,b]
  1015  // 4: [b,b,b,b,b,b,b,b]
  1016  // 5: [b,b,b,b,b,b,b,b]
  1017  // 6: [b,b,b,b,b,b,b,b]
  1018  // 7: [b,b,b,b,c,c,c,c]
  1019  // 8: [c,c,c,c,c,c,c,c]
  1020  // 9: [c,c,c,c,c,c,c,c]
  1021  // A: [c,c,c,c,c,c,c,c]
  1022  // B: [c,c,d,d,d,d,d,d]
  1023  // C: [d,d,d,d,d,d,d,d]
  1024  // D: [d,d,d,d,d,d,d,d]
  1025  // E: [d,d,d,d,d,d,d,d]
  1026  // F: [e,e,e,e,e,e,e,e]
  1027  // ---
  1028  // 0: [e,e,e,e,e,e,e,e]
  1029  // 1: [e,e,e,e,e,e,e,e]
  1030  // 2: [e,e,e,e,e,e,f,f]
  1031  // 3: [f,f,f,f,f,f,f,f]
  1032  // 4: [f,f,f,f,f,f,f,f]
  1033  // 5: [f,f,f,f,f,f,f,f]
  1034  // 6: [f,f,f,f,g,g,g,g]
  1035  // 7: [g,g,g,g,g,g,g,g]
  1036  // 8: [g,g,g,g,g,g,g,g]
  1037  // 9: [g,g,g,g,g,g,g,g]
  1038  // A: [g,g,h,h,h,h,h,h]
  1039  // B: [h,h,h,h,h,h,h,h]
  1040  // C: [h,h,h,h,h,h,h,h]
  1041  // D: [h,h,h,h,h,h,h,h]
  1042  // ...
  1043  DATA ·shuffleInt32x27to31bits+240+0(SB)/4,  $0x03020100 // mask 1 (first 16 input bytes -> lower 16 output bytes): a = bytes 0-3
  1044  DATA ·shuffleInt32x27to31bits+240+4(SB)/4,  $0x06050403 // b = bytes 3-6 (its 5th byte, 7, is picked up by mask 2)
  1045  DATA ·shuffleInt32x27to31bits+240+8(SB)/4,  $0x0A090807 // c = bytes 7-A (its 5th byte, B, is picked up by mask 2)
  1046  DATA ·shuffleInt32x27to31bits+240+12(SB)/4, $0x0E0D0C0B // d = bytes B-E
  1047  // Mask 2 (first 16 input bytes, 5th-byte spill of a-d): 0x80 marks a position that takes no input byte.
  1048  DATA ·shuffleInt32x27to31bits+240+16(SB)/4, $0x80808080 // a: fits in 4 bytes
  1049  DATA ·shuffleInt32x27to31bits+240+20(SB)/4, $0x07808080 // b: byte 7 in the top byte (4 spilled bits)
  1050  DATA ·shuffleInt32x27to31bits+240+24(SB)/4, $0x0B808080 // c: byte B in the top byte (2 spilled bits)
  1051  DATA ·shuffleInt32x27to31bits+240+28(SB)/4, $0x80808080 // d: fits in 4 bytes
  1052  // Mask 3 (first 16 input bytes -> upper 16 output bytes): only e starts in the first half.
  1053  DATA ·shuffleInt32x27to31bits+240+32(SB)/4, $0x8080800F // e = byte F in the low byte; the rest comes from mask 4
  1054  DATA ·shuffleInt32x27to31bits+240+36(SB)/4, $0x80808080 // f: entirely in the second half
  1055  DATA ·shuffleInt32x27to31bits+240+40(SB)/4, $0x80808080 // g: entirely in the second half
  1056  DATA ·shuffleInt32x27to31bits+240+44(SB)/4, $0x80808080 // h: entirely in the second half
  1057  // Mask 4 (second 16 input bytes -> upper 16 output bytes); indices are rows after "---".
  1058  DATA ·shuffleInt32x27to31bits+240+48(SB)/4, $0x02010080 // e = bytes 0-2 in the upper three bytes
  1059  DATA ·shuffleInt32x27to31bits+240+52(SB)/4, $0x05040302 // f = bytes 2-5 (its 5th byte, 6, is picked up by mask 5)
  1060  DATA ·shuffleInt32x27to31bits+240+56(SB)/4, $0x09080706 // g = bytes 6-9 (its 5th byte, A, is picked up by mask 5)
  1061  DATA ·shuffleInt32x27to31bits+240+60(SB)/4, $0x0D0C0B0A // h = bytes A-D
  1062  // Mask 5 (second 16 input bytes, 5th-byte spill of e-h).
  1063  DATA ·shuffleInt32x27to31bits+240+64(SB)/4, $0x80808080 // e: no spill
  1064  DATA ·shuffleInt32x27to31bits+240+68(SB)/4, $0x06808080 // f: byte 6 in the top byte (4 spilled bits)
  1065  DATA ·shuffleInt32x27to31bits+240+72(SB)/4, $0x0A808080 // g: byte A in the top byte (2 spilled bits)
  1066  DATA ·shuffleInt32x27to31bits+240+76(SB)/4, $0x80808080 // h: no spill
  1067  
  1068  // 31 bits => 32 bits
  1069  // ------------------
  1070  // 0: [a,a,a,a,a,a,a,a]
  1071  // 1: [a,a,a,a,a,a,a,a]
  1072  // 2: [a,a,a,a,a,a,a,a]
  1073  // 3: [a,a,a,a,a,a,a,b]
  1074  // 4: [b,b,b,b,b,b,b,b]
  1075  // 5: [b,b,b,b,b,b,b,b]
  1076  // 6: [b,b,b,b,b,b,b,b]
  1077  // 7: [b,b,b,b,b,b,c,c]
  1078  // 8: [c,c,c,c,c,c,c,c]
  1079  // 9: [c,c,c,c,c,c,c,c]
  1080  // A: [c,c,c,c,c,c,c,c]
  1081  // B: [c,c,c,c,c,d,d,d]
  1082  // C: [d,d,d,d,d,d,d,d]
  1083  // D: [d,d,d,d,d,d,d,d]
  1084  // E: [d,d,d,d,d,d,d,d]
  1085  // F: [d,d,d,d,e,e,e,e]
  1086  // ---
  1087  // 0: [e,e,e,e,e,e,e,e]
  1088  // 1: [e,e,e,e,e,e,e,e]
  1089  // 2: [e,e,e,e,e,e,e,e]
  1090  // 3: [e,e,e,f,f,f,f,f]
  1091  // 4: [f,f,f,f,f,f,f,f]
  1092  // 5: [f,f,f,f,f,f,f,f]
  1093  // 6: [f,f,f,f,f,f,f,f]
  1094  // 7: [f,f,g,g,g,g,g,g]
  1095  // 8: [g,g,g,g,g,g,g,g]
  1096  // 9: [g,g,g,g,g,g,g,g]
  1097  // A: [g,g,g,g,g,g,g,g]
  1098  // B: [g,h,h,h,h,h,h,h]
  1099  // C: [h,h,h,h,h,h,h,h]
  1100  // D: [h,h,h,h,h,h,h,h]
  1101  // E: [h,h,h,h,h,h,h,h]
  1102  // ...
  1103  DATA ·shuffleInt32x27to31bits+320+0(SB)/4,  $0x03020100 // mask 1 (first 16 input bytes -> lower 16 output bytes): a = bytes 0-3
  1104  DATA ·shuffleInt32x27to31bits+320+4(SB)/4,  $0x06050403 // b = bytes 3-6 (its 5th byte, 7, is picked up by mask 2)
  1105  DATA ·shuffleInt32x27to31bits+320+8(SB)/4,  $0x0A090807 // c = bytes 7-A (its 5th byte, B, is picked up by mask 2)
  1106  DATA ·shuffleInt32x27to31bits+320+12(SB)/4, $0x0E0D0C0B // d = bytes B-E (its 5th byte, F, is picked up by mask 2)
  1107  // Mask 2 (first 16 input bytes, 5th-byte spill of a-d): 0x80 marks a position that takes no input byte.
  1108  DATA ·shuffleInt32x27to31bits+320+16(SB)/4, $0x80808080 // a: fits in 4 bytes
  1109  DATA ·shuffleInt32x27to31bits+320+20(SB)/4, $0x07808080 // b: byte 7 in the top byte (6 spilled bits)
  1110  DATA ·shuffleInt32x27to31bits+320+24(SB)/4, $0x0B808080 // c: byte B in the top byte (5 spilled bits)
  1111  DATA ·shuffleInt32x27to31bits+320+28(SB)/4, $0x0F808080 // d: byte F in the top byte (4 spilled bits)
  1112  // Mask 3 (first 16 input bytes -> upper 16 output bytes): only e starts in the first half.
  1113  DATA ·shuffleInt32x27to31bits+320+32(SB)/4, $0x8080800F // e = byte F in the low byte; the rest comes from masks 4 and 5
  1114  DATA ·shuffleInt32x27to31bits+320+36(SB)/4, $0x80808080 // f: entirely in the second half
  1115  DATA ·shuffleInt32x27to31bits+320+40(SB)/4, $0x80808080 // g: entirely in the second half
  1116  DATA ·shuffleInt32x27to31bits+320+44(SB)/4, $0x80808080 // h: entirely in the second half
  1117  // Mask 4 (second 16 input bytes -> upper 16 output bytes); indices are rows after "---".
  1118  DATA ·shuffleInt32x27to31bits+320+48(SB)/4, $0x02010080 // e = bytes 0-2 in the upper three bytes
  1119  DATA ·shuffleInt32x27to31bits+320+52(SB)/4, $0x06050403 // f = bytes 3-6 (its 5th byte, 7, is picked up by mask 5)
  1120  DATA ·shuffleInt32x27to31bits+320+56(SB)/4, $0x0A090807 // g = bytes 7-A (its 5th byte, B, is picked up by mask 5)
  1121  DATA ·shuffleInt32x27to31bits+320+60(SB)/4, $0x0E0D0C0B // h = bytes B-E
  1122  // Mask 5 (second 16 input bytes, 5th-byte spill of e-h).
  1123  DATA ·shuffleInt32x27to31bits+320+64(SB)/4, $0x03808080 // e: byte 3 in the top byte (3 spilled bits)
  1124  DATA ·shuffleInt32x27to31bits+320+68(SB)/4, $0x07808080 // f: byte 7 in the top byte (2 spilled bits)
  1125  DATA ·shuffleInt32x27to31bits+320+72(SB)/4, $0x0B808080 // g: byte B in the top byte (1 spilled bit)
  1126  DATA ·shuffleInt32x27to31bits+320+76(SB)/4, $0x80808080 // h: no spill
  1127  
  1128  // The RIGHT shifts to unpack 32 bits integers.
  1129  //
  1130  // The following formula was determined empirically as the expression which
  1131  // generates shift values:
  1132  //
  1133  //      shift[i] = (i * bitWidth) % 8
  1134  // for i = 0..7, one 32-bit entry per value; rows depend only on bitWidth % 8.
  1135  GLOBL ·shiftRightInt32(SB), RODATA|NOPTR, $256 // 8 rows * 8 entries * 4 bytes
  1136  // Row +0: matches the formula for bitWidth % 8 == 1.
  1137  DATA ·shiftRightInt32+0+0(SB)/4,  $0
  1138  DATA ·shiftRightInt32+0+4(SB)/4,  $1
  1139  DATA ·shiftRightInt32+0+8(SB)/4,  $2
  1140  DATA ·shiftRightInt32+0+12(SB)/4, $3
  1141  DATA ·shiftRightInt32+0+16(SB)/4, $4
  1142  DATA ·shiftRightInt32+0+20(SB)/4, $5
  1143  DATA ·shiftRightInt32+0+24(SB)/4, $6
  1144  DATA ·shiftRightInt32+0+28(SB)/4, $7
  1145  // Row +32: matches the formula for bitWidth % 8 == 2.
  1146  DATA ·shiftRightInt32+32+0(SB)/4,  $0
  1147  DATA ·shiftRightInt32+32+4(SB)/4,  $2
  1148  DATA ·shiftRightInt32+32+8(SB)/4,  $4
  1149  DATA ·shiftRightInt32+32+12(SB)/4, $6
  1150  DATA ·shiftRightInt32+32+16(SB)/4, $0
  1151  DATA ·shiftRightInt32+32+20(SB)/4, $2
  1152  DATA ·shiftRightInt32+32+24(SB)/4, $4
  1153  DATA ·shiftRightInt32+32+28(SB)/4, $6
  1154  // Row +64: matches the formula for bitWidth % 8 == 3.
  1155  DATA ·shiftRightInt32+64+0(SB)/4,  $0
  1156  DATA ·shiftRightInt32+64+4(SB)/4,  $3
  1157  DATA ·shiftRightInt32+64+8(SB)/4,  $6
  1158  DATA ·shiftRightInt32+64+12(SB)/4, $1
  1159  DATA ·shiftRightInt32+64+16(SB)/4, $4
  1160  DATA ·shiftRightInt32+64+20(SB)/4, $7
  1161  DATA ·shiftRightInt32+64+24(SB)/4, $2
  1162  DATA ·shiftRightInt32+64+28(SB)/4, $5
  1163  // Row +96: matches the formula for bitWidth % 8 == 4.
  1164  DATA ·shiftRightInt32+96+0(SB)/4,  $0
  1165  DATA ·shiftRightInt32+96+4(SB)/4,  $4
  1166  DATA ·shiftRightInt32+96+8(SB)/4,  $0
  1167  DATA ·shiftRightInt32+96+12(SB)/4, $4
  1168  DATA ·shiftRightInt32+96+16(SB)/4, $0
  1169  DATA ·shiftRightInt32+96+20(SB)/4, $4
  1170  DATA ·shiftRightInt32+96+24(SB)/4, $0
  1171  DATA ·shiftRightInt32+96+28(SB)/4, $4
  1172  // Row +128: matches the formula for bitWidth % 8 == 5.
  1173  DATA ·shiftRightInt32+128+0(SB)/4,  $0
  1174  DATA ·shiftRightInt32+128+4(SB)/4,  $5
  1175  DATA ·shiftRightInt32+128+8(SB)/4,  $2
  1176  DATA ·shiftRightInt32+128+12(SB)/4, $7
  1177  DATA ·shiftRightInt32+128+16(SB)/4, $4
  1178  DATA ·shiftRightInt32+128+20(SB)/4, $1
  1179  DATA ·shiftRightInt32+128+24(SB)/4, $6
  1180  DATA ·shiftRightInt32+128+28(SB)/4, $3
  1181  // Row +160: matches the formula for bitWidth % 8 == 6.
  1182  DATA ·shiftRightInt32+160+0(SB)/4,  $0
  1183  DATA ·shiftRightInt32+160+4(SB)/4,  $6
  1184  DATA ·shiftRightInt32+160+8(SB)/4,  $4
  1185  DATA ·shiftRightInt32+160+12(SB)/4, $2
  1186  DATA ·shiftRightInt32+160+16(SB)/4, $0
  1187  DATA ·shiftRightInt32+160+20(SB)/4, $6
  1188  DATA ·shiftRightInt32+160+24(SB)/4, $4
  1189  DATA ·shiftRightInt32+160+28(SB)/4, $2
  1190  // Row +192: matches the formula for bitWidth % 8 == 7.
  1191  DATA ·shiftRightInt32+192+0(SB)/4,  $0
  1192  DATA ·shiftRightInt32+192+4(SB)/4,  $7
  1193  DATA ·shiftRightInt32+192+8(SB)/4,  $6
  1194  DATA ·shiftRightInt32+192+12(SB)/4, $5
  1195  DATA ·shiftRightInt32+192+16(SB)/4, $4
  1196  DATA ·shiftRightInt32+192+20(SB)/4, $3
  1197  DATA ·shiftRightInt32+192+24(SB)/4, $2
  1198  DATA ·shiftRightInt32+192+28(SB)/4, $1
  1199  // Row +224: matches the formula for bitWidth % 8 == 0 (every shift is zero).
  1200  DATA ·shiftRightInt32+224+0(SB)/4,  $0
  1201  DATA ·shiftRightInt32+224+4(SB)/4,  $0
  1202  DATA ·shiftRightInt32+224+8(SB)/4,  $0
  1203  DATA ·shiftRightInt32+224+12(SB)/4, $0
  1204  DATA ·shiftRightInt32+224+16(SB)/4, $0
  1205  DATA ·shiftRightInt32+224+20(SB)/4, $0
  1206  DATA ·shiftRightInt32+224+24(SB)/4, $0
  1207  DATA ·shiftRightInt32+224+28(SB)/4, $0
  1208  
  1209  // The LEFT shifts to unpack 32 bits integers.
  1210  //
  1211  // The following formula was determined empirically as the expression which
  1212  // generates shift values (the inner % keeps the result in 0..7):
  1213  //
  1214  //      shift[i] = (8 - (i * bitWidth) % 8) % 8
  1215  // for i = 0..7, one 32-bit entry per value; rows depend only on bitWidth % 8.
  1216  GLOBL ·shiftLeftInt32(SB), RODATA|NOPTR, $256 // 8 rows * 8 entries * 4 bytes
  1217  // Row +0: matches the formula for bitWidth % 8 == 1.
  1218  DATA ·shiftLeftInt32+0+0(SB)/4,  $0
  1219  DATA ·shiftLeftInt32+0+4(SB)/4,  $7
  1220  DATA ·shiftLeftInt32+0+8(SB)/4,  $6
  1221  DATA ·shiftLeftInt32+0+12(SB)/4, $5
  1222  DATA ·shiftLeftInt32+0+16(SB)/4, $4
  1223  DATA ·shiftLeftInt32+0+20(SB)/4, $3
  1224  DATA ·shiftLeftInt32+0+24(SB)/4, $2
  1225  DATA ·shiftLeftInt32+0+28(SB)/4, $1
  1226  // Row +32: matches the formula for bitWidth % 8 == 2.
  1227  DATA ·shiftLeftInt32+32+0(SB)/4,  $0
  1228  DATA ·shiftLeftInt32+32+4(SB)/4,  $6
  1229  DATA ·shiftLeftInt32+32+8(SB)/4,  $4
  1230  DATA ·shiftLeftInt32+32+12(SB)/4, $2
  1231  DATA ·shiftLeftInt32+32+16(SB)/4, $0
  1232  DATA ·shiftLeftInt32+32+20(SB)/4, $6
  1233  DATA ·shiftLeftInt32+32+24(SB)/4, $4
  1234  DATA ·shiftLeftInt32+32+28(SB)/4, $2
  1235  // Row +64: matches the formula for bitWidth % 8 == 3.
  1236  DATA ·shiftLeftInt32+64+0(SB)/4,  $0
  1237  DATA ·shiftLeftInt32+64+4(SB)/4,  $5
  1238  DATA ·shiftLeftInt32+64+8(SB)/4,  $2
  1239  DATA ·shiftLeftInt32+64+12(SB)/4, $7
  1240  DATA ·shiftLeftInt32+64+16(SB)/4, $4
  1241  DATA ·shiftLeftInt32+64+20(SB)/4, $1
  1242  DATA ·shiftLeftInt32+64+24(SB)/4, $6
  1243  DATA ·shiftLeftInt32+64+28(SB)/4, $3
  1244  // Row +96: matches the formula for bitWidth % 8 == 4.
  1245  DATA ·shiftLeftInt32+96+0(SB)/4,  $0
  1246  DATA ·shiftLeftInt32+96+4(SB)/4,  $4
  1247  DATA ·shiftLeftInt32+96+8(SB)/4,  $0
  1248  DATA ·shiftLeftInt32+96+12(SB)/4, $4
  1249  DATA ·shiftLeftInt32+96+16(SB)/4, $0
  1250  DATA ·shiftLeftInt32+96+20(SB)/4, $4
  1251  DATA ·shiftLeftInt32+96+24(SB)/4, $0
  1252  DATA ·shiftLeftInt32+96+28(SB)/4, $4
  1253  // Row +128: matches the formula for bitWidth % 8 == 5.
  1254  DATA ·shiftLeftInt32+128+0(SB)/4,  $0
  1255  DATA ·shiftLeftInt32+128+4(SB)/4,  $3
  1256  DATA ·shiftLeftInt32+128+8(SB)/4,  $6
  1257  DATA ·shiftLeftInt32+128+12(SB)/4, $1
  1258  DATA ·shiftLeftInt32+128+16(SB)/4, $4
  1259  DATA ·shiftLeftInt32+128+20(SB)/4, $7
  1260  DATA ·shiftLeftInt32+128+24(SB)/4, $2
  1261  DATA ·shiftLeftInt32+128+28(SB)/4, $5
  1262  // Row +160: matches the formula for bitWidth % 8 == 6.
  1263  DATA ·shiftLeftInt32+160+0(SB)/4,  $0
  1264  DATA ·shiftLeftInt32+160+4(SB)/4,  $2
  1265  DATA ·shiftLeftInt32+160+8(SB)/4,  $4
  1266  DATA ·shiftLeftInt32+160+12(SB)/4, $6
  1267  DATA ·shiftLeftInt32+160+16(SB)/4, $0
  1268  DATA ·shiftLeftInt32+160+20(SB)/4, $2
  1269  DATA ·shiftLeftInt32+160+24(SB)/4, $4
  1270  DATA ·shiftLeftInt32+160+28(SB)/4, $6
  1271  // Row +192: matches the formula for bitWidth % 8 == 7.
  1272  DATA ·shiftLeftInt32+192+0(SB)/4,  $0
  1273  DATA ·shiftLeftInt32+192+4(SB)/4,  $1
  1274  DATA ·shiftLeftInt32+192+8(SB)/4,  $2
  1275  DATA ·shiftLeftInt32+192+12(SB)/4, $3
  1276  DATA ·shiftLeftInt32+192+16(SB)/4, $4
  1277  DATA ·shiftLeftInt32+192+20(SB)/4, $5
  1278  DATA ·shiftLeftInt32+192+24(SB)/4, $6
  1279  DATA ·shiftLeftInt32+192+28(SB)/4, $7
  1280  // Row +224: matches the formula for bitWidth % 8 == 0 (every shift is zero).
  1281  DATA ·shiftLeftInt32+224+0(SB)/4,  $0
  1282  DATA ·shiftLeftInt32+224+4(SB)/4,  $0
  1283  DATA ·shiftLeftInt32+224+8(SB)/4,  $0
  1284  DATA ·shiftLeftInt32+224+12(SB)/4, $0
  1285  DATA ·shiftLeftInt32+224+16(SB)/4, $0
  1286  DATA ·shiftLeftInt32+224+20(SB)/4, $0
  1287  DATA ·shiftLeftInt32+224+24(SB)/4, $0
  1288  DATA ·shiftLeftInt32+224+28(SB)/4, $0
  1288  DATA ·shiftLeftInt32+224+28(SB)/4, $0